peridot_archive/
write.rs

1use crc::crc32;
2use libflate::deflate as zlib;
3use peridot_serialization_utils::VariableULong;
4use std::{
5    collections::HashMap,
6    io::{IoSlice, Result as IOResult, Write},
7};
8
9use crate::{
10    CompressionMethod, ContentFlags,
11    entry::{AssetEntryHeadingPair, AssetName},
12    entry_tree::EntryTreePointer,
13};
14
/// In-memory builder for an archive.
///
/// Entries are accumulated with [`ArchiveWrite::add`] and serialized with
/// [`ArchiveWrite::write`].
pub struct ArchiveWrite {
    /// Compression scheme chosen at construction; selects the branch taken when writing.
    compression_method: CompressionMethod,
    /// Maps each asset name to the offset/length of its content inside `data_bytes`.
    entries: HashMap<AssetName, AssetEntryHeadingPair>,
    /// Contents of all added entries, concatenated in insertion order.
    data_bytes: Vec<u8>,
}
20impl ArchiveWrite {
21    pub fn new(comp: CompressionMethod) -> Self {
22        Self {
23            compression_method: comp,
24            entries: HashMap::new(),
25            data_bytes: Vec::new(),
26        }
27    }
28
29    /// エントリを追加する 成功したらtrue
30    pub fn add(&mut self, name: String, ext: String, content: Vec<u8>) -> bool {
31        let n = AssetName { name, ext };
32        if self.entries.contains_key(&n) {
33            // すでにある
34            return false;
35        }
36
37        let relative_offset = self.data_bytes.len() as u64;
38        self.data_bytes.extend(content);
39        self.entries.insert(
40            n,
41            AssetEntryHeadingPair {
42                relative_offset,
43                byte_length: self.data_bytes.len() as u64 - relative_offset,
44            },
45        );
46
47        true
48    }
49
50    fn emit_exact_match_block(
51        block_buffer: &mut Vec<u8>,
52        entries: &[(&AssetName, &AssetEntryHeadingPair)],
53    ) -> IOResult<u64> {
54        let ptr = block_buffer.len() as u64;
55        VariableULong(entries.len() as _).write(block_buffer)?;
56        for (n, h) in entries {
57            VariableULong((n.name.len() + 1 + n.ext.len()) as _).write(block_buffer)?;
58            block_buffer.extend(n.name.as_bytes());
59            block_buffer.push(0);
60            block_buffer.extend(n.ext.as_bytes());
61            h.write(block_buffer)?;
62        }
63
64        Ok(ptr)
65    }
66
67    fn write_exact_hash_tree(
68        hash_tree_block: &mut Vec<u8>,
69        exact_match_block: &mut Vec<u8>,
70        sorted_hash_list: &[(u64, Vec<(&AssetName, &AssetEntryHeadingPair)>)],
71    ) -> IOResult<()> {
72        let write_base_ptr = hash_tree_block.len();
73        hash_tree_block.resize(
74            hash_tree_block.len()
75                + sorted_hash_list.len() * crate::entry_tree::EXACT_TREE_ENTRY_STRIDE,
76            0,
77        );
78
79        for (n, &(k, ref xs)) in sorted_hash_list.iter().enumerate() {
80            let exact_match_pointer = Self::emit_exact_match_block(exact_match_block, xs)?;
81
82            let mut e = crate::entry_tree::ExactBlockMutableView::at(
83                &mut hash_tree_block[write_base_ptr..],
84                n,
85            );
86            e.set_name_hash(k);
87            e.set_exact_block_offset(exact_match_pointer);
88        }
89
90        Ok(())
91    }
92
    /// Builds the lookup structures for all registered entries.
    ///
    /// Returns `(hash_tree_block, exact_match_block, content_flags)`:
    /// - `hash_tree_block` holds either a flat exact table (when it fits into the first
    ///   block, in which case `ContentFlags::ROOT_HASH_TREE_EXACT` is set) or a tree of
    ///   blocks whose leaves are exact tables.
    /// - `exact_match_block` holds the per-hash entry lists produced by
    ///   `emit_exact_match_block`.
    ///
    /// `header_size` is forwarded to `first_hash_tree_block_size` to determine how much
    /// room the first hash tree block has.
    fn gen_asset_entry_blocks(
        &self,
        header_size: usize,
    ) -> IOResult<(Vec<u8>, Vec<u8>, ContentFlags)> {
        // This could perhaps be optimized a bit more (running binary_search for every entry
        // seems likely to have poor memory cache efficiency once the table gets large).
        // For now this code only runs offline (less frequently than Read), so revisit it later.
        //
        // The table is kept sorted by name hash; entries sharing a hash are grouped together.
        let mut sorted_hash_table: Vec<(u64, Vec<(&AssetName, &AssetEntryHeadingPair)>)> =
            Vec::with_capacity(self.entries.len());
        for (name, heading) in self.entries.iter() {
            let name_hash = name.hash();
            match sorted_hash_table.binary_search_by_key(&name_hash, |&(nh, _)| nh) {
                Ok(x) => sorted_hash_table[x].1.push((name, heading)),
                Err(x) => sorted_hash_table.insert(x, (name_hash, vec![(name, heading)])),
            }
        }

        let mut content_flags = ContentFlags::EMPTY;
        let mut exact_match_block = Vec::new();
        let mut hash_tree_block = Vec::new();

        let first_block_size = crate::entry_tree::first_hash_tree_block_size(header_size);
        if crate::entry_tree::exact_root_tree_block_size(sorted_hash_table.len())
            <= first_block_size
        {
            // Everything fits into this page.
            content_flags |= ContentFlags::ROOT_HASH_TREE_EXACT;

            Self::write_exact_hash_tree(
                &mut hash_tree_block,
                &mut exact_match_block,
                &sorted_hash_table,
            )?;
        } else {
            // A subtree structure is required.

            // Recursively emits the block(s) for `hash_table` at the end of `hash_block` and
            // returns a pointer to the emitted block.
            fn gen_subtree(
                hash_table: &[(u64, Vec<(&AssetName, &AssetEntryHeadingPair)>)],
                hash_block: &mut Vec<u8>,
                exact_match_block: &mut Vec<u8>,
            ) -> IOResult<EntryTreePointer> {
                if hash_table.len() < crate::entry_tree::NON_ROOT_EXACT_TREE_MAX_ELEMENT_COUNT {
                    // This page is large enough: emit a u16 element count followed by
                    // a flat exact table, and mark the pointer as an exact tree.
                    let this_tree_ptr =
                        EntryTreePointer::from_u64(hash_block.len() as _).exact_tree();
                    hash_block.extend(u16::to_le_bytes(hash_table.len() as _));
                    ArchiveWrite::write_exact_hash_tree(hash_block, exact_match_block, hash_table)?;

                    return Ok(this_tree_ptr);
                }

                // Still needs subtrees.
                let this_tree_ptr = EntryTreePointer::from_u64(hash_block.len() as _);

                // Reserve this block first; children are appended after it and the
                // reserved slots are patched once each child's pointer is known.
                let entry_count = crate::entry_tree::MAX_ENTRY_COUNT;
                let hash_block_base = hash_block.len();
                hash_block.resize(
                    hash_block.len() + crate::entry_tree::normal_tree_block_size(entry_count),
                    0,
                );
                let mut subtree_base = 0;
                for n in 0..entry_count {
                    // Index of the element stored directly in slot `n`.
                    // NOTE(review): the divisor is `entry_count + 2`; confirm this split is intended.
                    let nx = hash_table.len() * (n + 1) / (entry_count + 2);
                    // Elements between the previous slot's element and this one form the
                    // "smaller" subtree of this slot.
                    let less_ptr =
                        gen_subtree(&hash_table[subtree_base..nx], hash_block, exact_match_block)?;
                    subtree_base = nx + 1;

                    let mut e =
                        crate::entry_tree::EntryMutableView::at(hash_block, hash_block_base, n);
                    e.set_name_hash(hash_table[nx].0);
                    e.set_exact_block_offset(ArchiveWrite::emit_exact_match_block(
                        exact_match_block,
                        &hash_table[nx].1,
                    )?);
                    e.set_smaller_tree_pointer(less_ptr);
                }

                // Everything after the last slot's element goes into the "larger" subtree.
                let greater_ptr =
                    gen_subtree(&hash_table[subtree_base..], hash_block, exact_match_block)?;
                crate::entry_tree::BlockMutableView::from_offset_and_element_count(
                    hash_block,
                    hash_block_base,
                    entry_count,
                )
                .set_larger_tree_pointer(greater_ptr);

                Ok(this_tree_ptr)
            }

            // The root block is built like a non-leaf subtree block, except that its entry
            // count is derived from the size of the first block and it starts at offset 0.
            let entry_count = crate::entry_tree::normal_tree_entry_count(first_block_size);
            let hash_block_base = 0;
            hash_tree_block.resize(
                hash_tree_block.len() + crate::entry_tree::normal_tree_block_size(entry_count),
                0,
            );
            let mut subtree_base = 0;
            for n in 0..entry_count {
                let nx = sorted_hash_table.len() * (n + 1) / (entry_count + 2);
                let less_ptr = gen_subtree(
                    &sorted_hash_table[subtree_base..nx],
                    &mut hash_tree_block,
                    &mut exact_match_block,
                )?;
                subtree_base = nx + 1;

                let mut e = crate::entry_tree::EntryMutableView::at(
                    &mut hash_tree_block,
                    hash_block_base,
                    n,
                );
                e.set_name_hash(sorted_hash_table[nx].0);
                e.set_exact_block_offset(Self::emit_exact_match_block(
                    &mut exact_match_block,
                    &sorted_hash_table[nx].1,
                )?);
                e.set_smaller_tree_pointer(less_ptr);
            }

            let greater_ptr = gen_subtree(
                &sorted_hash_table[subtree_base..],
                &mut hash_tree_block,
                &mut exact_match_block,
            )?;
            crate::entry_tree::BlockMutableView::from_offset_and_element_count(
                &mut hash_tree_block,
                hash_block_base,
                entry_count,
            )
            .set_larger_tree_pointer(greater_ptr);
        }

        Ok((hash_tree_block, exact_match_block, content_flags))
    }
224
225    fn write_compression_target_contents(
226        &self,
227        writer: &mut (impl Write + ?Sized),
228        file_header_size: usize,
229    ) -> IOResult<()> {
230        let (hash_tree_block, exact_match_block, content_flags) =
231            self.gen_asset_entry_blocks(file_header_size)?;
232
233        crate::utils::write_all_vectored(
234            writer,
235            &mut [
236                IoSlice::new(&[content_flags.bits()]),
237                IoSlice::new(&u32::to_le_bytes((hash_tree_block.len() >> 3) as _)),
238                IoSlice::new(&u64::to_le_bytes(exact_match_block.len() as _)),
239                IoSlice::new(&hash_tree_block),
240                IoSlice::new(&exact_match_block),
241                IoSlice::new(&self.data_bytes),
242            ],
243        )?;
244
245        Ok(())
246    }
247
248    pub fn write(&self, writer: &mut (impl Write + ?Sized)) -> IOResult<()> {
249        match self.compression_method {
250            CompressionMethod::None => {
251                let mut body = Vec::new();
252                self.write_compression_target_contents(&mut body, 4 + 4)?;
253
254                Self::write_common(writer, b"par ", None, &body)
255            }
256            CompressionMethod::Zlib(_) => {
257                let mut body = zlib::Encoder::new(Vec::new());
258                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
259                let uncompressed_bytes = body.as_inner_ref().len() as u64;
260
261                Self::write_common(
262                    writer,
263                    b"pard",
264                    Some(uncompressed_bytes),
265                    &body.finish().into_result()?,
266                )
267            }
268            CompressionMethod::Lz4(_) => {
269                let mut body = Vec::new();
270                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
271                let uncompressed_bytes = body.len() as u64;
272                let body = lz4_compression::prelude::compress(&body);
273
274                Self::write_common(writer, b"parz", Some(uncompressed_bytes), &body[..])
275            }
276            CompressionMethod::Zstd11(_) => {
277                let mut body = zstd::Encoder::new(Vec::new(), 11)?;
278                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
279                let uncompressed_bytes = body.get_ref().len() as u64;
280
281                Self::write_common(writer, b"par1", Some(uncompressed_bytes), &body.finish()?)
282            }
283        }
284    }
285
286    #[cfg(feature = "async-rt-async-std")]
287    pub async fn write_async(
288        &self,
289        writer: &mut (impl async_std::io::Write + Unpin + ?Sized),
290    ) -> IOResult<()> {
291        match self.compression_method {
292            CompressionMethod::None => {
293                let mut body = Vec::new();
294                self.write_compression_target_contents(&mut body, 4 + 4)?;
295
296                Self::write_common_async(writer, b"par ", None, &body).await
297            }
298            CompressionMethod::Zlib(_) => {
299                let mut body = zlib::Encoder::new(Vec::new());
300                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
301                let uncompressed_bytes = body.as_inner_ref().len() as u64;
302
303                Self::write_common_async(
304                    writer,
305                    b"pard",
306                    Some(uncompressed_bytes),
307                    &body.finish().into_result()?,
308                )
309                .await
310            }
311            CompressionMethod::Lz4(_) => {
312                let mut body = Vec::new();
313                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
314                let uncompressed_bytes = body.len() as u64;
315                let body = lz4_compression::prelude::compress(&body);
316
317                Self::write_common_async(writer, b"parz", Some(uncompressed_bytes), &body[..]).await
318            }
319            CompressionMethod::Zstd11(_) => {
320                let mut body = zstd::Encoder::new(Vec::new(), 11)?;
321                self.write_compression_target_contents(&mut body, 4 + 8 + 4)?;
322                let uncompressed_bytes = body.get_ref().len() as u64;
323
324                Self::write_common_async(writer, b"par1", Some(uncompressed_bytes), &body.finish()?)
325                    .await
326            }
327        }
328    }
329
330    fn write_common(
331        writer: &mut (impl Write + ?Sized),
332        signature: &[u8],
333        uncompressed_bytes: Option<u64>,
334        body: &[u8],
335    ) -> IOResult<()> {
336        let checksum = crc32::checksum_ieee(body);
337        let checksum_buf = checksum.to_le_bytes();
338        let uncompressed_bytes_buf = uncompressed_bytes.map(u64::to_le_bytes);
339
340        let mut vectors = Vec::with_capacity(4);
341        vectors.push(IoSlice::new(signature));
342        vectors.extend(
343            uncompressed_bytes_buf
344                .as_ref()
345                .map(|x| IoSlice::new(&x[..])),
346        );
347        vectors.extend([IoSlice::new(&checksum_buf), IoSlice::new(body)]);
348
349        crate::utils::write_all_vectored(writer, &mut vectors)?;
350        Ok(())
351    }
352
353    #[cfg(feature = "async-rt-async-std")]
354    async fn write_common_async(
355        writer: &mut (impl async_std::io::Write + Unpin + ?Sized),
356        signature: &[u8],
357        uncompressed_byte_length: Option<u64>,
358        body: &[u8],
359    ) -> IOResult<()> {
360        let checksum = crc32::checksum_ieee(body);
361        let checksum_bytes = checksum.to_le_bytes();
362        let uncompressed_byte_length_bytes = uncompressed_byte_length.map(u64::to_le_bytes);
363
364        let mut write_buffers = Vec::with_capacity(4);
365        write_buffers.push(std::io::IoSlice::new(signature));
366        write_buffers.extend(
367            uncompressed_byte_length_bytes
368                .as_ref()
369                .map(|bs| std::io::IoSlice::new(&bs[..])),
370        );
371        write_buffers.extend([
372            std::io::IoSlice::new(&checksum_bytes),
373            std::io::IoSlice::new(body),
374        ]);
375
376        crate::utils::write_all_vectored_async(writer, &mut write_buffers).await?;
377        Ok(())
378    }
379}