diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dd721da9..fcb7fe1d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - build: [stable, nightly, 1.50.0, macos, windows, mingw] + build: [stable, nightly, 1.56.0, macos, windows, mingw] include: - build: stable os: ubuntu-latest @@ -15,9 +15,9 @@ jobs: - build: nightly os: ubuntu-latest rust: nightly - - build: 1.50.0 + - build: 1.56.0 os: ubuntu-latest - rust: 1.50.0 + rust: 1.56.0 - build: macos os: macos-latest rust: stable @@ -28,42 +28,27 @@ jobs: os: windows-latest rust: stable-x86_64-gnu steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - profile: minimal - override: true toolchain: ${{ matrix.rust }} - run: cargo test --manifest-path ./miniz_oxide/Cargo.toml - run: cargo test --manifest-path ./miniz_oxide/Cargo.toml --features simd - run: cargo test --manifest-path ./miniz_oxide/Cargo.toml --no-default-features - - run: cargo build --manifest-path ./miniz_oxide/Cargo.toml --no-default-features - - run: cargo test - - # rustfmt: - # name: Rustfmt - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # - uses: actions-rs/toolchain@v1 - # with: - # profile: minimal - # components: rustfmt # seems to not work? - # toolchain: stable - # - run: rustup toolchain install stable --component rustfmt - # - run: cargo fmt -p miniz_oxide -- --check + - name: Test minimal + if: ${{ matrix.rust != '1.56.0' }} + run: cargo test wasm: name: WebAssembly runs-on: ubuntu-latest strategy: matrix: - target: [wasm32-unknown-unknown, wasm32-wasi] + target: [wasm32-unknown-unknown, wasm32-wasip1] steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable with: - profile: minimal toolchain: stable - target: ${{ matrix.target }} + targets: ${{ matrix.target }} - run: cargo build -p miniz_oxide --target ${{ matrix.target }} diff --git a/CHANGELOG.md b/CHANGELOG.md index ca43534a..1ad19fbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,62 @@ All notable changes to this project will be documented in this file. 
+--- +## [0.8.5](https://github.com/Frommi/miniz_oxide/compare/0.8.4..0.8.5) - 2025-02-21 + +### Bug Fixes + +- **(deflate)** some cleanups and evade a bounds check in compress_lz_codes - ([4c38ff8](https://github.com/Frommi/miniz_oxide/commit/4c38ff8abb3f8ee1f3708f8facd15d1fe9975fbc)) - oyvindln +- **(deflate)** fix bug causing 0 length stored block to be output incorrectly causing corrupt stream - ([3d62e6b](https://github.com/Frommi/miniz_oxide/commit/3d62e6b6b81441b4a1867bf1504672c835654919)) - oyvindln + + +--- +## [0.8.4](https://github.com/Frommi/miniz_oxide/compare/0.8.3..0.8.4) - 2025-02-11 + +### Bug Fixes + +- **(deflate)** work around upstream rust change causing performance regression - ([7014124](https://github.com/Frommi/miniz_oxide/commit/701412465814a5add1b620c82a7c4eafb1936b45)) - oyvindln +- **(doc)** typo on example code ([#162](https://github.com/Frommi/miniz_oxide/issues/162)) - ([2119168](https://github.com/Frommi/miniz_oxide/commit/2119168eeee4ff8a8b12505755611e00fe6b96cc)) - Iván Izaguirre +- **(inflate)** Guard against edge case with invalid match distance wrapping around too far when using wrapping buffer - ([4037fee](https://github.com/Frommi/miniz_oxide/commit/4037fee77fd5811ea10fe62a9c772942b6b72cb1)) - oyvindln +- **(deflate)** Avoid stack overflow when initializing HashBuffers. ([#164](https://github.com/Frommi/miniz_oxide/issues/164)) - ([921bc2c](https://github.com/Frommi/miniz_oxide/commit/921bc2c51e450f22a2a9405a908c64005caa92fe)) - Lukasz Anforowicz + +--- +## [0.8.3](https://github.com/Frommi/miniz_oxide/compare/0.8.2..0.8.3) - 2025-01-13 + +### Bug Fixes + +- **(bench)** add some basic criterion benchmarks - ([ac03751](https://github.com/Frommi/miniz_oxide/commit/ac03751c43df22b9bb7f47e50b7dbb8fc11ac141)) - oyvindln +- **(deflate)** write directly to output buffer instead of bit buffer to reduce overhead and improve performance of stored blocks a little - ([97ee3f1](https://github.com/Frommi/miniz_oxide/commit/97ee3f1673b0d8bd88f3abcafb6fe392b086e4b7)) - oyvindln +- **(deflate)** split some code into new module and fix panic in pad_to_bytes from prev commit - ([04973ca](https://github.com/Frommi/miniz_oxide/commit/04973cad7b088868e51fd7970d028dad0ef0c5d0)) - oyvindln +- **(deflate)** move stored level to its own function and simplify to improve performance - ([1f829d2](https://github.com/Frommi/miniz_oxide/commit/1f829d2574a7842f4d5e5a3ff9c33f249451f79f)) - oyvindln +- **(deflate)** remove no longer needed checks for raw mode in compress_normal and comment out accidentally enabled criterion dev dep - ([f357aa1](https://github.com/Frommi/miniz_oxide/commit/f357aa1462f8370592d2a23214490a7391c9f9de)) - oyvindln +- **(miniz_oxide)** add richgel99 (original miniz author) as author and add copyright info from orig miniz in license files - ([c8a4485](https://github.com/Frommi/miniz_oxide/commit/c8a448500ccd9ab040a244dd7db37702ab9e6449)) - oyvindln + +--- +## [0.8.2](https://github.com/Frommi/miniz_oxide/compare/0.8.1..0.8.2) - 2024-12-17 + +### Bug Fixes + +- **(deflate)** fix ([#159](https://github.com/Frommi/miniz_oxide/issues/159)) - ([e3536a7](https://github.com/Frommi/miniz_oxide/commit/e3536a779451012db9d6f8d803252a4f30ce6b91)) (fix for bug accidentally introduced in the previous release causing panics in some cases) - Matthew Deville + +--- +## [0.8.1](https://github.com/Frommi/miniz_oxide/compare/0.8.0..0.8.1) - 2024-12-17 + +### Bug Fixes + +- **(fuzzing)** update fuzzing to work again -
([b7a5908](https://github.com/Frommi/miniz_oxide/commit/b7a5908e1b83bde6b60568f6a67952890ab925a9)) - user +- **(deflate)** use built in fill instead of custom memset function - ([c0662f1](https://github.com/Frommi/miniz_oxide/commit/c0662f11528cbc32291bf91d6caa1890774c2729)) - oyvindln +- **(inflate)** use smaller types in inflate struct, split up huffman table arrays to make struct smaller, make zlib level 0 if using rle, other minor tweaks - ([c5f8f76](https://github.com/Frommi/miniz_oxide/commit/c5f8f761148a3a8a0a7f1b42e698c5e630a8cdf6)) - oyvindln +- **(inflate)** use function instead of lookup table for distance extra bits for tiny space/perf saving and fix clippy warnings - ([9f1fc5e](https://github.com/Frommi/miniz_oxide/commit/9f1fc5e5aeee4ce54be3a766e259b030f3b3cfa9)) - oyvindln +- **(inflate)** use inputwrapper struct instead of iter to simplify input reading and change some data types for performance - ([423bdf8](https://github.com/Frommi/miniz_oxide/commit/423bdf84360c087bea6d3e2b463f3c3a2c1a2867)) - oyvindln +- **(inflate)** don't use lookup table on aarch64 and loong since we have bit rev instruction there, fix clippy warnings and fix conditional in tree_lookup that seemed to break perf - ([083e4b3](https://github.com/Frommi/miniz_oxide/commit/083e4b3e66e9e4e45e7c48a56481d62ee6a78bce)) - oyvindln +- **(inflate)** fill fast lookup table with invalid code value instead of zero so we can avoid check in hot code path giving a small performance boost - ([f73e6a4](https://github.com/Frommi/miniz_oxide/commit/f73e6a4600fbfa795d500d45caef4d48f8c85eff)) - oyvindln +- **(inflate)** skip pointlessly clearing unused huffman code length tree - ([b3b1604](https://github.com/Frommi/miniz_oxide/commit/b3b16048bd459782964f10a23aef63bf058389d5)) - oyvindln +- **(inflate)** use built in fill instead of custom memset function - ([e6ee54e](https://github.com/Frommi/miniz_oxide/commit/e6ee54e82c16ddccb6b55d5a20b8aa5cb4669ca0)) - oyvindln +- **(tests)** change workflow to use rust 1.56.0 - ([7258c06](https://github.com/Frommi/miniz_oxide/commit/7258c064bf39cc124210546d535d82c9c6cd1b5f)) - oyvindln +- **(deflate)** set min window bits in inflate header when using rle - ([02a8857](https://github.com/Frommi/miniz_oxide/commit/02a88571dcc58182df15abb5c1b0410bbd5db428)) - oyvindln +- **(inflate)** Derive Clone for InflateState to allow random-access reads ([#157](https://github.com/Frommi/miniz_oxide/issues/157)) - ([0a33eff](https://github.com/Frommi/miniz_oxide/commit/0a33effd414711b379e01b0613ba5ae85a0e14d0)) - Phil Hord + --- ## [0.8.0](https://github.com/Frommi/miniz_oxide/compare/0.7.4..0.8.0) - 2024-08-08 diff --git a/LICENSE-MIT.md b/LICENSE-MIT.md index 64c53792..ec71b31c 100644 --- a/LICENSE-MIT.md +++ b/LICENSE-MIT.md @@ -1,6 +1,9 @@ MIT License +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC Copyright (c) 2017 Frommi +Copyright (c) 2017-2024 oyvindln Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index ade0dcb6..1fc5700c 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,14 +9,12 @@ publish = false cargo-fuzz = true [dependencies] -libc="0.2.22" +libfuzzer-sys="0.4.0" [dependencies.miniz_oxide_c_api] path = ".."
[dependencies.miniz_oxide] path = "../miniz_oxide" -[dependencies.libfuzzer-sys] -git = "https://github.com/rust-fuzz/libfuzzer-sys.git" # Prevent this from interfering with workspaces [workspace] diff --git a/fuzz/seeds/inflate_nonwrapping/empty_comp b/fuzz/seeds/inflate_nonwrapping/empty_comp new file mode 100644 index 00000000..01e9e398 Binary files /dev/null and b/fuzz/seeds/inflate_nonwrapping/empty_comp differ diff --git a/fuzz/seeds/inflate_nonwrapping/issue_130_table_size.bin b/fuzz/seeds/inflate_nonwrapping/issue_130_table_size.bin new file mode 100644 index 00000000..2e80c50b --- /dev/null +++ b/fuzz/seeds/inflate_nonwrapping/issue_130_table_size.bin @@ -0,0 +1,2 @@ +x$I$IGDDfffVUUUUwwwwwtwwwwWWUUUUffFFD +LfWwuwwOLG=>c? ^ \ No newline at end of file diff --git a/miniz_oxide/Cargo.toml b/miniz_oxide/Cargo.toml index dab4e338..b4949c39 100644 --- a/miniz_oxide/Cargo.toml +++ b/miniz_oxide/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "miniz_oxide" -authors = ["Frommi ", "oyvindln "] -version = "0.8.0" +authors = ["Frommi ", "oyvindln ", "Rich Geldreich <richgel99@gmail.com>"] +version = "0.8.5" license = "MIT OR Zlib OR Apache-2.0" readme = "Readme.md" keywords = ["zlib", "miniz", "deflate", "encoding"] @@ -26,6 +26,15 @@ core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core alloc = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-alloc' } compiler_builtins = { version = '0.1.2', optional = true } +[dev-dependencies] +## Messes with the minimum rust version and drags in deps just for running tests, +## so keep it commented out for now and enable it manually when running the benches +#criterion = "0.5" + +[[bench]] +name = "benchmark" +harness = false + [features] default = ["with-alloc"] with-alloc = [] diff --git a/miniz_oxide/LICENSE b/miniz_oxide/LICENSE index 64c53792..a1980025 100644 --- a/miniz_oxide/LICENSE +++ b/miniz_oxide/LICENSE @@ -1,6 +1,10 @@ MIT License +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC Copyright (c) 2017 Frommi +Copyright (c) 2017-2024 oyvindln + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/miniz_oxide/LICENSE-MIT.md b/miniz_oxide/LICENSE-MIT.md index 64c53792..ec71b31c 100644 --- a/miniz_oxide/LICENSE-MIT.md +++ b/miniz_oxide/LICENSE-MIT.md @@ -1,6 +1,9 @@ MIT License +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC Copyright (c) 2017 Frommi +Copyright (c) 2017-2024 oyvindln Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/miniz_oxide/LICENSE-ZLIB.md b/miniz_oxide/LICENSE-ZLIB.md index 7f513d1a..92871cd2 100644 --- a/miniz_oxide/LICENSE-ZLIB.md +++ b/miniz_oxide/LICENSE-ZLIB.md @@ -1,4 +1,7 @@ +Copyright 2013-2014 RAD Game Tools and Valve Software +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC Copyright (c) 2020 Frommi +Copyright (c) 2017-2024 oyvindln This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
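A usage note for the bench setup above (a sketch; the exact commands are assumptions, not part of the patch): with the commented-out `criterion = "0.5"` dev-dependency re-enabled, `harness = false` hands `main` over to criterion, so the benchmark file added below can be run with `cargo bench -p miniz_oxide`, and its input data can be pointed at another file via the `TEST_FILE` environment variable it reads.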
diff --git a/miniz_oxide/Readme.md b/miniz_oxide/Readme.md index 6c177b0e..6ef5c3a1 100644 --- a/miniz_oxide/Readme.md +++ b/miniz_oxide/Readme.md @@ -25,7 +25,7 @@ Simple compression/decompression: ```rust use miniz_oxide::deflate::compress_to_vec; -use miniz_oxide::inflate::decompress_to_vec; +use miniz_oxide::inflate::decompress_to_vec_with_limit; fn roundtrip(data: &[u8]) { // Compress the input diff --git a/miniz_oxide/benches/benchmark.rs b/miniz_oxide/benches/benchmark.rs new file mode 100644 index 00000000..c6fefe07 --- /dev/null +++ b/miniz_oxide/benches/benchmark.rs @@ -0,0 +1,57 @@ +extern crate criterion; + +use std::hint::black_box; +use std::io::Read; + +use criterion::{criterion_group, criterion_main, Criterion}; +use miniz_oxide::deflate::{compress_to_vec, compress_to_vec_zlib}; +use miniz_oxide::inflate::{decompress_to_vec, decompress_to_vec_zlib}; + +fn get_test_file_data(name: &str) -> Vec<u8> { + use std::fs::File; + let mut input = Vec::new(); + let mut f = File::open(name).unwrap(); + + f.read_to_end(&mut input).unwrap(); + input +} + +fn get_test_data() -> Vec<u8> { + use std::env; + let path = env::var("TEST_FILE").unwrap_or_else(|_| "../miniz/miniz.c".to_string()); + get_test_file_data(&path) +} + +fn bench_inflate(c: &mut Criterion) { + let data = get_test_data(); + let compressed = compress_to_vec(&data, 6); + c.bench_function("inflate_raw", |b| { + b.iter(|| decompress_to_vec(black_box(&compressed))) + }); + let compressed_zlib = compress_to_vec_zlib(&data, 6); + c.bench_function("inflate_zlib", |b| { + b.iter(|| decompress_to_vec_zlib(black_box(&compressed_zlib))) + }); +} + +fn bench_deflate(c: &mut Criterion) { + let data = get_test_data(); + c.bench_function("deflate_l6_raw", |b| { + b.iter(|| compress_to_vec(black_box(&data), 6)) + }); + c.bench_function("deflate_zlib_l6", |b| { + b.iter(|| compress_to_vec_zlib(black_box(&data), 6)) + }); + c.bench_function("deflate_l1_raw", |b| { + b.iter(|| compress_to_vec(black_box(&data), 1)) + }); + c.bench_function("deflate_zlib_l1", |b| { + b.iter(|| compress_to_vec_zlib(black_box(&data), 1)) + }); + c.bench_function("deflate_l0_raw", |b| { + b.iter(|| compress_to_vec(black_box(&data), 0)) + }); +} + +criterion_group!(benches, bench_inflate, bench_deflate); criterion_main!(benches); diff --git a/miniz_oxide/src/deflate/buffer.rs b/miniz_oxide/src/deflate/buffer.rs index f246c07d..c3c4dcb9 100644 --- a/miniz_oxide/src/deflate/buffer.rs +++ b/miniz_oxide/src/deflate/buffer.rs @@ -3,6 +3,8 @@ //! static length info. use crate::deflate::core::{LZ_DICT_SIZE, MAX_MATCH_LEN}; +use alloc::boxed::Box; +use alloc::vec; /// Size of the buffer of lz77 encoded data.
pub const LZ_CODE_BUF_SIZE: usize = 64 * 1024; @@ -23,24 +25,29 @@ pub fn update_hash(current_hash: u16, byte: u8) -> u16 { } pub struct HashBuffers { - pub dict: [u8; LZ_DICT_FULL_SIZE], - pub next: [u16; LZ_DICT_SIZE], - pub hash: [u16; LZ_DICT_SIZE], + pub dict: Box<[u8; LZ_DICT_FULL_SIZE]>, + pub next: Box<[u16; LZ_DICT_SIZE]>, + pub hash: Box<[u16; LZ_DICT_SIZE]>, } impl HashBuffers { #[inline] pub fn reset(&mut self) { - *self = HashBuffers::default(); + self.dict.fill(0); + self.next.fill(0); + self.hash.fill(0); } } impl Default for HashBuffers { fn default() -> HashBuffers { HashBuffers { - dict: [0; LZ_DICT_FULL_SIZE], - next: [0; LZ_DICT_SIZE], - hash: [0; LZ_DICT_SIZE], + dict: vec![0; LZ_DICT_FULL_SIZE] + .into_boxed_slice() + .try_into() + .unwrap(), + next: vec![0; LZ_DICT_SIZE].into_boxed_slice().try_into().unwrap(), + hash: vec![0; LZ_DICT_SIZE].into_boxed_slice().try_into().unwrap(), } } } diff --git a/miniz_oxide/src/deflate/core.rs b/miniz_oxide/src/deflate/core.rs index 3e822a20..dc2a15ab 100644 --- a/miniz_oxide/src/deflate/core.rs +++ b/miniz_oxide/src/deflate/core.rs @@ -11,17 +11,19 @@ use crate::deflate::buffer::{ update_hash, HashBuffers, LocalBuf, LZ_CODE_BUF_SIZE, LZ_DICT_FULL_SIZE, LZ_HASH_BITS, LZ_HASH_SHIFT, LZ_HASH_SIZE, OUT_BUF_SIZE, }; +use crate::deflate::stored::compress_stored; +use crate::deflate::zlib; use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER, MZ_ADLER32_INIT}; use crate::DataFormat; // Currently not bubbled up outside this module, so can fill in with more // context eventually if needed. type Result<T = ()> = core::result::Result<T, Error>; -struct Error {} +pub(crate) struct Error {} -const MAX_PROBES_MASK: i32 = 0xFFF; +pub(crate) const MAX_PROBES_MASK: u32 = 0xFFF; -const MAX_SUPPORTED_HUFF_CODESIZE: usize = 32; +const MAX_SUPPORTED_HUFF_CODESIZE: usize = 15; /// Length code for length values. #[rustfmt::skip] @@ -157,7 +159,7 @@ const BITMASKS: [u32; 17] = [ /// The maximum number of checks for matches in the hash table the compressor will make for each /// compression level. -const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500]; +pub(crate) const NUM_PROBES: [u16; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500]; #[derive(Copy, Clone)] struct SymFreq { @@ -206,6 +208,13 @@ pub enum CompressionStrategy { Fixed = 4, } +impl From<CompressionStrategy> for i32 { + #[inline(always)] + fn from(value: CompressionStrategy) -> Self { + value as i32 + } +} + /// A list of deflate flush types. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum TDEFLFlush { @@ -290,111 +299,13 @@ const MAX_HUFF_SYMBOLS_2: usize = 19; /// Size of the chained hash table. pub(crate) const LZ_DICT_SIZE: usize = 32_768; /// Mask used when stepping through the hash chains. -const LZ_DICT_SIZE_MASK: usize = (LZ_DICT_SIZE as u32 - 1) as usize; +pub(crate) const LZ_DICT_SIZE_MASK: usize = (LZ_DICT_SIZE as u32 - 1) as usize; /// The minimum length of a match. -const MIN_MATCH_LEN: u8 = 3; +pub(crate) const MIN_MATCH_LEN: u8 = 3; /// The maximum length of a match. pub(crate) const MAX_MATCH_LEN: usize = 258; -const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER; - -mod zlib { - const DEFAULT_CM: u8 = 8; - const DEFAULT_CINFO: u8 = 7 << 4; - const _DEFAULT_FDICT: u8 = 0; - const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; - /// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid.
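// As a worked example of that divisibility rule (a sketch using the values
// defined just above): DEFAULT_CMF = 8 | (7 << 4) = 0x78, and for zlib
// compression level 2 the FLG byte starts out as 2 << 6 = 0x80. Since
// (0x78 * 256 + 0x80) % 31 == 3, FCHECK becomes 31 - 3 = 28, and the final
// FLG is 0x80 + 28 = 0x9C, giving the familiar zlib header bytes 0x78 0x9C:
//
//     assert_eq!((0x78_usize * 256 + 0x9C) % 31, 0);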
- const FCHECK_DIVISOR: u8 = 31; - - /// Generate FCHECK from CMF and FLG (without FCKECH )so that they are correct according to the - /// specification, i.e (CMF*256 + FCHK) % 31 = 0. - /// Returns flg with the FCHKECK bits added (any existing FCHECK bits are ignored). - fn add_fcheck(cmf: u8, flg: u8) -> u8 { - let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR); - - // Clear existing FCHECK if any - let flg = flg & 0b11100000; - - // Casting is safe as rem can't overflow since it is a value mod 31 - // We can simply add the value to flg as (31 - rem) will never be above 2^5 - flg + (FCHECK_DIVISOR - rem as u8) - } - - const fn zlib_level_from_flags(flags: u32) -> u8 { - use super::NUM_PROBES; - - let num_probes = flags & (super::MAX_PROBES_MASK as u32); - if flags & super::TDEFL_GREEDY_PARSING_FLAG != 0 { - if num_probes <= 1 { - 0 - } else { - 1 - } - } else if num_probes >= NUM_PROBES[9] { - 3 - } else { - 2 - } - } - - /// Get the zlib header for the level using the default window size and no - /// dictionary. - fn header_from_level(level: u8) -> [u8; 2] { - let cmf = DEFAULT_CMF; - [cmf, add_fcheck(cmf, level << 6)] - } - - /// Create a zlib header from the given compression flags. - /// Only level is considered. - pub fn header_from_flags(flags: u32) -> [u8; 2] { - let level = zlib_level_from_flags(flags); - header_from_level(level) - } - - #[cfg(test)] - mod test { - #[test] - fn zlib() { - use super::super::*; - use super::*; - - let test_level = |level, expected| { - let flags = create_comp_flags_from_zip_params( - level, - MZ_DEFAULT_WINDOW_BITS, - CompressionStrategy::Default as i32, - ); - assert_eq!(zlib_level_from_flags(flags), expected); - }; - - assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2); - test_level(0, 0); - test_level(1, 0); - test_level(2, 1); - test_level(3, 1); - for i in 4..=8 { - test_level(i, 2) - } - test_level(9, 3); - test_level(10, 3); - } - - #[test] - fn test_header() { - let header = super::header_from_level(3); - assert_eq!( - ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, - 0 - ); - } - } -} - -fn memset<T: Copy>(slice: &mut [T], val: T) { - for x in slice { - *x = val - } -} +pub(crate) const DEFAULT_FLAGS: u32 = NUM_PROBES[4] as u32 | TDEFL_WRITE_ZLIB_HEADER; #[cfg(test)] #[inline] @@ -405,19 +316,19 @@ fn write_u16_le(val: u16, slice: &mut [u8], pos: usize) { } // Read the two bytes starting at pos and interpret them as an u16. #[inline] -const fn read_u16_le(slice: &[u8], pos: usize) -> u16 { +const fn read_u16_le<const N: usize>(slice: &[u8; N], pos: usize) -> u16 { // The compiler is smart enough to optimize this into an unaligned load. slice[pos] as u16 | ((slice[pos + 1] as u16) << 8) } /// Main compression struct. pub struct CompressorOxide { - lz: LZOxide, - params: ParamsOxide, + pub(crate) lz: LZOxide, + pub(crate) params: ParamsOxide, /// Put HuffmanOxide on the heap with default trick to avoid /// excessive stack copies.
- huff: Box<HuffmanOxide>, - dict: DictOxide, + pub(crate) huff: Box<HuffmanOxide>, + pub(crate) dict: DictOxide, } impl CompressorOxide { @@ -532,7 +443,7 @@ pub struct CallbackFunc<'a> { pub put_buf_func: &'a mut dyn FnMut(&[u8]) -> bool, } -impl<'a> CallbackFunc<'a> { +impl CallbackFunc<'_> { fn flush_output( &mut self, saved_output: SavedOutputBufferOxide, @@ -556,7 +467,7 @@ struct CallbackBuf<'a> { pub out_buf: &'a mut [u8], } -impl<'a> CallbackBuf<'a> { +impl CallbackBuf<'_> { fn flush_output( &mut self, saved_output: SavedOutputBufferOxide, @@ -585,7 +496,7 @@ enum CallbackOut<'a> { Buf(CallbackBuf<'a>), } -impl<'a> CallbackOut<'a> { +impl CallbackOut<'_> { fn new_output_buffer<'b>( &'b mut self, local_buf: &'b mut [u8], @@ -614,7 +525,7 @@ impl<'a> CallbackOut<'a> { } } -struct CallbackOxide<'a> { +pub(crate) struct CallbackOxide<'a> { in_buf: Option<&'a [u8]>, in_buf_size: Option<&'a mut usize>, out_buf_size: Option<&'a mut usize>, @@ -665,6 +576,10 @@ impl<'a> CallbackOxide<'a> { CallbackOut::Buf(ref mut cb) => cb.flush_output(saved_output, params), } } + + pub(crate) fn buf(&mut self) -> Option<&'a [u8]> { + self.in_buf + } } struct OutputBufferOxide<'a> { @@ -676,7 +591,10 @@ struct OutputBufferOxide<'a> { pub bits_in: u32, } -impl<'a> OutputBufferOxide<'a> { +impl OutputBufferOxide<'_> { + /// Write bits to the bit buffer, flushing + /// the bit buffer so that any whole bytes are output + /// to the underlying buffer. fn put_bits(&mut self, bits: u32, len: u32) { // TODO: Removing this assertion worsens performance // Need to figure out why @@ -692,6 +610,14 @@ impl<'a> OutputBufferOxide<'a> { } } + #[inline] + /// Write the provided bits to the bit buffer without flushing + /// anything. Does not check if there is actually space for it. + fn put_bits_no_flush(&mut self, bits: u32, len: u32) { + self.bit_buffer |= bits << self.bits_in; + self.bits_in += len; + } + const fn save(&self) -> SavedOutputBufferOxide { SavedOutputBufferOxide { pos: self.inner_pos, @@ -708,12 +634,22 @@ impl<'a> OutputBufferOxide<'a> { self.local = saved.local; } + #[inline] + /// Pad the bit buffer to a whole byte with + /// zeroes and write that byte to the output buffer. fn pad_to_bytes(&mut self) { if self.bits_in != 0 { let len = 8 - self.bits_in; self.put_bits(0, len); } } + + #[inline] + fn write_bytes(&mut self, bytes: &[u8]) { + debug_assert_eq!(self.bits_in, 0); + self.inner[self.inner_pos..self.inner_pos + bytes.len()].copy_from_slice(bytes); + self.inner_pos += bytes.len(); + } } struct SavedOutputBufferOxide { @@ -757,7 +693,7 @@ impl BitBuffer { /// NOTE: Only the literal/lengths have enough symbols to actually use /// the full array. It's unclear why it's defined like this in miniz, /// it could be for cache/alignment reasons. -struct HuffmanOxide { +pub(crate) struct HuffmanOxide { /// Number of occurrences of each symbol. pub count: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], /// The bits of the huffman code assigned to the symbol @@ -776,7 +712,7 @@ const HUFF_CODES_TABLE: usize = 2; /// Status of RLE encoding of huffman code lengths.
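// A minimal standalone sketch of the LSB-first accumulation that `put_bits`
// above performs (the `BitSink` type and names are illustrative, not from
// this crate): new bits are OR-ed in above the bits already held, and any
// completed low bytes are drained to the output, mirroring the flush loop
// in `put_bits`.
//
//     struct BitSink {
//         bit_buffer: u32,
//         bits_in: u32,
//         out: Vec<u8>,
//     }
//
//     impl BitSink {
//         fn put_bits(&mut self, bits: u32, len: u32) {
//             debug_assert!(bits < (1 << len));
//             self.bit_buffer |= bits << self.bits_in;
//             self.bits_in += len;
//             // Flush any whole bytes, low byte first.
//             while self.bits_in >= 8 {
//                 self.out.push(self.bit_buffer as u8);
//                 self.bit_buffer >>= 8;
//                 self.bits_in -= 8;
//             }
//         }
//     }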
struct Rle { pub z_count: u32, - pub repeat_count: u32, + pub repeat_count: u16, pub prev_code_size: u8, } @@ -792,7 +728,7 @@ impl Rle { if self.repeat_count != 0 { if self.repeat_count < 3 { counts[self.prev_code_size as usize] = - counts[self.prev_code_size as usize].wrapping_add(self.repeat_count as u16); + counts[self.prev_code_size as usize].wrapping_add(self.repeat_count); let code = self.prev_code_size; write(&[code, code, code][..self.repeat_count as usize])?; } else { @@ -977,7 +913,7 @@ impl HuffmanOxide { code_size_limit: usize, static_table: bool, ) { - let mut num_codes = [0i32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; + let mut num_codes = [0i32; 32 + 1]; let mut next_code = [0u32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; if static_table { @@ -1017,8 +953,8 @@ impl HuffmanOxide { Self::enforce_max_code_size(&mut num_codes, num_used_symbols, code_size_limit); - memset(&mut self.code_sizes[table_num][..], 0); - memset(&mut self.codes[table_num][..], 0); + self.code_sizes[table_num].fill(0); + self.codes[table_num].fill(0); let mut last = num_used_symbols; for (i, &num_item) in num_codes @@ -1051,25 +987,23 @@ impl HuffmanOxide { continue; } - let mut code = next_code[code_size as usize]; + let code = next_code[code_size as usize]; + next_code[code_size as usize] += 1; - let mut rev_code = 0; - for _ in 0..code_size { - rev_code = (rev_code << 1) | (code & 1); - code >>= 1; - } - *huff_code = rev_code as u16; + let rev_code = (code as u16).reverse_bits() >> (16 - code_size); + + *huff_code = rev_code; } } fn start_static_block(&mut self, output: &mut OutputBufferOxide) { - memset(&mut self.code_sizes[LITLEN_TABLE][0..144], 8); - memset(&mut self.code_sizes[LITLEN_TABLE][144..256], 9); - memset(&mut self.code_sizes[LITLEN_TABLE][256..280], 7); - memset(&mut self.code_sizes[LITLEN_TABLE][280..288], 8); + self.code_sizes[LITLEN_TABLE][0..144].fill(8); + self.code_sizes[LITLEN_TABLE][144..256].fill(9); + self.code_sizes[LITLEN_TABLE][256..280].fill(7); + self.code_sizes[LITLEN_TABLE][280..288].fill(8); - memset(&mut self.code_sizes[DIST_TABLE][..32], 5); + self.code_sizes[DIST_TABLE][..32].fill(5); self.optimize_table(LITLEN_TABLE, 288, 15, true); self.optimize_table(DIST_TABLE, 32, 15, true); @@ -1114,7 +1048,7 @@ impl HuffmanOxide { prev_code_size: 0xFF, }; - memset(&mut self.count[HUFF_CODES_TABLE][..MAX_HUFF_SYMBOLS_2], 0); + self.count[HUFF_CODES_TABLE][..MAX_HUFF_SYMBOLS_2].fill(0); let mut packed_pos = 0; for &code_size in &code_sizes_to_pack[..total_code_sizes_to_pack] { @@ -1149,10 +1083,10 @@ impl HuffmanOxide { self.optimize_table(2, MAX_HUFF_SYMBOLS_2, 7, false); - output.put_bits(2, 2); + output.put_bits_no_flush(2, 2); - output.put_bits((num_lit_codes - 257) as u32, 5); - output.put_bits((num_dist_codes - 1) as u32, 5); + output.put_bits_no_flush((num_lit_codes - 257) as u32, 5); + output.put_bits_no_flush((num_dist_codes - 1) as u32, 5); let mut num_bit_lengths = 18 - HUFFMAN_LENGTH_ORDER @@ -1192,18 +1126,19 @@ impl HuffmanOxide { } } -struct DictOxide { +pub(crate) struct DictOxide { /// The maximum number of checks in the hash chain, for the initial, /// and the lazy match respectively. pub max_probes: [u32; 2], /// Buffer of input data. /// Padded with 1 byte to simplify matching code in `compress_fast`. 
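// A quick standalone sketch (names here are illustrative) of why the
// `reverse_bits` form used above is equivalent to the manual reversal loop
// it replaces: reversing all 16 bits and shifting down leaves exactly the
// low `code_size` bits in reversed order, for any `code_size` in 1..=15 and
// `code < (1 << code_size)`.
//
//     fn rev_manual(mut code: u32, code_size: u32) -> u16 {
//         let mut rev = 0;
//         for _ in 0..code_size {
//             rev = (rev << 1) | (code & 1);
//             code >>= 1;
//         }
//         rev as u16
//     }
//
//     fn rev_shift(code: u32, code_size: u32) -> u16 {
//         (code as u16).reverse_bits() >> (16 - code_size)
//     }
//
//     for code_size in 1..=15u32 {
//         for code in 0..(1u32 << code_size) {
//             assert_eq!(rev_manual(code, code_size), rev_shift(code, code_size));
//         }
//     }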
- pub b: Box<HashBuffers>, + pub b: HashBuffers, pub code_buf_dict_pos: usize, pub lookahead_size: usize, pub lookahead_pos: usize, pub size: usize, + loop_len: u8, } const fn probes_from_flags(flags: u32) -> [u32; 2] { @@ -1217,11 +1152,12 @@ impl DictOxide { fn new(flags: u32) -> Self { DictOxide { max_probes: probes_from_flags(flags), - b: Box::default(), + b: HashBuffers::default(), code_buf_dict_pos: 0, lookahead_size: 0, lookahead_pos: 0, size: 0, + loop_len: 32, } } @@ -1265,13 +1201,6 @@ impl DictOxide { u64::from_le_bytes(bytes) } - /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of - /// type T. - #[inline] - fn read_as_u16(&self, pos: usize) -> u16 { - read_u16_le(&self.b.dict[..], pos) - } - /// Try to find a match for the data at lookahead_pos in the dictionary that is /// longer than `match_len`. /// Returns a tuple containing (match_distance, match_length). Will be equal to the input @@ -1292,20 +1221,24 @@ impl DictOxide { let max_match_len = cmp::min(MAX_MATCH_LEN as u32, max_match_len); match_len = cmp::max(match_len, 1); - let pos = lookahead_pos & LZ_DICT_SIZE_MASK; - let mut probe_pos = pos; - // Number of probes into the hash chains. - let mut num_probes_left = self.max_probes[(match_len >= 32) as usize]; - // If we already have a match of the full length don't bother searching for another one. if max_match_len <= match_len { return (match_dist, match_len); } + let pos = lookahead_pos & LZ_DICT_SIZE_MASK; + let mut probe_pos = pos; + // Number of probes into the hash chains. + let mut num_probes_left = if match_len < 32 { + self.max_probes[0] + } else { + self.max_probes[1] + }; + // Read the last byte of the current match, and the next one, used to compare matches. - let mut c01: u16 = self.read_as_u16(pos + match_len as usize - 1); + let mut c01: u16 = read_u16_le(&self.b.dict, pos + match_len as usize - 1); // Read the two bytes at the end position of the current match. - let s01: u16 = self.read_as_u16(pos); + let s01: u16 = read_u16_le(&self.b.dict, pos); 'outer: loop { let mut dist; @@ -1332,7 +1265,8 @@ impl DictOxide { // position to match against. probe_pos = next_probe_pos & LZ_DICT_SIZE_MASK; - if self.read_as_u16(probe_pos + match_len as usize - 1) == c01 { + // TODO: This bounds check does not get optimized out + if read_u16_le(&self.b.dict, probe_pos + match_len as usize - 1) == c01 { break 'found; } } @@ -1345,14 +1279,17 @@ impl DictOxide { } // Check if the first two bytes match. - if self.read_as_u16(probe_pos) != s01 { + if read_u16_le(&self.b.dict, probe_pos) != s01 { continue; } let mut p = pos + 2; let mut q = probe_pos + 2; // The first two bytes matched, so check the full length of the match. - for _ in 0..32 { + // TODO: This is a workaround for an upstream issue introduced after an LLVM upgrade in Rust 1.82. + // The compiler is too smart and ends up unrolling the loop, which causes the performance to get worse. + // Using a variable instead of a constant here to prevent that seems to at least get back some of the performance loss. + for _ in 0..self.loop_len as i32 { let p_data: u64 = self.read_unaligned_u64(p); let q_data: u64 = self.read_unaligned_u64(q); // Compare of 8 bytes at a time by using unaligned loads of 64-bit integers. @@ -1375,7 +1312,7 @@ impl DictOxide { } // We found a better match, so save the last two bytes for further match // comparisons.
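// A minimal sketch of the 8-bytes-at-a-time comparison used in the loop
// above (the helper name is illustrative): XOR the two little-endian 64-bit
// loads; if they differ, `trailing_zeros` of the XOR gives the bit offset of
// the first mismatch, and dividing by 8 turns that into the number of
// matching leading bytes.
//
//     fn matching_prefix_len(a: [u8; 8], b: [u8; 8]) -> u32 {
//         let xor = u64::from_le_bytes(a) ^ u64::from_le_bytes(b);
//         if xor == 0 {
//             8
//         } else {
//             xor.trailing_zeros() / 8
//         }
//     }
//
//     assert_eq!(matching_prefix_len(*b"deflated", *b"deflates"), 7);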
- c01 = self.read_as_u16(pos + match_len as usize - 1) + c01 = read_u16_le(&self.b.dict, pos + match_len as usize - 1); } continue 'outer; } @@ -1386,7 +1323,7 @@ impl DictOxide { } } -struct ParamsOxide { +pub(crate) struct ParamsOxide { pub flags: u32, pub greedy_parsing: bool, pub block_index: u32, @@ -1461,7 +1398,7 @@ impl ParamsOxide { } } -struct LZOxide { +pub(crate) struct LZOxide { pub codes: [u8; LZ_CODE_BUF_SIZE], pub code_position: usize, pub flag_position: usize, @@ -1521,7 +1458,8 @@ impl LZOxide { fn compress_lz_codes( huff: &HuffmanOxide, output: &mut OutputBufferOxide, - lz_code_buf: &[u8], + lz_code_buf: &[u8; LZ_CODE_BUF_SIZE], + lz_code_buf_used_len: usize, ) -> Result<bool> { let mut flags = 1; let mut bb = BitBuffer { bit_buffer: u64::from(output.bit_buffer), bits_in: output.bits_in, }; + // Help out the compiler know this variable won't be larger than + // the buffer length since the constants won't propagate through the function call. + let lz_code_buf_used_len = cmp::min(lz_code_buf.len(), lz_code_buf_used_len); + let mut i: usize = 0; - while i < lz_code_buf.len() { + while i < lz_code_buf_used_len { if flags == 1 { flags = u32::from(lz_code_buf[i]) | 0x100; i += 1; @@ -1580,7 +1522,7 @@ fn compress_lz_codes( // The lz code was a literal for _ in 0..3 { flags >>= 1; - let lit = lz_code_buf[i]; + let lit = lz_code_buf[i & (LZ_CODE_BUF_SIZE - 1)]; i += 1; debug_assert!(huff.code_sizes[0][lit as usize] != 0); @@ -1589,7 +1531,7 @@ fn compress_lz_codes( u32::from(huff.code_sizes[0][lit as usize]), ); - if flags & 1 == 1 || i >= lz_code_buf.len() { + if flags & 1 == 1 || i >= lz_code_buf_used_len { break; } } @@ -1628,10 +1570,10 @@ fn compress_block( huff.start_dynamic_block(output)?; } - compress_lz_codes(huff, output, &lz.codes[..lz.code_position]) + compress_lz_codes(huff, output, &lz.codes, lz.code_position) } -fn flush_block( +pub(crate) fn flush_block( d: &mut CompressorOxide, callback: &mut CallbackOxide, flush: TDEFLFlush, ) -> Result<i32> { @@ -1644,8 +1586,13 @@ fn flush_block( output.bit_buffer = d.params.saved_bit_buffer; output.bits_in = d.params.saved_bits_in; + // TODO: Don't think this second condition should be here but need to verify. let use_raw_block = (d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0) && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos) <= d.dict.size; + debug_assert_eq!( + use_raw_block, + d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0 + ); assert!(d.params.flush_remaining == 0); d.params.flush_ofs = 0; @@ -1656,7 +1603,7 @@ fn flush_block( // If we are at the start of the stream, write the zlib header if requested. if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 && d.params.block_index == 0 { let header = zlib::header_from_flags(d.params.flags); - output.put_bits(header[0].into(), 8); + output.put_bits_no_flush(header[0].into(), 8); output.put_bits(header[1].into(), 8); } @@ -1692,7 +1639,7 @@ fn flush_block( // Block header. output.put_bits(0, 2); - // Block length has to start on a byte boundary, s opad. + // Block length has to start on a byte boundary, so pad. output.pad_to_bytes(); // Block length and ones complement of block length. output.put_bits(d.lz.total_bytes & 0xFFFF, 16); output.put_bits(!d.lz.total_bytes & 0xFFFF, 16); // Write the actual bytes.
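// As a concrete example of this framing (byte values follow from RFC 1951,
// not from this code): an empty *final* stored block is BFINAL=1 and
// BTYPE=00 padded out to a byte boundary, then LEN and its ones' complement
// NLEN, i.e. the five bytes
//
//     let empty_final_stored_block: [u8; 5] = [0x01, 0x00, 0x00, 0xFF, 0xFF];
//
// The zero-length case is exactly the one the 0.8.5 stored-block fix in the
// changelog above addresses.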
- for i in 0..d.lz.total_bytes { - let pos = (d.dict.code_buf_dict_pos + i as usize) & LZ_DICT_SIZE_MASK; - output.put_bits(u32::from(d.dict.b.dict[pos]), 8); + let start = d.dict.code_buf_dict_pos & LZ_DICT_SIZE_MASK; + let end = (d.dict.code_buf_dict_pos + d.lz.total_bytes as usize) & LZ_DICT_SIZE_MASK; + let dict = &mut d.dict.b.dict; + if start < end { + // The data does not wrap around. + output.write_bytes(&dict[start..end]); + } else if d.lz.total_bytes > 0 { + // The data wraps around and the input was not 0 bytes. + output.write_bytes(&dict[start..LZ_DICT_SIZE]); + output.write_bytes(&dict[..end]); } } else if !comp_success { output.load(saved_buffer); @@ -1729,9 +1683,10 @@ fn flush_block( } } - memset(&mut d.huff.count[0][..MAX_HUFF_SYMBOLS_0], 0); - memset(&mut d.huff.count[1][..MAX_HUFF_SYMBOLS_1], 0); + d.huff.count[0][..MAX_HUFF_SYMBOLS_0].fill(0); + d.huff.count[1][..MAX_HUFF_SYMBOLS_1].fill(0); + // Clear LZ buffer for the next block. d.lz.code_position = 1; d.lz.flag_position = 0; d.lz.num_flags_left = 8; @@ -1748,7 +1703,7 @@ fn flush_block( Ok(callback.flush_output(saved_buffer, &mut d.params)) } -fn record_literal(h: &mut HuffmanOxide, lz: &mut LZOxide, lit: u8) { +pub(crate) fn record_literal(h: &mut HuffmanOxide, lz: &mut LZOxide, lit: u8) { lz.total_bytes += 1; lz.write_code(lit); @@ -1785,12 +1740,12 @@ fn record_match(h: &mut HuffmanOxide, lz: &mut LZOxide, mut match_len: u32, mut } fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { - let mut src_pos = d.params.src_pos; let in_buf = match callback.in_buf { None => return true, Some(in_buf) => in_buf, }; + let mut src_pos = d.params.src_pos; let mut lookahead_size = d.dict.lookahead_size; let mut lookahead_pos = d.dict.lookahead_pos; let mut saved_lit = d.params.saved_lit; @@ -1872,9 +1827,9 @@ fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> boo u32::from(MIN_MATCH_LEN) - 1 }; let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; - if d.params.flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS) != 0 { + if d.params.flags & TDEFL_RLE_MATCHES != 0 { // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte. - if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 { + if d.dict.size != 0 { let c = d.dict.b.dict[(cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK]; cur_match_len = d.dict.b.dict[cur_pos..(cur_pos + lookahead_size)] .iter() @@ -1949,11 +1904,10 @@ fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> boo d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8; - let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize; - let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw); + let buf_fat = (d.lz.total_bytes > 31 * 1024) && fat; - if lz_buf_tight || fat_or_raw { + if lz_buf_tight || buf_fat { d.params.src_pos = src_pos; // These values are used in flush_block, so we need to write them back here. 
d.dict.lookahead_size = lookahead_size; @@ -2278,13 +2232,15 @@ fn compress_inner( return res; } - let one_probe = d.params.flags & MAX_PROBES_MASK as u32 == 1; + let one_probe = d.params.flags & MAX_PROBES_MASK == 1; let greedy = d.params.flags & TDEFL_GREEDY_PARSING_FLAG != 0; - let filter_or_rle_or_raw = d.params.flags - & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES) - != 0; + let filter_or_rle = d.params.flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS) != 0; - let compress_success = if one_probe && greedy && !filter_or_rle_or_raw { + let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; + + let compress_success = if raw { + compress_stored(d, callback) + } else if one_probe && greedy && !filter_or_rle { compress_fast(d, callback) } else { compress_normal(d, callback) @@ -2328,8 +2284,8 @@ fn compress_inner( _ => { d.params.finished = d.params.flush == TDEFLFlush::Finish; if d.params.flush == TDEFLFlush::Full { - memset(&mut d.dict.b.hash[..], 0); - memset(&mut d.dict.b.next[..], 0); + d.dict.b.hash.fill(0); + d.dict.b.next.fill(0); d.dict.size = 0; } } @@ -2365,7 +2321,7 @@ pub fn create_comp_flags_from_zip_params(level: i32, window_bits: i32, strategy: } else { 0 }; - let mut comp_flags = NUM_PROBES[num_probes] | greedy; + let mut comp_flags = u32::from(NUM_PROBES[num_probes]) | greedy; if window_bits > 0 { comp_flags |= TDEFL_WRITE_ZLIB_HEADER; @@ -2376,7 +2332,7 @@ pub fn create_comp_flags_from_zip_params(level: i32, window_bits: i32, strategy: } else if strategy == CompressionStrategy::Filtered as i32 { comp_flags |= TDEFL_FILTER_MATCHES; } else if strategy == CompressionStrategy::HuffmanOnly as i32 { - comp_flags &= !MAX_PROBES_MASK as u32; + comp_flags &= !MAX_PROBES_MASK; } else if strategy == CompressionStrategy::Fixed as i32 { comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; } else if strategy == CompressionStrategy::RLE as i32 { @@ -2466,4 +2422,45 @@ mod test { let decoded = decompress_to_vec(&encoded[..]).unwrap(); assert_eq!(&decoded[..], &slice[..]); } + + #[test] + fn zlib_window_bits() { + use crate::inflate::stream::{inflate, InflateState}; + use crate::DataFormat; + use alloc::boxed::Box; + let slice = [ + 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, 35, 22, 22, 2, + 6, 2, 6, + ]; + let mut encoded = vec![]; + let flags = create_comp_flags_from_zip_params(2, 1, CompressionStrategy::RLE.into()); + let mut d = CompressorOxide::new(flags); + let (status, in_consumed) = + compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { + encoded.extend_from_slice(out); + true + }); + + assert_eq!(status, TDEFLStatus::Done); + assert_eq!(in_consumed, slice.len()); + + let mut output = vec![0; slice.len()]; + + let mut decompressor = Box::new(InflateState::new(DataFormat::Zlib)); + + let mut out_slice = output.as_mut_slice(); + // Feed 1 byte at a time and no back buffer to test that RLE encoding has been used. 
+ for i in 0..encoded.len() { + let result = inflate( + &mut decompressor, + &encoded[i..i + 1], + out_slice, + crate::MZFlush::None, + ); + out_slice = &mut out_slice[result.bytes_written..]; + } + let cmf = decompressor.decompressor().zlib_header().0; + assert_eq!(cmf, 8); + assert_eq!(output, slice) + } } diff --git a/miniz_oxide/src/deflate/mod.rs b/miniz_oxide/src/deflate/mod.rs index f36f28c8..f31c587c 100644 --- a/miniz_oxide/src/deflate/mod.rs +++ b/miniz_oxide/src/deflate/mod.rs @@ -5,7 +5,9 @@ use crate::alloc::vec::Vec; mod buffer; pub mod core; +mod stored; pub mod stream; +mod zlib; use self::core::*; /// How much processing the compressor should do to compress the data. @@ -188,6 +190,15 @@ mod test { assert_eq!(test_data, d.as_slice()); } + #[test] + fn compress_rle() { + let test_data = b"Deflate late"; + + let res = compress_to_vec_inner(test_data, 1, 0, CompressionStrategy::RLE as i32); + let d = decompress_to_vec(res.as_slice()).expect("Failed to decompress!"); + assert_eq!(test_data, d.as_slice()); + } + /// Test that a raw block compresses fine. #[test] fn compress_raw() { diff --git a/miniz_oxide/src/deflate/stored.rs b/miniz_oxide/src/deflate/stored.rs new file mode 100644 index 00000000..166d31a6 --- /dev/null +++ b/miniz_oxide/src/deflate/stored.rs @@ -0,0 +1,305 @@ +use crate::deflate::buffer::{update_hash, LZ_HASH_SHIFT, LZ_HASH_SIZE}; +use crate::deflate::core::{ + flush_block, CallbackOxide, CompressorOxide, TDEFLFlush, TDEFLStatus, LZ_DICT_SIZE, + LZ_DICT_SIZE_MASK, MAX_MATCH_LEN, MIN_MATCH_LEN, +}; +use core::cmp; + +pub(crate) fn compress_stored(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { + let in_buf = match callback.buf() { + None => return true, + Some(in_buf) => in_buf, + }; + + // Make sure this is cleared in case compression level is switched later. + // TODO: It's possible we don't need this or could do this elsewhere later + // but just do this here to avoid causing issues for now. + d.params.saved_match_len = 0; + let mut bytes_written = d.lz.total_bytes; + let mut src_pos = d.params.src_pos; + let mut lookahead_size = d.dict.lookahead_size; + let mut lookahead_pos = d.dict.lookahead_pos; + + while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) { + let src_buf_left = in_buf.len() - src_pos; + let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size); + + if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1 + && num_bytes_to_process > 0 + { + let dictb = &mut d.dict.b; + + let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; + let mut ins_pos = lookahead_pos + lookahead_size - 2; + // Start the hash value from the first two bytes + let mut hash = update_hash( + u16::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]), + dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK], + ); + + lookahead_size += num_bytes_to_process; + + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + // Add byte to input buffer. + dictb.dict[dst_pos] = c; + if dst_pos < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos] = c; + } + + // Generate hash from the current byte, + hash = update_hash(hash, c); + dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; + // and insert it into the hash chain. 
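// A minimal standalone sketch of the hash-chain insertion idiom used here
// (the `insert` helper is illustrative): `hash[h]` holds the most recent
// position seen with hash `h`, and `next[pos]` links each position to the
// previous one with the same hash, so pushing `ins_pos` onto the front of
// the chain takes exactly the pair of stores around this point.
//
//     fn insert(hash: &mut [u16], next: &mut [u16], h: usize, ins_pos: usize, mask: usize) {
//         next[ins_pos & mask] = hash[h]; // link to the old chain head
//         hash[h] = ins_pos as u16;       // the new position becomes the head
//     }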
+ dictb.hash[hash as usize] = ins_pos as u16; + dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK; + ins_pos += 1; + } + src_pos += num_bytes_to_process; + } else { + let dictb = &mut d.dict.b; + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; + dictb.dict[dst_pos] = c; + if dst_pos < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos] = c; + } + + lookahead_size += 1; + if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() { + let ins_pos = lookahead_pos + lookahead_size - 3; + let hash = ((u32::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]) + << (LZ_HASH_SHIFT * 2)) + ^ ((u32::from(dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK]) + << LZ_HASH_SHIFT) + ^ u32::from(c))) + & (LZ_HASH_SIZE as u32 - 1); + + dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; + dictb.hash[hash as usize] = ins_pos as u16; + } + } + + src_pos += num_bytes_to_process; + } + + d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); + if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN { + break; + } + + let len_to_move = 1; + + bytes_written += 1; + + lookahead_pos += len_to_move; + assert!(lookahead_size >= len_to_move); + lookahead_size -= len_to_move; + d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); + + if bytes_written > 31 * 1024 { + d.lz.total_bytes = bytes_written; + + d.params.src_pos = src_pos; + // These values are used in flush_block, so we need to write them back here. + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + + let n = flush_block(d, callback, TDEFLFlush::None) + .unwrap_or(TDEFLStatus::PutBufFailed as i32); + if n != 0 { + return n > 0; + } + bytes_written = d.lz.total_bytes; + } + } + + d.lz.total_bytes = bytes_written; + d.params.src_pos = src_pos; + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + true +} + +/* +fn compress_rle(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { + let mut src_pos = d.params.src_pos; + let in_buf = match callback.in_buf { + None => return true, + Some(in_buf) => in_buf, + }; + + let mut lookahead_size = d.dict.lookahead_size; + let mut lookahead_pos = d.dict.lookahead_pos; + let mut saved_lit = d.params.saved_lit; + let mut saved_match_dist = d.params.saved_match_dist; + let mut saved_match_len = d.params.saved_match_len; + + while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) { + let src_buf_left = in_buf.len() - src_pos; + let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size); + + if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1 + && num_bytes_to_process > 0 + { + let dictb = &mut d.dict.b; + + let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; + let mut ins_pos = lookahead_pos + lookahead_size - 2; + // Start the hash value from the first two bytes + let mut hash = update_hash( + u16::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]), + dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK], + ); + + lookahead_size += num_bytes_to_process; + + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + // Add byte to input buffer. + dictb.dict[dst_pos] = c; + if dst_pos < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos] = c; + } + + // Generate hash from the current byte, + hash = update_hash(hash, c); + dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; + // and insert it into the hash chain. 
+ dictb.hash[hash as usize] = ins_pos as u16; + dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK; + ins_pos += 1; + } + src_pos += num_bytes_to_process; + } else { + let dictb = &mut d.dict.b; + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; + dictb.dict[dst_pos] = c; + if dst_pos < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos] = c; + } + + lookahead_size += 1; + if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() { + let ins_pos = lookahead_pos + lookahead_size - 3; + let hash = ((u32::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]) + << (LZ_HASH_SHIFT * 2)) + ^ ((u32::from(dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK]) + << LZ_HASH_SHIFT) + ^ u32::from(c))) + & (LZ_HASH_SIZE as u32 - 1); + + dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; + dictb.hash[hash as usize] = ins_pos as u16; + } + } + + src_pos += num_bytes_to_process; + } + + d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); + if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN { + break; + } + + let mut len_to_move = 1; + let mut cur_match_dist = 0; + let mut cur_match_len = if saved_match_len != 0 { + saved_match_len + } else { + u32::from(MIN_MATCH_LEN) - 1 + }; + let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; + // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte. + if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 { + let c = d.dict.b.dict[(cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK]; + cur_match_len = d.dict.b.dict[cur_pos..(cur_pos + lookahead_size)] + .iter() + .take_while(|&x| *x == c) + .count() as u32; + if cur_match_len < MIN_MATCH_LEN.into() { + cur_match_len = 0 + } else { + cur_match_dist = 1 + } + } + + + let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024; + let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5; + if far_and_small || filter_small || cur_pos == cur_match_dist as usize { + cur_match_dist = 0; + cur_match_len = 0; + } + + if saved_match_len != 0 { + if cur_match_len > saved_match_len { + record_literal(&mut d.huff, &mut d.lz, saved_lit); + if cur_match_len >= 128 { + record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); + saved_match_len = 0; + len_to_move = cur_match_len as usize; + } else { + saved_lit = d.dict.b.dict[cur_pos]; + saved_match_dist = cur_match_dist; + saved_match_len = cur_match_len; + } + } else { + record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist); + len_to_move = (saved_match_len - 1) as usize; + saved_match_len = 0; + } + } else if cur_match_dist == 0 { + record_literal( + &mut d.huff, + &mut d.lz, + d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)], + ); + } else if d.params.greedy_parsing + || (d.params.flags & TDEFL_RLE_MATCHES != 0) + || cur_match_len >= 128 + { + // If we are using lazy matching, check for matches at the next byte if the current + // match was shorter than 128 bytes. 
+ record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); + len_to_move = cur_match_len as usize; + } else { + saved_lit = d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)]; + saved_match_dist = cur_match_dist; + saved_match_len = cur_match_len; + } + + lookahead_pos += len_to_move; + assert!(lookahead_size >= len_to_move); + lookahead_size -= len_to_move; + d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); + + let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8; + let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; + let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize; + let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw); + + if lz_buf_tight || fat_or_raw { + d.params.src_pos = src_pos; + // These values are used in flush_block, so we need to write them back here. + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + + let n = flush_block(d, callback, TDEFLFlush::None) + .unwrap_or(TDEFLStatus::PutBufFailed as i32); + if n != 0 { + d.params.saved_lit = saved_lit; + d.params.saved_match_dist = saved_match_dist; + d.params.saved_match_len = saved_match_len; + return n > 0; + } + } + } + + d.params.src_pos = src_pos; + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + d.params.saved_lit = saved_lit; + d.params.saved_match_dist = saved_match_dist; + d.params.saved_match_len = saved_match_len; + true +}*/ diff --git a/miniz_oxide/src/deflate/zlib.rs b/miniz_oxide/src/deflate/zlib.rs new file mode 100644 index 00000000..281c4f17 --- /dev/null +++ b/miniz_oxide/src/deflate/zlib.rs @@ -0,0 +1,112 @@ +use crate::deflate::core::deflate_flags::{ + TDEFL_FORCE_ALL_RAW_BLOCKS, TDEFL_GREEDY_PARSING_FLAG, TDEFL_RLE_MATCHES, +}; + +const DEFAULT_CM: u8 = 8; +const DEFAULT_CINFO: u8 = 7 << 4; +const _DEFAULT_FDICT: u8 = 0; +const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; +// CMF used for RLE (technically it uses a window size of 0 but the lowest that can +// be specified in the header corresponds to a window size of 1 << (0 + 8) aka 256). +const MIN_CMF: u8 = DEFAULT_CM; // | 0 +/// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid. +const FCHECK_DIVISOR: u8 = 31; + +/// Generate FCHECK from CMF and FLG (without FCHECK) so that they are correct according to the +/// specification, i.e. (CMF*256 + FLG) % 31 = 0. +/// Returns flg with the FCHECK bits added (any existing FCHECK bits are ignored). +#[inline] +fn add_fcheck(cmf: u8, flg: u8) -> u8 { + let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR); + + // Clear existing FCHECK if any + let flg = flg & 0b11100000; + + // Casting is safe as rem can't overflow since it is a value mod 31 + // We can simply add the value to flg as (31 - rem) will never be above 2^5 + flg + (FCHECK_DIVISOR - rem as u8) +} + +#[inline] +const fn zlib_level_from_flags(flags: u32) -> u8 { + use crate::deflate::core::NUM_PROBES; + + let num_probes = flags & super::MAX_PROBES_MASK; + if (flags & TDEFL_GREEDY_PARSING_FLAG != 0) || (flags & TDEFL_RLE_MATCHES != 0) { + if num_probes <= 1 { + 0 + } else { + 1 + } + } else if num_probes >= NUM_PROBES[9] as u32 { + 3 + } else { + 2 + } +} + +#[inline] +const fn cmf_from_flags(flags: u32) -> u8 { + if (flags & TDEFL_RLE_MATCHES == 0) && (flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0) { + DEFAULT_CMF + // If we are using RLE encoding or no compression the window bits can be set as the + // minimum.
+ } else { + MIN_CMF + } +} + +/// Get the zlib header for the level using the default window size and no +/// dictionary. +#[inline] +fn header_from_level(level: u8, flags: u32) -> [u8; 2] { + let cmf = cmf_from_flags(flags); + [cmf, add_fcheck(cmf, level << 6)] +} + +/// Create a zlib header from the given compression flags. +/// Only level is considered. +#[inline] +pub fn header_from_flags(flags: u32) -> [u8; 2] { + let level = zlib_level_from_flags(flags); + header_from_level(level, flags) +} + +#[cfg(test)] +mod test { + use crate::shared::MZ_DEFAULT_WINDOW_BITS; + #[test] + fn zlib() { + use super::super::*; + use super::*; + + let test_level = |level, expected| { + let flags = create_comp_flags_from_zip_params( + level, + MZ_DEFAULT_WINDOW_BITS, + CompressionStrategy::Default as i32, + ); + assert_eq!(zlib_level_from_flags(flags), expected); + }; + + assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2); + test_level(0, 0); + test_level(1, 0); + test_level(2, 1); + test_level(3, 1); + for i in 4..=8 { + test_level(i, 2) + } + test_level(9, 3); + test_level(10, 3); + } + + #[test] + fn test_header() { + let header = super::header_from_level(3, 0); + assert_eq!( + ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, + 0 + ); + } +} diff --git a/miniz_oxide/src/inflate/core.rs b/miniz_oxide/src/inflate/core.rs index 738de236..1b6149f6 100644 --- a/miniz_oxide/src/inflate/core.rs +++ b/miniz_oxide/src/inflate/core.rs @@ -4,17 +4,16 @@ use super::*; use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER}; use ::core::cell::Cell; +use ::core::cmp; use ::core::convert::TryInto; -use ::core::{cmp, slice}; -use self::output_buffer::OutputBuffer; +use self::output_buffer::{InputWrapper, OutputBuffer}; pub const TINFL_LZ_DICT_SIZE: usize = 32_768; /// A struct containing huffman code lengths and the huffman code tree used by the decompressor. +#[derive(Clone)] struct HuffmanTable { - /// Length of the code at each index. - pub code_size: [u8; MAX_HUFF_SYMBOLS_0], /// Fast lookup table for shorter huffman codes. /// /// See `HuffmanTable::fast_lookup`. @@ -29,7 +28,6 @@ struct HuffmanTable { impl HuffmanTable { const fn new() -> HuffmanTable { HuffmanTable { - code_size: [0; MAX_HUFF_SYMBOLS_0], look_up: [0; FAST_LOOKUP_SIZE as usize], tree: [0; MAX_HUFF_TREE_SIZE], } @@ -46,7 +44,7 @@ impl HuffmanTable { /// Get the symbol and the code length from the huffman tree. #[inline] - fn tree_lookup(&self, fast_symbol: i32, bit_buf: BitBuffer, mut code_len: u32) -> (i32, u32) { + fn tree_lookup(&self, fast_symbol: i32, bit_buf: BitBuffer, mut code_len: u8) -> (i32, u32) { let mut symbol = fast_symbol; // We step through the tree until we encounter a positive value, which indicates a // symbol. @@ -54,17 +52,21 @@ impl HuffmanTable { // symbol here indicates the position of the left (0) node, if the next bit is 1 // we add 1 to the lookup position to get the right node. let tree_index = (!symbol + ((bit_buf >> code_len) & 1) as i32) as usize; + + // Use get here to avoid generating panic code. + // The init_tree code should prevent this from actually going out of bounds + // but if there were somehow a bug with that + // we would at worst end up with corrupted output in release mode.
             debug_assert!(tree_index < self.tree.len());
-            if tree_index >= self.tree.len() {
-                break;
-            }
-            symbol = i32::from(self.tree[tree_index]);
+            symbol = i32::from(self.tree.get(tree_index).copied().unwrap_or(i16::MAX));
             code_len += 1;
             if symbol >= 0 {
                 break;
             }
         }
-        (symbol, code_len)
+        // Note: Using a u8 for code_len inside this function seems to improve performance, but
+        // changing it in LocalVars seems to worsen things, so we convert it to a u32 here.
+        (symbol, u32::from(code_len))
     }
 
     #[inline]
@@ -75,18 +77,14 @@ impl HuffmanTable {
     ///
     /// It's possible we could avoid checking for 0 if we can guarantee a sane table.
     /// TODO: Check if a smaller type for code_len helps performance.
-    fn lookup(&self, bit_buf: BitBuffer) -> Option<(i32, u32)> {
+    fn lookup(&self, bit_buf: BitBuffer) -> (i32, u32) {
         let symbol = self.fast_lookup(bit_buf).into();
         if symbol >= 0 {
-            if (symbol >> 9) as u32 != 0 {
-                Some((symbol, (symbol >> 9) as u32))
-            } else {
-                // Zero-length code.
-                None
-            }
+            let length = (symbol >> 9) as u32;
+            (symbol, length)
         } else {
             // We didn't get a symbol from the fast lookup table, so check the tree instead.
-            Some(self.tree_lookup(symbol, bit_buf, FAST_LOOKUP_BITS.into()))
+            self.tree_lookup(symbol, bit_buf, FAST_LOOKUP_BITS)
         }
     }
 }
@@ -98,7 +96,7 @@
 const MAX_HUFF_SYMBOLS_0: usize = 288;
 /// The length of the second (distance) huffman table.
 const MAX_HUFF_SYMBOLS_1: usize = 32;
 /// The length of the last (huffman code length) huffman table.
-const _MAX_HUFF_SYMBOLS_2: usize = 19;
+const MAX_HUFF_SYMBOLS_2: usize = 19;
 /// The maximum length of a code that can be looked up in the fast lookup table.
 const FAST_LOOKUP_BITS: u8 = 10;
 /// The size of the fast lookup table.
@@ -164,8 +162,16 @@
 type BitBuffer = u64;
 
 #[cfg(not(target_pointer_width = "64"))]
 type BitBuffer = u32;
 
+/*
+enum HuffmanTableType {
+    LiteralLength = 0,
+    Dist = 1,
+    Huffman = 2,
+}*/
+
 /// Main decompression struct.
 ///
+#[derive(Clone)]
 pub struct DecompressorOxide {
     /// Current state of the decompressor.
     state: core::State,
@@ -178,9 +184,11 @@ pub struct DecompressorOxide {
     /// Adler32 checksum from the zlib header.
     z_adler32: u32,
     /// 1 if the current block is the last block, 0 otherwise.
-    finish: u32,
+    finish: u8,
     /// The type of the current block,
-    block_type: u32,
+    /// or, if in a dynamic block, which huffman table we are currently
+    /// initializing.
+    block_type: u8,
     /// 1 if the adler32 value should be checked.
     check_adler32: u32,
     /// Last match distance.
@@ -188,13 +196,16 @@ pub struct DecompressorOxide {
     /// Variable used for match length, symbols, and a number of other things.
     counter: u32,
     /// Number of extra bits for the last length or distance code.
-    num_extra: u32,
+    num_extra: u8,
     /// Number of entries in each huffman table.
-    table_sizes: [u32; MAX_HUFF_TABLES],
+    table_sizes: [u16; MAX_HUFF_TABLES],
     /// Buffer of input data.
    bit_buf: BitBuffer,
     /// Huffman tables.
     tables: [HuffmanTable; MAX_HUFF_TABLES],
+    code_size_literal: [u8; MAX_HUFF_SYMBOLS_0],
+    code_size_dist: [u8; MAX_HUFF_SYMBOLS_1],
+    code_size_huffman: [u8; MAX_HUFF_SYMBOLS_2],
     /// Raw block header.
     raw_header: [u8; 4],
     /// Huffman length codes.
@@ -234,6 +245,21 @@ impl DecompressorOxide {
             None
         }
     }
+
+    // Get zlib header for tests.
+    // Only for tests for now; we may provide a proper function for this later.
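For reference, the fast table entries touched above pack a code length and a symbol into one value, which is why callers mask with `511` and shift by `9`, and why negative entries (tree indices) are handled separately. A small sketch of that assumed packing:

```rust
// Sketch of the (length << 9) | symbol packing used by the fast lookup
// table. Symbols fit in 9 bits (max 287 < 512); the length sits above.
// Negative entries instead encode a tree index, hence the `symbol >= 0`
// checks in the decoder.
fn pack(length: u8, symbol: u16) -> i16 {
    debug_assert!(symbol < 512);
    ((u16::from(length) << 9) | symbol) as i16
}

fn unpack(entry: i16) -> (u8, u16) {
    let e = entry as u16;
    ((e >> 9) as u8, e & 511)
}

fn main() {
    let entry = pack(8, 286);
    assert_eq!(unpack(entry), (8, 286));
}
```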
+    #[cfg(all(test, feature = "with-alloc"))]
+    pub(crate) const fn zlib_header(&self) -> (u32, u32) {
+        (self.z_header0, self.z_header1)
+    }
+
+    /*fn code_size_table(&mut self, table_num: u8) -> &mut [u8] {
+        match table_num {
+            0 => &mut self.code_size_literal,
+            1 => &mut self.code_size_dist,
+            _ => &mut self.code_size_huffman,
+        }
+    }*/
 }
 
 impl Default for DecompressorOxide {
@@ -260,6 +286,9 @@ impl Default for DecompressorOxide {
                 HuffmanTable::new(),
                 HuffmanTable::new(),
             ],
+            code_size_literal: [0; MAX_HUFF_SYMBOLS_0],
+            code_size_dist: [0; MAX_HUFF_SYMBOLS_1],
+            code_size_huffman: [0; MAX_HUFF_SYMBOLS_2],
             raw_header: [0; 4],
             len_codes: [0; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137],
         }
@@ -307,7 +336,6 @@ enum State {
     BadCodeSizeDistPrevLookup,
     InvalidLitlen,
     InvalidDist,
-    InvalidCodeLen,
 }
 
 impl State {
@@ -335,7 +363,6 @@
 use self::State::*;
 
-// Not sure why miniz uses 32-bit values for these, maybe alignment/cache again?
 // # Optimization
 // We add an extra value at the end and make the tables 32 elements long
 // so we can use a mask to avoid bounds checks.
@@ -360,56 +387,37 @@ const LENGTH_EXTRA: [u8; 32] = [
 /// Base length for each distance code.
 #[rustfmt::skip]
-const DIST_BASE: [u16; 32] = [
+const DIST_BASE: [u16; 30] = [
     1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
     257, 385, 513, 769, 1025, 1537,
-    2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577, 32_768, 32_768
+    2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577
 ];
 
-/// Number of extra bits for each distance code.
-#[rustfmt::skip]
-const DIST_EXTRA: [u8; 32] = [
-    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
-    7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13
-];
+/// Get the number of extra bits used for a distance code.
+/// (Code numbers above `NUM_DISTANCE_CODES` will give some garbage
+/// value.)
+#[inline(always)]
+const fn num_extra_bits_for_distance_code(code: u8) -> u8 {
+    // TODO: Need to verify that this is faster on all platforms.
+    // This can be easily calculated without a lookup.
+    let c = code >> 1;
+    c.saturating_sub(1)
+}
 
 /// The mask used when indexing the base/extra arrays.
 const BASE_EXTRA_MASK: usize = 32 - 1;
 
-/// Sets the value of all the elements of the slice to `val`.
-#[inline]
-fn memset<T: Copy>(slice: &mut [T], val: T) {
-    for x in slice {
-        *x = val
-    }
-}
 
 /// Read an le u16 value from the slice iterator.
 ///
 /// # Panics
 /// Panics if there are less than two bytes left.
 #[inline]
-fn read_u16_le(iter: &mut slice::Iter<u8>) -> u16 {
+fn read_u16_le(iter: &mut InputWrapper) -> u16 {
     let ret = {
-        let two_bytes = iter.as_ref()[..2].try_into().unwrap();
+        let two_bytes = iter.as_slice()[..2].try_into().unwrap_or_default();
         u16::from_le_bytes(two_bytes)
     };
-    iter.nth(1);
-    ret
-}
-
-/// Read an le u32 value from the slice iterator.
-///
-/// # Panics
-/// Panics if there are less than four bytes left.
-#[inline(always)]
-#[cfg(target_pointer_width = "64")]
-fn read_u32_le(iter: &mut slice::Iter<u8>) -> u32 {
-    let ret = {
-        let four_bytes: [u8; 4] = iter.as_ref()[..4].try_into().unwrap();
-        u32::from_le_bytes(four_bytes)
-    };
-    iter.nth(3);
+    iter.advance(2);
     ret
 }
 
@@ -420,10 +428,10 @@
 /// This function assumes that there is at least 4 bytes left in the input buffer.
 #[inline(always)]
 #[cfg(target_pointer_width = "64")]
-fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) {
+fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut InputWrapper) {
     // Read four bytes into the buffer at once.
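The closed form in `num_extra_bits_for_distance_code` works because, apart from the first four codes, each pair of deflate distance codes adds one extra bit. A hedged sketch of how a distance is reassembled from its code and extra bits (only the first `DIST_BASE` entries are copied here; the helper names are illustrative, not the module's API):

```rust
// Sketch: rebuild a deflate distance from its code and extra bits.
// DIST_BASE copies the first few entries of the table above.
const DIST_BASE: [u16; 8] = [1, 2, 3, 4, 5, 7, 9, 13];

fn extra_bits(code: u8) -> u8 {
    // Same closed form as num_extra_bits_for_distance_code above.
    (code >> 1).saturating_sub(1)
}

fn distance(code: u8, extra: u16) -> u16 {
    DIST_BASE[code as usize] + extra
}

fn main() {
    // Code 6 has base 9 and 2 extra bits, covering distances 9..=12.
    assert_eq!(extra_bits(6), 2);
    assert_eq!(distance(6, 3), 12);
}
```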
     if l.num_bits < 30 {
-        l.bit_buf |= BitBuffer::from(read_u32_le(in_iter)) << l.num_bits;
+        l.bit_buf |= BitBuffer::from(in_iter.read_u32_le()) << l.num_bits;
         l.num_bits += 32;
     }
 }
 
@@ -432,7 +440,7 @@
 /// Ensures at least 16 bits are present, requires at least 2 bytes in the in buffer.
 #[inline(always)]
 #[cfg(not(target_pointer_width = "64"))]
-fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) {
+fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut InputWrapper) {
     // If the buffer is 32-bit wide, read 2 bytes instead.
     if l.num_bits < 15 {
         l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits;
@@ -488,7 +496,7 @@ fn decode_huffman_code<F>(
     r: &mut DecompressorOxide,
     l: &mut LocalVars,
     table: usize,
     flags: u32,
-    in_iter: &mut slice::Iter<u8>,
+    in_iter: &mut InputWrapper,
     f: F,
 ) -> Action
 where
@@ -498,7 +506,7 @@
     // ready in the bit buffer to start decoding the next huffman code.
     if l.num_bits < 15 {
         // First, make sure there is enough data in the bit buffer to decode a huffman code.
-        if in_iter.len() < 2 {
+        if in_iter.bytes_left() < 2 {
             // If there is less than 2 bytes left in the input buffer, we try to look up
             // the huffman code with what's available, and return if that doesn't succeed.
             // Original explanation in miniz:
@@ -513,9 +521,9 @@
         // /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */
         loop {
             let mut temp = i32::from(r.tables[table].fast_lookup(l.bit_buf));
-
             if temp >= 0 {
                 let code_len = (temp >> 9) as u32;
+                // TODO: Is there any point to check for code_len != 0 here still?
                 if (code_len != 0) && (l.num_bits >= code_len) {
                     break;
                 }
@@ -578,15 +586,11 @@
         // Mask out the length value.
         symbol &= 511;
     } else {
-        let res = r.tables[table].tree_lookup(symbol, l.bit_buf, u32::from(FAST_LOOKUP_BITS));
+        let res = r.tables[table].tree_lookup(symbol, l.bit_buf, FAST_LOOKUP_BITS);
         symbol = res.0;
         code_len = res.1;
     };
 
-    if code_len == 0 {
-        return Action::Jump(InvalidCodeLen);
-    }
-
     l.bit_buf >>= code_len;
     l.num_bits -= code_len;
     f(r, l, symbol)
@@ -596,13 +600,13 @@
 /// returning the result.
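The `bit_buf`/`num_bits` invariants maintained by `fill_bit_buffer` follow the usual LSB-first deflate convention: bytes are shifted in above the bits already held, and bits are consumed from the bottom. A minimal sketch of a reader of this kind (hypothetical `BitReader`, not the module's API):

```rust
// Minimal LSB-first bit reader of the kind fill_bit_buffer maintains.
struct BitReader<'a> {
    input: &'a [u8],
    bit_buf: u64,
    num_bits: u32,
}

impl<'a> BitReader<'a> {
    fn fill(&mut self) {
        // Shift whole bytes in above the bits we already hold.
        while self.num_bits <= 56 {
            match self.input.split_first() {
                Some((&b, rest)) => {
                    self.bit_buf |= u64::from(b) << self.num_bits;
                    self.num_bits += 8;
                    self.input = rest;
                }
                None => break,
            }
        }
    }

    fn read(&mut self, n: u32) -> u64 {
        self.fill();
        let bits = self.bit_buf & ((1u64 << n) - 1);
        self.bit_buf >>= n;
        self.num_bits -= n;
        bits
    }
}

fn main() {
    let mut r = BitReader { input: &[0b0000_0101], bit_buf: 0, num_bits: 0 };
    assert_eq!(r.read(3), 0b101); // the low bits come out first
}
```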
 /// If reading fails, `Action::End` is returned.
 #[inline]
-fn read_byte<F>(in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action
+fn read_byte<F>(in_iter: &mut InputWrapper, flags: u32, f: F) -> Action
 where
     F: FnOnce(u8) -> Action,
 {
-    match in_iter.next() {
+    match in_iter.read_byte() {
         None => end_of_input(flags),
-        Some(&byte) => f(byte),
+        Some(byte) => f(byte),
     }
 }
 
@@ -615,7 +619,7 @@ fn read_bits<F>(
     l: &mut LocalVars,
     amount: u32,
-    in_iter: &mut slice::Iter<u8>,
+    in_iter: &mut InputWrapper,
     flags: u32,
     f: F,
 ) -> Action
@@ -644,7 +648,7 @@
 }
 
 #[inline]
-fn pad_to_bytes<F>(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action
+fn pad_to_bytes<F>(l: &mut LocalVars, in_iter: &mut InputWrapper, flags: u32, f: F) -> Action
 where
     F: FnOnce(&mut LocalVars) -> Action,
 {
@@ -671,23 +675,36 @@ fn undo_bytes(l: &mut LocalVars, max: u32) -> u32 {
 fn start_static_table(r: &mut DecompressorOxide) {
     r.table_sizes[LITLEN_TABLE] = 288;
     r.table_sizes[DIST_TABLE] = 32;
-    memset(&mut r.tables[LITLEN_TABLE].code_size[0..144], 8);
-    memset(&mut r.tables[LITLEN_TABLE].code_size[144..256], 9);
-    memset(&mut r.tables[LITLEN_TABLE].code_size[256..280], 7);
-    memset(&mut r.tables[LITLEN_TABLE].code_size[280..288], 8);
-    memset(&mut r.tables[DIST_TABLE].code_size[0..32], 5);
+    r.code_size_literal[0..144].fill(8);
+    r.code_size_literal[144..256].fill(9);
+    r.code_size_literal[256..280].fill(7);
+    r.code_size_literal[280..288].fill(8);
+    r.code_size_dist[0..32].fill(5);
 }
 
-#[cfg(feature = "rustc-dep-of-std")]
+#[cfg(any(
+    feature = "rustc-dep-of-std",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_arch = "loongarch64"
+))]
 fn reverse_bits(n: u32) -> u32 {
     // Lookup is not used when building as part of std to avoid wasting space
     // for lookup table in every rust binary
     // as it's only used for backtraces in the cold path
     // - see #152
+
+    // armv7 and newer, and loongarch have a CPU instruction for bit reversal, so
+    // it's preferable to just use that on those architectures.
     n.reverse_bits()
 }
 
-#[cfg(not(feature = "rustc-dep-of-std"))]
+#[cfg(not(any(
+    feature = "rustc-dep-of-std",
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_arch = "loongarch64"
)))]
 fn reverse_bits(n: u32) -> u32 {
     static REVERSED_BITS_LOOKUP: [u32; 512] = {
         let mut table = [0; 512];
@@ -700,27 +717,45 @@
         table
     };
-
     REVERSED_BITS_LOOKUP[n as usize]
 }
 
 fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
     loop {
         let bt = r.block_type as usize;
-        if bt >= r.tables.len() {
-            return None;
-        }
+
+        let code_sizes = match bt {
+            LITLEN_TABLE => &mut r.code_size_literal[..],
+            DIST_TABLE => &mut r.code_size_dist,
+            HUFFLEN_TABLE => &mut r.code_size_huffman,
+            _ => return None,
+        };
         let table = &mut r.tables[bt];
+
+        let mut total_symbols = [0u16; 16];
+        let mut next_code = [0u32; 17];
+        const INVALID_CODE: i16 = 1 << 9 | 286;
+        // Set the values in the fast table to return a
+        // non-zero length and an invalid symbol instead of zero,
+        // so that we do not have to have a check for a zero
+        // code length in the hot code path later
+        // and can instead error out on the invalid symbol check
+        // on bogus input.
+        table.look_up.fill(INVALID_CODE);
+        // If we are initializing the huffman code length table we can skip
+        // this, since these codes can't be longer than 3 bits
+        // and thus only use the fast table, so the tree won't be accessed and
+        // there is no point clearing it.
+        // TODO: Avoid creating this table at all.
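On why `reverse_bits` is needed at all: deflate defines Huffman codes MSB-first, but the stream is read LSB-first, so a code of length `len` must be bit-reversed before it can index the LSB-first lookup table. A standalone sketch (hypothetical helper name):

```rust
// Sketch: bit-reverse a Huffman code of the given length so it can be
// used to index an LSB-first lookup table.
fn reverse_code(code: u32, len: u32) -> u32 {
    code.reverse_bits() >> (32 - len)
}

fn main() {
    // The 3-bit code 0b110 arrives in the stream as 0b011.
    assert_eq!(reverse_code(0b110, 3), 0b011);
}
```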
+        if bt != HUFFLEN_TABLE {
+            table.tree.fill(0);
+        }
+
         let table_size = r.table_sizes[bt] as usize;
-        if table_size > table.code_size.len() {
+        if table_size > code_sizes.len() {
             return None;
         }
 
-        let mut total_symbols = [0u32; 16];
-        let mut next_code = [0u32; 17];
-        memset(&mut table.look_up[..], 0);
-        memset(&mut table.tree[..], 0);
-
-        for &code_size in &table.code_size[..table_size] {
+        for &code_size in &code_sizes[..table_size] {
             let cs = code_size as usize;
             if cs >= total_symbols.len() {
                 return None;
             }
@@ -729,26 +764,35 @@
         }
 
         let mut used_symbols = 0;
-        let mut total = 0;
-        for (ts, next) in total_symbols
-            .iter()
-            .copied()
-            .zip(next_code.iter_mut().skip(1))
-            .skip(1)
-        {
+        let mut total = 0u32;
+        // Count up the total number of used lengths and check that the table is not
+        // under- or over-subscribed.
+        for (&ts, next) in total_symbols.iter().zip(next_code[1..].iter_mut()).skip(1) {
             used_symbols += ts;
-            total += ts;
+            total += u32::from(ts);
             total <<= 1;
             *next = total;
         }
 
-        if total != 65_536 && used_symbols > 1 {
+        //
+        // While it's not explicitly stated in the spec, a hufflen table
+        // with a single length (or none) would be invalid, as there needs to be
+        // at minimum a length for both a non-zero length huffman code for the end of block symbol
+        // and one of the codes to represent 0 to make sense - so just reject that here as well.
+        //
+        // The distance table is allowed to have a single distance code, though according to the
+        // spec it is supposed to be accompanied by a second dummy code. It can also be empty,
+        // indicating no used codes.
+        //
+        // The literal/length table cannot be empty as there has to be an end of block symbol.
+        // The standard doesn't specify that there should be a dummy code in case of a single
+        // symbol (i.e. an empty block). Normally that's not an issue, though the code will have
+        // to take that into account later on in case of malformed input.
+        if total != 65_536 && (used_symbols > 1 || bt == HUFFLEN_TABLE) {
             return Some(Action::Jump(BadTotalSymbols));
         }
 
         let mut tree_next = -1;
         for symbol_index in 0..table_size {
-            let code_size = table.code_size[symbol_index];
+            let code_size = code_sizes[symbol_index];
             if code_size == 0 || usize::from(code_size) >= next_code.len() {
                 continue;
             }
@@ -779,7 +823,7 @@
             }
 
             let mut tree_cur = table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize];
-            if tree_cur == 0 {
+            if tree_cur == INVALID_CODE {
                 table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize] = tree_next;
                 tree_cur = tree_next;
                 tree_next -= 2;
             }
@@ -811,18 +855,19 @@
             table.tree[tree_index] = symbol_index as i16;
         }
 
-        if r.block_type == 2 {
+        if r.block_type == HUFFLEN_TABLE as u8 {
             l.counter = 0;
             return Some(Action::Jump(ReadLitlenDistTablesCodeSize));
         }
 
-        if r.block_type == 0 {
+        if r.block_type == LITLEN_TABLE as u8 {
             break;
         }
         r.block_type -= 1;
     }
 
     l.counter = 0;
+
     Some(Action::Jump(DecodeLitlen))
 }
 
@@ -851,7 +896,7 @@ struct LocalVars {
     pub num_bits: u32,
     pub dist: u32,
     pub counter: u32,
-    pub num_extra: u32,
+    pub num_extra: u8,
 }
 
 #[inline]
@@ -955,6 +1000,9 @@ fn apply_match(
         transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask);
     } else if match_len <= dist && source_pos + match_len < out_slice.len() {
         // Destination and source segments does not intersect and source does not wrap.
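The `next_code` loop above is the canonical-code construction from RFC 1951 §3.2.2: count the codes of each length, then derive the first code of each length. A standalone sketch (assumes code lengths are at most 15, as in deflate; `assign_codes` is an illustrative helper, not the crate's API):

```rust
// RFC 1951 §3.2.2 canonical Huffman code assignment.
fn assign_codes(lengths: &[u8]) -> Vec<u32> {
    // Count how many codes there are of each length (lengths <= 15 assumed).
    let mut count = [0u32; 16];
    for &l in lengths {
        count[l as usize] += 1;
    }
    count[0] = 0;

    // Compute the first (smallest) code of each length.
    let mut next_code = [0u32; 16];
    let mut code = 0;
    for len in 1..16 {
        code = (code + count[len - 1]) << 1;
        next_code[len] = code;
    }

    // Hand out consecutive codes within each length, in symbol order.
    lengths
        .iter()
        .map(|&l| {
            if l == 0 {
                0
            } else {
                let c = next_code[l as usize];
                next_code[l as usize] += 1;
                c
            }
        })
        .collect()
}

fn main() {
    // The worked example from the RFC: lengths (3,3,3,3,3,2,4,4)
    // yield codes 010,011,100,101,110,00,1110,1111.
    let codes = assign_codes(&[3, 3, 3, 3, 3, 2, 4, 4]);
    assert_eq!(
        codes,
        vec![0b010, 0b011, 0b100, 0b101, 0b110, 0b00, 0b1110, 0b1111]
    );
}
```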
+        // TODO: An invalid match wrapping around before the start of the data reached here
+        // before it was fixed (it wrapped around and ended up overlapping again) - we need
+        // to check that we are not wrapping here.
         if source_pos < out_pos {
             let (from_slice, to_slice) = out_slice.split_at_mut(out_pos);
             to_slice[..match_len].copy_from_slice(&from_slice[source_pos..source_pos + match_len]);
@@ -978,7 +1026,7 @@
 /// and already improves decompression speed a fair bit.
 fn decompress_fast(
     r: &mut DecompressorOxide,
-    in_iter: &mut slice::Iter<u8>,
+    in_iter: &mut InputWrapper,
     out_buf: &mut OutputBuffer,
     flags: u32,
     local_vars: &mut LocalVars,
@@ -998,50 +1046,42 @@
             // + 29 + 32 (left in bit buf, including last 13 dist extra) = 111 bits < 14 bytes
             // We need the one extra byte as we may write one length and one full match
             // before checking again.
-            if out_buf.bytes_left() < 259 || in_iter.len() < 14 {
+            if out_buf.bytes_left() < 259 || in_iter.bytes_left() < 14 {
                 state = State::DecodeLitlen;
                 break 'o TINFLStatus::Done;
             }
 
             fill_bit_buffer(&mut l, in_iter);
 
-            if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) {
-                l.counter = symbol as u32;
+            let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
+            l.counter = symbol as u32;
+            l.bit_buf >>= code_len;
+            l.num_bits -= code_len;
+
+            if (l.counter & 256) != 0 {
+                // The symbol is not a literal.
+                break;
+            } else {
+                // If we have a 32-bit buffer we need to read another two bytes now
+                // to have enough bits to keep going.
+                if cfg!(not(target_pointer_width = "64")) {
+                    fill_bit_buffer(&mut l, in_iter);
+                }
+
+                let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
                 l.bit_buf >>= code_len;
                 l.num_bits -= code_len;
-
-                if (l.counter & 256) != 0 {
-                    // The symbol is not a literal.
+                // The previous symbol was a literal, so write it directly and check
+                // the next one.
+                out_buf.write_byte(l.counter as u8);
+                if (symbol & 256) != 0 {
+                    l.counter = symbol as u32;
+                    // The symbol is a length value.
                     break;
                 } else {
-                    // If we have a 32-bit buffer we need to read another two bytes now
-                    // to have enough bits to keep going.
-                    if cfg!(not(target_pointer_width = "64")) {
-                        fill_bit_buffer(&mut l, in_iter);
-                    }
-
-                    if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) {
-                        l.bit_buf >>= code_len;
-                        l.num_bits -= code_len;
-                        // The previous symbol was a literal, so write it directly and check
-                        // the next one.
-                        out_buf.write_byte(l.counter as u8);
-                        if (symbol & 256) != 0 {
-                            l.counter = symbol as u32;
-                            // The symbol is a length value.
-                            break;
-                        } else {
-                            // The symbol is a literal, so write it directly and continue.
-                            out_buf.write_byte(symbol as u8);
-                        }
-                    } else {
-                        state.begin(InvalidCodeLen);
-                        break 'o TINFLStatus::Failed;
-                    }
+                    // The symbol is a literal, so write it directly and continue.
+                    out_buf.write_byte(symbol as u8);
                 }
-            } else {
-                state.begin(InvalidCodeLen);
-                break 'o TINFLStatus::Failed;
             }
         }
 
@@ -1060,18 +1100,19 @@
         // The symbol was a length code.
         // # Optimization
         // Mask the value to avoid bounds checks
-        // We could use get_unchecked later if can statically verify that
-        // this will never go out of bounds.
-        l.num_extra = u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
+        // While the maximum is checked, the compiler isn't able to know that the
+        // value won't wrap around here.
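The wrapping concern in the TODO above exists because an LZ77 back-reference may overlap its own output (distance shorter than length), which a plain `copy_from_slice` cannot express. A minimal sketch of the copy semantics `apply_match` implements (simplified to a growable buffer, no wrapping dictionary):

```rust
// Sketch: LZ77 match copy. When dist < len the source overlaps the
// destination, so bytes must be copied one at a time - this is how
// run-length-style matches work in deflate.
fn apply_match(out: &mut Vec<u8>, dist: usize, len: usize) {
    assert!(dist >= 1 && dist <= out.len());
    let start = out.len() - dist;
    for i in 0..len {
        let b = out[start + i];
        out.push(b);
    }
}

fn main() {
    let mut out = b"ab".to_vec();
    apply_match(&mut out, 2, 6); // overlapping match repeats "ab"
    assert_eq!(out, b"abababab");
}
```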
+        l.num_extra = LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK];
         l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
         // Length and distance codes have a number of extra bits depending on
         // the base, which together with the base gives us the exact value.
+        // We need to make sure we have at least 33 bits (so min. 5 bytes) in the buffer at this spot.
         fill_bit_buffer(&mut l, in_iter);
         if l.num_extra != 0 {
             let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1);
             l.bit_buf >>= l.num_extra;
-            l.num_bits -= l.num_extra;
+            l.num_bits -= u32::from(l.num_extra);
             l.counter += extra_bits as u32;
         }
@@ -1081,33 +1122,30 @@
             fill_bit_buffer(&mut l, in_iter);
         }
 
-        if let Some((mut symbol, code_len)) = r.tables[DIST_TABLE].lookup(l.bit_buf) {
-            symbol &= 511;
-            l.bit_buf >>= code_len;
-            l.num_bits -= code_len;
-            if symbol > 29 {
-                state.begin(InvalidDist);
-                break 'o TINFLStatus::Failed;
-            }
-
-            l.num_extra = u32::from(DIST_EXTRA[symbol as usize]);
-            l.dist = u32::from(DIST_BASE[symbol as usize]);
-        } else {
-            state.begin(InvalidCodeLen);
+        let (mut symbol, code_len) = r.tables[DIST_TABLE].lookup(l.bit_buf);
+        symbol &= 511;
+        l.bit_buf >>= code_len;
+        l.num_bits -= code_len;
+        if symbol > 29 {
+            state.begin(InvalidDist);
             break 'o TINFLStatus::Failed;
         }
 
+        l.num_extra = num_extra_bits_for_distance_code(symbol as u8);
+        l.dist = u32::from(DIST_BASE[symbol as usize]);
+
         if l.num_extra != 0 {
             fill_bit_buffer(&mut l, in_iter);
             let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1);
             l.bit_buf >>= l.num_extra;
-            l.num_bits -= l.num_extra;
+            l.num_bits -= u32::from(l.num_extra);
             l.dist += extra_bits as u32;
         }
 
         let position = out_buf.position();
-        if l.dist as usize > out_buf.position()
-            && (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0)
+        if (l.dist as usize > out_buf.position()
+            && (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0))
+            || (l.dist as usize > out_buf.get_ref().len())
         {
             // We encountered a distance that refers a position before
             // the start of the decoded data, so we can't continue.
@@ -1147,18 +1185,18 @@
 ///
 /// * The offset given by `out_pos` indicates where in the output buffer slice writing should start.
 /// * If [`TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF`] is not set, the output buffer is used in a
-/// wrapping manner, and it's size is required to be a power of 2.
+///   wrapping manner, and its size is required to be a power of 2.
 /// * The decompression function normally needs access to 32KiB of the previously decompressed data
-///(or to the beginning of the decompressed data if less than 32KiB has been decompressed.)
+///   (or to the beginning of the decompressed data if less than 32KiB has been decompressed.)
 ///   - If this data is not available, decompression may fail.
 ///   - Some deflate compressors allow specifying a window size which limits match distances to
-/// less than this, or alternatively an RLE mode where matches will only refer to the previous byte
-/// and thus allows a smaller output buffer. The window size can be specified in the zlib
-/// header structure, however, the header data should not be relied on to be correct.
+///     less than this, or alternatively an RLE mode where matches will only refer to the previous byte
+///     and thus allows a smaller output buffer. The window size can be specified in the zlib
+///     header structure; however, the header data should not be relied on to be correct.
 ///
 /// `flags` indicates settings and status to the decompression function.
 /// * The [`TINFL_FLAG_HAS_MORE_INPUT`] has to be specified if more compressed data is to be provided
-/// in a subsequent call to this function.
+///   in a subsequent call to this function.
 /// * See the [`inflate_flags`] module for details on other flags.
 ///
 /// # Returns
@@ -1175,7 +1213,7 @@ pub fn decompress(
     flags: u32,
 ) -> (TINFLStatus, usize, usize) {
     let out_buf_size_mask = if flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0 {
-        usize::max_value()
+        usize::MAX
     } else {
         // In the case of zero len, any attempt to write would produce HasMoreOutput,
         // so to gracefully process the case of there really being no output,
@@ -1191,7 +1229,7 @@
         return (TINFLStatus::BadParam, 0, 0);
     }
 
-    let mut in_iter = in_buf.iter();
+    let mut in_iter = InputWrapper::from_slice(in_buf);
 
     let mut state = r.state;
 
@@ -1242,8 +1280,8 @@
             // Read the block header and jump to the relevant section depending on the block type.
             ReadBlockHeader => generate_state!(state, 'state_machine, {
                 read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| {
-                    r.finish = (bits & 1) as u32;
-                    r.block_type = (bits >> 1) as u32 & 3;
+                    r.finish = (bits & 1) as u8;
+                    r.block_type = ((bits >> 1) & 3) as u8;
                     match r.block_type {
                         0 => Action::Jump(BlockTypeNoCompression),
                         1 => {
@@ -1348,20 +1386,20 @@
             }),
 
             RawMemcpy2 => generate_state!(state, 'state_machine, {
-                if in_iter.len() > 0 {
+                if in_iter.bytes_left() > 0 {
                     // Copy as many raw bytes as possible from the input to the output using memcpy.
                     // Raw block lengths are limited to 64 * 1024, so casting through usize and u32
                     // is not an issue.
                     let space_left = out_buf.bytes_left();
                     let bytes_to_copy = cmp::min(cmp::min(
                         space_left,
-                        in_iter.len()),
+                        in_iter.bytes_left()),
                         l.counter as usize
                     );
 
                     out_buf.write_slice(&in_iter.as_slice()[..bytes_to_copy]);
 
-                    in_iter.nth(bytes_to_copy - 1);
+                    in_iter.advance(bytes_to_copy);
                     l.counter -= bytes_to_copy as u32;
                     Action::Jump(RawMemcpy1)
                 } else {
@@ -1375,12 +1413,12 @@
                     let num_bits = [5, 5, 4][l.counter as usize];
                     read_bits(&mut l, num_bits, &mut in_iter, flags, |l, bits| {
                         r.table_sizes[l.counter as usize] =
-                            bits as u32 + u32::from(MIN_TABLE_SIZES[l.counter as usize]);
+                            bits as u16 + MIN_TABLE_SIZES[l.counter as usize];
                         l.counter += 1;
                         Action::None
                     })
                 } else {
-                    memset(&mut r.tables[HUFFLEN_TABLE].code_size[..], 0);
+                    r.code_size_huffman.fill(0);
                     l.counter = 0;
                     // Check that the litlen and distance are within spec.
                     // litlen table should be <=286 acc to the RFC and
@@ -1400,25 +1438,24 @@
             // Read the 3-bit lengths of the huffman codes describing the huffman code lengths used
             // to decode the lengths of the main tables.
             ReadHufflenTableCodeSize => generate_state!(state, 'state_machine, {
-                if l.counter < r.table_sizes[HUFFLEN_TABLE] {
+                if l.counter < r.table_sizes[HUFFLEN_TABLE].into() {
                     read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| {
                         // These lengths are not stored in a normal ascending order, but rather one
                         // specified by the deflate specification intended to put the most used
                         // values at the front, as trailing zero lengths do not have to be stored.
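That storage order is fixed by RFC 1951 §3.2.7. A sketch of how the permutation is undone when reading (the constant matches the spec; `unpermute` is an illustrative helper, not the crate's API):

```rust
// RFC 1951 §3.2.7: the order in which the 3-bit code lengths for the
// code-length alphabet are stored in the stream.
const HUFFMAN_LENGTH_ORDER: [u8; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];

// Place lengths read from the stream into their natural symbol positions.
// Assumes at most 19 stored lengths.
fn unpermute(stored: &[u8]) -> [u8; 19] {
    let mut out = [0u8; 19];
    for (i, &len) in stored.iter().enumerate() {
        out[HUFFMAN_LENGTH_ORDER[i] as usize] = len;
    }
    out
}

fn main() {
    // Trailing entries may be omitted from the stream; they stay zero.
    let lens = unpermute(&[4, 0, 6]);
    assert_eq!(lens[16], 4);
    assert_eq!(lens[18], 6);
}
```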
-                        r.tables[HUFFLEN_TABLE]
-                            .code_size[HUFFMAN_LENGTH_ORDER[l.counter as usize] as usize] =
+                        r.code_size_huffman[HUFFMAN_LENGTH_ORDER[l.counter as usize] as usize] =
                             bits as u8;
                         l.counter += 1;
                         Action::None
                     })
                 } else {
-                    r.table_sizes[HUFFLEN_TABLE] = 19;
+                    r.table_sizes[HUFFLEN_TABLE] = MAX_HUFF_SYMBOLS_2 as u16;
                     init_tree(r, &mut l).unwrap_or(Action::End(TINFLStatus::Failed))
                 }
             }),
 
             ReadLitlenDistTablesCodeSize => generate_state!(state, 'state_machine, {
-                if l.counter < r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] {
+                if l.counter < u32::from(r.table_sizes[LITLEN_TABLE]) + u32::from(r.table_sizes[DIST_TABLE]) {
                     decode_huffman_code(
                         r, &mut l, HUFFLEN_TABLE, flags, &mut in_iter,
                         |r, l, symbol| {
                             l.dist = symbol as u32;
                             if l.dist < 16 {
                                 r.len_codes[l.counter as usize] = l.dist as u8;
                                 l.counter += 1;
                                 Action::None
                             } else if l.dist == 16 && l.counter == 0 {
                                 Action::Jump(BadCodeSizeDistPrevLookup)
                             } else {
                                 l.num_extra = [2, 3, 7][l.dist as usize - 16];
                                 Action::Jump(ReadExtraBitsCodeSize)
                             }
                         }
                     )
-                } else if l.counter != r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] {
+                } else if l.counter != u32::from(r.table_sizes[LITLEN_TABLE]) + u32::from(r.table_sizes[DIST_TABLE]) {
                     Action::Jump(BadCodeSizeSum)
                 } else {
-                    r.tables[LITLEN_TABLE].code_size[..r.table_sizes[LITLEN_TABLE] as usize]
+                    r.code_size_literal[..r.table_sizes[LITLEN_TABLE] as usize]
                         .copy_from_slice(&r.len_codes[..r.table_sizes[LITLEN_TABLE] as usize]);
 
                     let dist_table_start = r.table_sizes[LITLEN_TABLE] as usize;
                     let dist_table_end =
                         (r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE]) as usize;
-                    r.tables[DIST_TABLE].code_size[..r.table_sizes[DIST_TABLE] as usize]
+                    r.code_size_dist[..r.table_sizes[DIST_TABLE] as usize]
                        .copy_from_slice(&r.len_codes[dist_table_start..dist_table_end]);
 
                     r.block_type -= 1;
@@ -1453,7 +1490,7 @@
             }),
 
             ReadExtraBitsCodeSize => generate_state!(state, 'state_machine, {
-                let num_extra = l.num_extra;
+                let num_extra = l.num_extra.into();
                 read_bits(&mut l, num_extra, &mut in_iter, flags, |l, mut extra_bits| {
                     // Mask to avoid a bounds check.
                     extra_bits += [3, 3, 11][(l.dist as usize - 16) & 3];
                     let val = if l.dist == 16 {
                         r.len_codes[l.counter as usize - 1]
                     } else {
                         0
                     };
 
-                    memset(
-                        &mut r.len_codes[
+                    r.len_codes[
                         l.counter as usize..l.counter as usize + extra_bits as usize
-                        ],
-                        val,
-                    );
+                    ].fill(val);
                     l.counter += extra_bits as u32;
                     Action::Jump(ReadLitlenDistTablesCodeSize)
                 })
             }),
 
             DecodeLitlen => generate_state!(state, 'state_machine, {
-                if in_iter.len() < 4 || out_buf.bytes_left() < 2 {
+                if in_iter.bytes_left() < 4 || out_buf.bytes_left() < 2 {
                     // See if we can decode a literal with the data we have left.
                     // Jumps to next state (WriteSymbol) if successful.
                     decode_huffman_code(
@@ -1493,7 +1527,7 @@
                 // If there is enough space, use the fast inner decompression
                 // function.
                     out_buf.bytes_left() >= 259 &&
-                    in_iter.len() >= 14
+                    in_iter.bytes_left() >= 14
                 {
                     let (status, new_state) = decompress_fast(
                         r,
                         &mut in_iter,
                         &mut out_buf,
                         flags,
                         &mut l,
                         out_buf_size_mask,
                     );
 
                     state = new_state;
                     if status == TINFLStatus::Done {
                         Action::Jump(new_state)
                     } else {
                         break 'state_machine Action::End(status);
                     }
                 } else {
                     fill_bit_buffer(&mut l, &mut in_iter);
 
-                    if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) {
+                    let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
 
                     l.counter = symbol as u32;
                     l.bit_buf >>= code_len;
@@ -1529,7 +1563,7 @@
                             fill_bit_buffer(&mut l, &mut in_iter);
                         }
 
-                        if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) {
+                        let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
 
                         l.bit_buf >>= code_len;
                         l.num_bits -= code_len;
@@ -1545,13 +1579,9 @@
                             out_buf.write_byte(symbol as u8);
                             Action::None
                         }
-                        } else {
-                            Action::Jump(InvalidCodeLen)
-                        }
-                    }
-                    } else {
-                        Action::Jump(InvalidCodeLen)
+                    }
                     }
             }),
 
@@ -1584,7 +1614,7 @@
                 // We could use get_unchecked later if can statically verify that
                 // this will never go out of bounds.
                 l.num_extra =
-                    u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
+                    LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK];
                 l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
                 // Length and distance codes have a number of extra bits depending on
                 // the base, which together with the base gives us the exact value.
@@ -1597,7 +1627,7 @@
             }),
 
             ReadExtraBitsLitlen => generate_state!(state, 'state_machine, {
-                let num_extra = l.num_extra;
+                let num_extra = l.num_extra.into();
                 read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| {
                     l.counter += extra_bits as u32;
                     Action::Jump(DecodeDistance)
                 })
             }),
 
             DecodeDistance => generate_state!(state, 'state_machine, {
                 // Try to read a huffman code from the input buffer and look up what
                 // length code the decoded symbol refers to.
                 decode_huffman_code(r, &mut l, DIST_TABLE, flags, &mut in_iter, |_r, l, symbol| {
+                    // # Optimization - transform the value into usize here before the check so
+                    // the compiler can optimize the bounds check later - ideally it should
+                    // know that the value can't be negative from earlier in the
+                    // decode_huffman_code function, but it seems it may not be able
+                    // to make the assumption that it can't be negative and thus
+                    // overflow if it's converted after the check.
+                    let symbol = symbol as usize;
                     if symbol > 29 {
                         // Invalid distance code.
                         return Action::Jump(InvalidDist)
                     }
-                    // # Optimization
-                    // Mask the value to avoid bounds checks
-                    // We could use get_unchecked later if can statically verify that
-                    // this will never go out of bounds.
-                    l.num_extra = u32::from(DIST_EXTRA[symbol as usize & BASE_EXTRA_MASK]);
-                    l.dist = u32::from(DIST_BASE[symbol as usize & BASE_EXTRA_MASK]);
+                    l.num_extra = num_extra_bits_for_distance_code(symbol as u8);
+                    l.dist = u32::from(DIST_BASE[symbol]);
                     if l.num_extra != 0 {
                         // ReadExtraBitsDistance
                         Action::Jump(ReadExtraBitsDistance)
                     } else {
                         Action::Jump(HuffDecodeOuterLoop2)
                     }
                 })
             }),
 
            ReadExtraBitsDistance => generate_state!(state, 'state_machine, {
-                let num_extra = l.num_extra;
+                let num_extra = l.num_extra.into();
                 read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| {
                     l.dist += extra_bits as u32;
                     Action::Jump(HuffDecodeOuterLoop2)
                 })
             }),
 
             HuffDecodeOuterLoop2 => generate_state!(state, 'state_machine, {
-                if l.dist as usize > out_buf.position() &&
-                    (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0)
+                if (l.dist as usize > out_buf.position() &&
+                    (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0))
+                    || (l.dist as usize > out_buf.get_ref().len())
                 {
                     // We encountered a distance that refers a position before
                     // the start of the decoded data, so we can't continue.
@@ -1704,9 +1737,9 @@
             if r.finish != 0 {
                 pad_to_bytes(&mut l, &mut in_iter, flags, |_| Action::None);
 
-                let in_consumed = in_buf.len() - in_iter.len();
+                let in_consumed = in_buf.len() - in_iter.bytes_left();
                 let undo = undo_bytes(&mut l, in_consumed as u32) as usize;
-                in_iter = in_buf[in_consumed - undo..].iter();
+                in_iter = InputWrapper::from_slice(in_buf[in_consumed - undo..].iter().as_slice());
 
                 l.bit_buf &= ((1 as BitBuffer) << l.num_bits) - 1;
                 debug_assert_eq!(l.num_bits, 0);
@@ -1759,7 +1792,7 @@
     let in_undo = if status != TINFLStatus::NeedsMoreInput
         && status != TINFLStatus::FailedCannotMakeProgress
     {
-        undo_bytes(&mut l, (in_buf.len() - in_iter.len()) as u32) as usize
+        undo_bytes(&mut l, (in_buf.len() - in_iter.bytes_left()) as u32) as usize
     } else {
         0
     };
@@ -1810,7 +1843,7 @@
     (
         status,
-        in_buf.len() - in_iter.len() - in_undo,
+        in_buf.len() - in_iter.bytes_left() - in_undo,
         out_buf.position() - out_pos,
     )
 }
@@ -1891,7 +1924,7 @@ mod test {
     }
 
     fn masked_lookup(table: &HuffmanTable, bit_buf: BitBuffer) -> (i32, u32) {
-        let ret = table.lookup(bit_buf).unwrap();
+        let ret = table.lookup(bit_buf);
         (ret.0 & 511, ret.1)
     }
@@ -2049,4 +2082,49 @@
         let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags);
         assert_eq!(res, (TINFLStatus::HasMoreOutput, 2, 0));
     }
+
+    #[test]
+    fn dist_extra_bits() {
+        use self::num_extra_bits_for_distance_code;
+        // Number of extra bits for each distance code.
+        const DIST_EXTRA: [u8; 29] = [
+            0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
+            12, 13,
+        ];
+
+        for (i, &dist) in DIST_EXTRA.iter().enumerate() {
+            assert_eq!(dist, num_extra_bits_for_distance_code(i as u8));
+        }
+    }
+
+    #[test]
+    fn check_tree() {
+        let mut r = DecompressorOxide::new();
+        let mut l = LocalVars {
+            bit_buf: 0,
+            num_bits: 0,
+            dist: 0,
+            counter: 0,
+            num_extra: 0,
+        };
+
+        r.code_size_huffman[0] = 1;
+        r.code_size_huffman[1] = 1;
+        //r.code_size_huffman[2] = 3;
+        //r.code_size_huffman[3] = 3;
+        //r.code_size_huffman[1] = 4;
+        r.block_type = HUFFLEN_TABLE as u8;
+        r.table_sizes[HUFFLEN_TABLE] = 4;
+        let res = init_tree(&mut r, &mut l).unwrap();
+
+        let status = match res {
+            Action::Jump(s) => s,
+            _ => {
+                //println!("issue");
+                return;
+            }
+        };
+        //println!("status {:?}", status);
+        assert!(status != BadTotalSymbols);
+    }
 }
diff --git a/miniz_oxide/src/inflate/mod.rs b/miniz_oxide/src/inflate/mod.rs
index 3f787e72..cbf41ee7 100644
--- a/miniz_oxide/src/inflate/mod.rs
+++ b/miniz_oxide/src/inflate/mod.rs
@@ -2,7 +2,6 @@
 #[cfg(feature = "with-alloc")]
 use crate::alloc::{boxed::Box, vec, vec::Vec};
-use ::core::usize;
 #[cfg(all(feature = "std", feature = "with-alloc"))]
 use std::error::Error;
@@ -123,7 +122,7 @@ fn decompress_error(status: TINFLStatus, output: Vec<u8>) -> Result<Vec<u8>, DecompressError> {
 #[inline]
 #[cfg(feature = "with-alloc")]
 pub fn decompress_to_vec(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
-    decompress_to_vec_inner(input, 0, usize::max_value())
+    decompress_to_vec_inner(input, 0, usize::MAX)
 }
 
 /// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector.
@@ -139,7 +138,7 @@ pub fn decompress_to_vec_zlib(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
     decompress_to_vec_inner(
         input,
         inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER,
-        usize::max_value(),
+        usize::MAX,
     )
 }
diff --git a/miniz_oxide/src/inflate/output_buffer.rs b/miniz_oxide/src/inflate/output_buffer.rs
index 5218a807..ce0ccd61 100644
--- a/miniz_oxide/src/inflate/output_buffer.rs
+++ b/miniz_oxide/src/inflate/output_buffer.rs
@@ -14,12 +14,12 @@ impl<'a> OutputBuffer<'a> {
         OutputBuffer { slice, position }
     }
 
-    #[inline]
+    #[inline(always)]
     pub const fn position(&self) -> usize {
         self.position
     }
 
-    #[inline]
+    #[inline(always)]
     pub fn set_position(&mut self, position: usize) {
         self.position = position;
     }
@@ -48,13 +48,64 @@
         self.slice.len() - self.position
     }
 
-    #[inline]
+    #[inline(always)]
     pub const fn get_ref(&self) -> &[u8] {
         self.slice
     }
 
-    #[inline]
+    #[inline(always)]
     pub fn get_mut(&mut self) -> &mut [u8] {
         self.slice
     }
 }
+
+/// A wrapper for the input slice used when decompressing.
+///
+/// Using this rather than `Cursor` lets us implement the reading methods directly on
+/// the buffer and lets us use a usize rather than u64 for the position, which helps with
+/// performance on 32-bit systems.
+#[derive(Copy, Clone)]
+pub struct InputWrapper<'a> {
+    slice: &'a [u8],
+}
+
+impl<'a> InputWrapper<'a> {
+    #[inline(always)]
+    pub const fn as_slice(&self) -> &[u8] {
+        self.slice
+    }
+
+    #[inline(always)]
+    pub const fn from_slice(slice: &'a [u8]) -> InputWrapper<'a> {
+        InputWrapper { slice }
+    }
+
+    #[inline(always)]
+    pub fn advance(&mut self, steps: usize) {
+        self.slice = &self.slice[steps..];
+    }
+
+    #[inline]
+    pub fn read_byte(&mut self) -> Option<u8> {
+        self.slice.first().map(|n| {
+            self.advance(1);
+            *n
+        })
+    }
+
+    #[inline]
+    #[cfg(target_pointer_width = "64")]
+    pub fn read_u32_le(&mut self) -> u32 {
+        let ret = {
+            let four_bytes: [u8; 4] = self.slice[..4].try_into().unwrap_or_default();
+            u32::from_le_bytes(four_bytes)
+        };
+        self.advance(4);
+        ret
+    }
+
+    #[inline(always)]
+    pub const fn bytes_left(&self) -> usize {
+        self.slice.len()
+    }
+}
diff --git a/miniz_oxide/src/inflate/stream.rs b/miniz_oxide/src/inflate/stream.rs
index 5463ab0f..39b41e1c 100644
--- a/miniz_oxide/src/inflate/stream.rs
+++ b/miniz_oxide/src/inflate/stream.rs
@@ -57,6 +57,7 @@ impl ResetPolicy for FullReset {
 /// A struct that combines a decompressor with extra data for streaming decompression.
 ///
+#[derive(Clone)]
 pub struct InflateState {
     /// Inner decompressor struct
     decomp: DecompressorOxide,
@@ -226,6 +227,9 @@ pub fn inflate(
     if (flush == MZFlush::Finish) && first_call {
         decomp_flags |= inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
 
+        // The caller is indicating that they want to finish the decompression and this is the
+        // first call with the current stream, so we can simply write directly to the output buffer.
+        // If there is not enough space for all of the decompressed data we will end up with a failure regardless.
         let status = decompress(&mut state.decomp, next_in, next_out, 0, decomp_flags);
         let in_bytes = status.1;
         let out_bytes = status.2;
@@ -420,4 +424,78 @@
         // Should still have the checksum read from the header file.
         assert_eq!(state.decompressor().adler32_header(), Some(459605011))
     }
+
+    #[test]
+    fn test_partial_continue() {
+        let encoded = [
+            120u8, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4,
+            19,
+        ];
+
+        // Feed input bytes one at a time to the decompressor
+        let mut out = vec![0; 50];
+        let mut state = InflateState::new_boxed(DataFormat::Zlib);
+        let mut part_in = 0;
+        let mut part_out = 0;
+        for i in 1..=encoded.len() {
+            let res = inflate(
+                &mut state,
+                &encoded[part_in..i],
+                &mut out[part_out..],
+                MZFlush::None,
+            );
+            let status = res.status.expect("Failed to decompress!");
+            if i == encoded.len() {
+                assert_eq!(status, MZStatus::StreamEnd);
+            } else {
+                assert_eq!(status, MZStatus::Ok);
+            }
+            part_out += res.bytes_written as usize;
+            part_in += res.bytes_consumed;
+        }
+
+        assert_eq!(out[..part_out as usize], b"Hello, zlib!"[..]);
+        assert_eq!(part_in, encoded.len());
+        assert_eq!(state.decompressor().adler32(), Some(459605011));
+    }
+
+    // Inflate part of a stream and clone the inflate state.
+    // Discard the original state and resume the stream from the clone.
+    #[test]
+    fn test_rewind_and_resume() {
+        let encoded = [
+            120u8, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4,
+            19,
+        ];
+        let decoded = b"Hello, zlib!";
+
+        // Feed partial input bytes to the decompressor
+        let mut out = vec![0; 50];
+        let mut state = InflateState::new_boxed(DataFormat::Zlib);
+        let res1 = inflate(&mut state, &encoded[..10], &mut out, MZFlush::None);
+        let status = res1.status.expect("Failed to decompress!");
+        assert_eq!(status, MZStatus::Ok);
+
+        // Clone the state and discard the original
+        let mut resume = state.clone();
+        drop(state);
+
+        // Resume the stream using the cloned state
+        let res2 = inflate(
+            &mut resume,
+            &encoded[res1.bytes_consumed..],
+            &mut out[res1.bytes_written..],
+            MZFlush::Finish,
+        );
+        let status = res2.status.expect("Failed to decompress!");
+        assert_eq!(status, MZStatus::StreamEnd);
+
+        assert_eq!(res1.bytes_consumed + res2.bytes_consumed, encoded.len());
+        assert_eq!(res1.bytes_written + res2.bytes_written, decoded.len());
+        assert_eq!(
+            &out[..res1.bytes_written + res2.bytes_written as usize],
+            decoded
+        );
+        assert_eq!(resume.decompressor().adler32(), Some(459605011));
+    }
 }
diff --git a/miniz_oxide/tests/test.rs b/miniz_oxide/tests/test.rs
index dcb93874..cd30a341 100644
--- a/miniz_oxide/tests/test.rs
+++ b/miniz_oxide/tests/test.rs
@@ -250,6 +250,88 @@ fn issue_143_return_buf_error_on_finish_without_end_header() {
     assert_eq!(inflate_result.status.unwrap_err(), MZError::Buf)
 }
 
+#[test]
+fn decompress_empty_dynamic() {
+    // Empty block with dynamic huffman codes.
+    let enc = vec![5, 192, 129, 8, 0, 0, 0, 0, 32, 127, 235, 0b011, 0, 0, 0];
+
+    let res = decompress_to_vec(enc.as_slice()).unwrap();
+    assert!(res.is_empty());
+
+    let enc = vec![5, 192, 129, 8, 0, 0, 0, 0, 32, 127, 235, 0b1111011, 0, 0, 0];
+
+    let res = decompress_to_vec(enc.as_slice());
+    assert!(res.is_err());
+}
+
+fn decode_hex(s: &str) -> Vec<u8> {
+    (0..s.len())
+        .step_by(2)
+        .map(|i| u8::from_str_radix(&s[i..i + 2], 16).unwrap())
+        .collect::<Vec<u8>>()
+}
+
+#[test]
+fn issue_161_index_out_of_range_apply_match() {
+    // This data contains a match with a distance before the start of the data,
+    // which resulted in an edge case causing a panic instead of returning with an
+    // error when using a smaller wrapping buffer.
+    let content_hex = "fa99fff4f37fef5bbff9bb6ccb9ab4e47f66d9875cebf9ffe6eb6fbdf6e24b773f72ebe5175f62ff26bf78eec57bafdd78ee6b5f7efeee2b2f5b1d2bfe5100";
+    let content = decode_hex(&content_hex);
+
+    let mut decompressor = miniz_oxide::inflate::core::DecompressorOxide::new();
+
+    let mut buf2 = vec![0; 2048];
+    let _ = miniz_oxide::inflate::core::decompress(&mut decompressor, &content, &mut buf2, 0, 0);
+}
+
+#[test]
+fn empty_stored() {
+    // Compress empty input using the stored compression level.
+    // There was a logic error causing this to output zeroes
+    // from the empty data buffer instead of outputting an empty stored block.
+    let data = vec![];
+    let enc = compress_to_vec_zlib(&data, 0);
+    let _ = decompress_to_vec_zlib(&enc).unwrap();
+}
+
+/*
+#[test]
+fn partial_decompression_imap_issue_158() {
+    use miniz_oxide::inflate::stream::{inflate, InflateState};
+    use miniz_oxide::{DataFormat, MZFlush};
+    use std::string;
+
+    // Decompresses to
+    // "* QUOTAROOT INBOX \"User quota\"\r\n* QUOTA \"User quota\" (STORAGE 76 307200)\r\nA0001 OK Getquotaroot completed (0.001 + 0.000 secs).\r\n"
+    let input = vec![
+        210, 82, 8, 12, 245, 15, 113, 12, 242, 247, 15, 81, 240, 244, 115, 242, 143, 80, 80, 10,
+        45, 78, 45, 82, 40, 44, 205, 47, 73, 84, 226, 229, 210, 130, 200, 163, 136, 42, 104, 4,
+        135, 248, 7, 57, 186, 187, 42, 152, 155, 41, 24, 27, 152, 27, 25, 24, 104, 242, 114, 57,
+        26, 24, 24, 24, 42, 248, 123, 43, 184, 167, 150, 128, 213, 21, 229, 231, 151, 40, 36, 231,
+        231, 22, 228, 164, 150, 164, 166, 40, 104, 24, 232, 129, 20, 104, 43, 128, 104, 3, 133,
+        226, 212, 228, 98, 77, 61, 94, 46, 0, 0, 0, 0, 255, 255,
+    ];
+
+    let mut inflate_stream = InflateState::new(DataFormat::Raw);
+    let mut output = vec![0; 8];
+    let result = inflate(&mut inflate_stream, &input, &mut output, MZFlush::None);
+
+    let out_string: String = string::String::from_utf8(output).unwrap();
+
+    println!("{}", out_string);
+    println!("written {}", result.bytes_written);
+
+    assert!(result.status.is_ok());
+    // Should not consume everything, there is not enough space in the buffer for the output.
+    assert!(
+        result.bytes_consumed < input.len(),
+        "bytes consumed {:?}, input.len() {}",
+        result.bytes_consumed,
+        input.len()
+    )
+}*/
+
 /*
 #[test]
 fn large_file() {
diff --git a/src/c_export.rs b/src/c_export.rs
index 97e3be21..1bee6999 100644
--- a/src/c_export.rs
+++ b/src/c_export.rs
@@ -253,7 +253,6 @@ impl<'io, ST: StateType> StreamOxide<'io, ST> {
     }
 }
 
-#[cfg(not(no_c_export))]
 unmangle!(
     /// Default allocation function using `malloc`.
     pub unsafe extern "C" fn miniz_def_alloc_func(
diff --git a/src/lib.rs b/src/lib.rs
index 438fddb7..d71d0330 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -297,13 +297,13 @@ unmangle!(
     }
 );
 
-#[cfg(target_bit_width = "64")]
+#[cfg(target_pointer_width = "64")]
 #[inline]
 fn buffer_too_large(source_len: c_ulong, dest_len: c_ulong) -> bool {
     (source_len | dest_len) > 0xFFFFFFFF
 }
 
-#[cfg(not(target_bit_width = "64"))]
+#[cfg(not(target_pointer_width = "64"))]
 #[inline]
 fn buffer_too_large(_source_len: c_ulong, _dest_len: c_ulong) -> bool {
     false
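A note on the last hunk: `target_bit_width` is not a cfg key rustc defines, so the 64-bit overflow check was silently compiled out on every platform; `target_pointer_width` is the key rustc actually sets. A hedged sketch of the intended gating (standalone, for illustration only):

```rust
// `target_pointer_width` is defined by rustc for every target; the old
// `target_bit_width` matched nothing, so its branch was never built.
#[cfg(target_pointer_width = "64")]
fn pointer_width() -> u32 {
    64
}

#[cfg(not(target_pointer_width = "64"))]
fn pointer_width() -> u32 {
    32
}

fn main() {
    println!("compiled for {}-bit pointers", pointer_width());
}
```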