Skip to content

Commit

Permalink
use faster compressor in more cases
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Nov 24, 2023
1 parent 1f2c889 commit 69a0667
Show file tree
Hide file tree
Showing 45 changed files with 299 additions and 240 deletions.
Binary file added samples/compressed2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added samples/compressed2.jpgoutput
Binary file not shown.
Binary file added samples/compressed2.lep
Binary file not shown.
Binary file added samples/compressed_flate2_level0.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level1.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level2.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level3.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level4.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level5.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level6.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level7.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level8.deflate
Binary file not shown.
Binary file added samples/compressed_flate2_level9.deflate
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level0.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level1.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level2.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level3.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level4.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level5.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level6.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level7.bin
Binary file not shown.
Binary file removed samples/compressed_miniz_oxide_level8.bin
Binary file not shown.
Binary file added samples/compressed_zlib_level0.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level1.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level2.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level3.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level4.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level5.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level6.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level7.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level8.deflate
Binary file not shown.
Binary file added samples/compressed_zlib_level9.deflate
Binary file not shown.
Binary file added samples/dump214.deflate
Binary file not shown.
Binary file added samples/dump5265.deflate
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file added samples/starcontrol.deflate
Binary file not shown.
359 changes: 182 additions & 177 deletions src/complevel_estimator.rs

Large diffs are not rendered by default.

69 changes: 49 additions & 20 deletions src/hash_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,26 @@ impl<'a> HashIterator<'a> {

/// Backing tables for the hash chains: three fixed-size arrays of 65536
/// entries each (`head`, `chain_depth` and `prev`).
#[derive(DefaultBoxed)]
struct HashTable {
/// Represents the head of the hash chain for a given hash value. In order
/// to find additional matches, you follow the prev chain from the head.
head: [u16; 65536],

/// Represents the number of following nodes in the chain for a given
/// position. For example, if chain_depth[100] == 5, then there are 5 more
/// matches if we follow the prev chain from position 100 back to 0. The value goes back
/// all the way to the beginning of the compressed data (not readjusted when we shift
/// the compression window), so in order to calculate the number of chain positions,
/// you need to subtract the value from the head position.
///
/// This is used during estimation only to figure out how deep we need to match
/// into the hash chain, which allows us to estimate which parameters were used
/// to generate the deflate data.
chain_depth: [u32; 65536],

/// Represents the prev chain for a given position. This is used to find
/// all the potential matches for a given hash. The value points to the previous
/// position in the chain, or 0 if there are no more matches. (We start
/// with an offset of 8 to avoid confusion with the end of the chain.)
prev: [u16; 65536],
}

Expand Down Expand Up @@ -137,7 +155,7 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
self.running_hash = self.running_hash.append(b, self.hash_shift);
}

fn reshift_if_necessary(&mut self) {
fn reshift_if_necessary<const MAINTAIN_DEPTH: bool>(&mut self) {
if self.input.pos() as i32 - self.total_shift >= 0xfe00 {
const DELTA: usize = 0x7e00;
for i in 0..=self.hash_mask as usize {
Expand All @@ -149,7 +167,9 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
self.hash_table.prev[i].saturating_sub(DELTA as u16);
}

self.hash_table.chain_depth.copy_within(DELTA..=65535, 0);
if MAINTAIN_DEPTH {
self.hash_table.chain_depth.copy_within(DELTA..=65535, 0);
}
self.total_shift += DELTA as i32;
}
}
Expand Down Expand Up @@ -254,17 +274,17 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
a.hash(self.hash_mask) == b.hash(self.hash_mask)
}

pub fn update_hash(&mut self, mut length: u32) {
pub fn update_hash<const MAINTAIN_DEPTH: bool>(&mut self, mut length: u32) {
if length > 0x180 {
while length > 0 {
let blk = std::cmp::min(length, 0x180);
self.update_hash(blk);
self.update_hash::<MAINTAIN_DEPTH>(blk);
length -= blk;
}
return;
}

self.reshift_if_necessary();
self.reshift_if_necessary::<MAINTAIN_DEPTH>();

let pos = (self.input.pos() as i32 - self.total_shift) as u16;

Expand All @@ -274,8 +294,13 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
self.update_running_hash(self.input.cur_char(i as i32));
let h = self.running_hash.hash(self.hash_mask);
let p = pos + i - 2;
self.hash_table.chain_depth[usize::from(p)] =
self.hash_table.chain_depth[usize::from(self.hash_table.head[usize::from(h)])] + 1;

if MAINTAIN_DEPTH {
self.hash_table.chain_depth[usize::from(p)] = self.hash_table.chain_depth
[usize::from(self.hash_table.head[usize::from(h)])]
+ 1;
}

self.hash_table.prev[usize::from(p)] = self.hash_table.head[usize::from(h)];
self.hash_table.head[usize::from(h)] = p;
}
Expand All @@ -286,8 +311,8 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
//println!("u {} = {}", length, c);
}

pub fn skip_hash(&mut self, l: u32) {
self.reshift_if_necessary();
pub fn skip_hash<const MAINTAIN_DEPTH: bool>(&mut self, l: u32) {
self.reshift_if_necessary::<MAINTAIN_DEPTH>();

let pos = self.input.pos() as i32;

Expand All @@ -296,20 +321,24 @@ impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
self.update_running_hash(self.input.cur_char(2));
let h = self.running_hash.hash(self.hash_mask);
let p = pos - self.total_shift;
self.hash_table.chain_depth[p as usize] =
self.hash_table.chain_depth[self.hash_table.head[h as usize] as usize] + 1;
self.hash_table.prev[p as usize] = self.hash_table.head[h as usize];
self.hash_table.head[h as usize] = p as u16;

// Skipped data is not inserted into the hash chain,
// but we must still update the chainDepth, to avoid
// bad analysis results
// --------------------
for i in 1..l {
let p = (pos + i as i32) - self.total_shift;
self.hash_table.chain_depth[p as usize] = 0xffff8000;
if MAINTAIN_DEPTH {
self.hash_table.chain_depth[p as usize] =
self.hash_table.chain_depth[self.hash_table.head[h as usize] as usize] + 1;

// Skipped data is not inserted into the hash chain,
// but we must still update the chainDepth, to avoid
// bad analysis results
// --------------------
for i in 1..l {
let p = (pos + i as i32) - self.total_shift;
self.hash_table.chain_depth[p as usize] = 0xffff8000;
}
}

self.hash_table.prev[p as usize] = self.hash_table.head[h as usize];
self.hash_table.head[h as usize] = p as u16;

if remaining > l {
self.update_running_hash(self.input.cur_char(l as i32));
if remaining > l + 1 {
Expand Down
4 changes: 2 additions & 2 deletions src/predictor_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ impl<'a, H: RotatingHashTrait + Default> PredictorState<'a, H> {
}

pub fn update_hash(&mut self, pos: u32) {
self.hash.update_hash(pos);
self.hash.update_hash::<false>(pos);
}

pub fn skip_hash(&mut self, pos: u32) {
self.hash.skip_hash(pos);
self.hash.skip_hash::<false>(pos);
}

pub fn current_input_pos(&self) -> u32 {
Expand Down
45 changes: 10 additions & 35 deletions src/preflate_parameter_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
use crate::{
bit_helper::bit_length,
complevel_estimator::estimate_preflate_comp_level,
preflate_constants::{self, MIN_MATCH},
preflate_parse_config::*,
preflate_constants::{self},
preflate_stream_info::{extract_preflate_info, PreflateStreamInfo},
preflate_token::PreflateTokenBlock,
statistical_codec::{PredictionDecoder, PredictionEncoder},
Expand Down Expand Up @@ -169,39 +168,15 @@ pub fn estimate_preflate_parameters(
let window_bits = estimate_preflate_window_bits(info.max_dist);
let mem_level = estimate_preflate_mem_level(info.max_tokens_per_block);

let hash_bits = mem_level + 7;
let hash_shift = (hash_bits + 2) / 3;
let hash_mask = ((1u32 << hash_bits) - 1) as u16;

//let hash_shift = 5;
//let hash_mask = 32767;

let max_token_count = (1 << (6 + mem_level)) - 1;

let cl = estimate_preflate_comp_level(
window_bits,
hash_shift,
hash_mask,
unpacked_output,
blocks,
false,
);

let config;
let comp_level = cl.recommended_compression_level;
let is_fast_compressor;

if (1..=3).contains(&comp_level) {
is_fast_compressor = true;
config = &FAST_PREFLATE_PARSER_SETTINGS[(comp_level - 1) as usize]
} else {
is_fast_compressor = false;
config = &SLOW_PREFLATE_PARSER_SETTINGS[if (4..=9).contains(&comp_level) {
(comp_level - 4) as usize
} else {
5
}]
}
let cl = estimate_preflate_comp_level(window_bits, mem_level, unpacked_output, blocks);

let hash_shift = cl.hash_shift;
let hash_mask = cl.hash_mask;

PreflateParameters {
window_bits,
Expand All @@ -219,10 +194,10 @@ pub fn estimate_preflate_parameters(
} else {
bit_length(cl.max_chain_depth as u32 - 1)
},
is_fast_compressor,
good_length: config.good_length,
max_lazy: config.max_lazy,
nice_length: config.nice_length,
max_chain: config.max_chain,
is_fast_compressor: cl.fast_compressor,
good_length: cl.good_length,
max_lazy: cl.max_lazy,
nice_length: cl.nice_length,
max_chain: cl.max_chain,
}
}
10 changes: 9 additions & 1 deletion src/preflate_parse_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@ pub struct PreflateParserConfig {
pub max_chain: u32,
}

pub const FAST_PREFLATE_PARSER_SETTINGS: [PreflateParserConfig; 3] = [
pub const FAST_PREFLATE_PARSER_SETTINGS: [PreflateParserConfig; 4] = [
// max speed used by miniz, always match the first entry
PreflateParserConfig {
good_length: 4,
max_lazy: 2,
nice_length: 8,
max_chain: 200,
},
// these three levels are used by zlib
PreflateParserConfig {
good_length: 4,
max_lazy: 4,
Expand Down
44 changes: 40 additions & 4 deletions src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,27 +312,63 @@ fn do_analyze(crc: Option<u32>, compressed_data: &[u8], verify: bool) {
fn verify_longmatch() {
do_analyze(
None,
&read_file("compressed_flate2_level1_longmatch.bin"),
&read_file("compressed_flate2_level1_longmatch.deflate"),
false,
);
}

// test binary deflate generated by MS Office
#[test]
fn verify_docx() {
do_analyze(None, &read_file("dump571.bin"), true);
do_analyze(None, &read_file("dump571.deflate"), true);
}

// test binary deflate generated by starcontrol
#[test]
fn verify_savegame() {
do_analyze(None, &read_file("savegame.bin"), true);
do_analyze(None, &read_file("savegame.deflate"), true);
}

/// Analyzes the single `compressed_zlib_level1.deflate` sample with
/// verification enabled.
///
/// NOTE(review): the test name ends in `_3` but the level loaded is 1;
/// confirm which of the two is intended.
#[test]
fn verify_zlib_compressed_3() {
    let level = 1;
    let sample_name = format!("compressed_zlib_level{}.deflate", level);
    let deflate_bytes = read_file(&sample_name);

    // The sample is handed over unchanged and without a CRC. Earlier
    // commented-out code here sliced off the first 2 and the last 4 bytes and
    // built a CRC from those trailing bytes (presumably for zlib-wrapped
    // input; confirm before reviving it).
    do_analyze(None, &deflate_bytes, true);
}

#[test]
fn verify_zlib_compressed() {
for i in 0..9 {
let v = read_file(&format!("compressed_miniz_oxide_level{}.bin", i));
let v = read_file(&format!("compressed_zlib_level{}.deflate", i));

//let minusheader = &v[2..v.len() - 4];
//let crc = Some(u32::from_le_bytes([v[v.len() - 4], v[v.len() - 3], v[v.len() - 2], v[v.len() - 1]]));

do_analyze(None, &v, true);
do_analyze(None, &v, false);
}
}

/// Analyzes the single `compressed_flate2_level1.deflate` sample with
/// verification enabled.
#[test]
fn verify_miniz_compressed_1() {
    // The sample name is a constant, so pass the literal directly instead of
    // allocating a `String` through a `format!` call that has no arguments.
    let v = read_file("compressed_flate2_level1.deflate");

    // The sample is handed over unchanged and without a CRC. Earlier
    // commented-out code here sliced off the first 2 and the last 4 bytes and
    // built a CRC from those trailing bytes (presumably for zlib-wrapped
    // input; confirm before reviving it).
    do_analyze(None, &v, true);
}

#[test]
fn verify_miniz_compressed() {
for i in 0..9 {
let filename = format!("compressed_flate2_level{}.deflate", i);
println!("loading {}", filename);
let v = read_file(&filename);

//let minusheader = &v[2..v.len() - 4];
//let crc = Some(u32::from_le_bytes([v[v.len() - 4], v[v.len() - 3], v[v.len() - 2], v[v.len() - 1]]));
Expand Down
8 changes: 7 additions & 1 deletion tests/end_to_end.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ pub fn read_file(filename: &str) -> Vec<u8> {
#[test]
fn end_to_end_compressed() {
for i in 0..9 {
let compressed_data = read_file(&format!("compressed_miniz_oxide_level{}.bin", i));
let compressed_data = read_file(&format!("compressed_flate2_level{}.deflate", i));
verifyresult(&compressed_data);

let compressed_data = read_file(&format!("compressed_zlib_level{}.deflate", i));
verifyresult(&compressed_data);
}
}
Expand Down Expand Up @@ -89,6 +91,10 @@ fn test_file(filename: &str) {

let minusheader = &output[2..output.len() - 4];

// write to file
let mut f = File::create(format!("c:\\temp\\compressed_zlib_level{}.bin", level)).unwrap();
f.write_all(minusheader).unwrap();

verifyresult(minusheader);
}

Expand Down

0 comments on commit 69a0667

Please sign in to comment.