Skip to content

Commit

Permalink
make hash calculations a trait
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Nov 23, 2023
1 parent ee1170f commit 1f2c889
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 69 deletions.
32 changes: 17 additions & 15 deletions src/complevel_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

use crate::hash_chain::{HashChain, RotatingHash};
use crate::hash_chain::{HashChain, ZlibRotatingHash};
use crate::preflate_constants;
use crate::preflate_parse_config::{
PreflateParserConfig, FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS,
Expand All @@ -27,10 +27,10 @@ pub struct CompLevelInfo {
pub far_len_3_matches: bool,
}
struct CompLevelEstimatorState<'a> {
slow_hash: HashChain<'a>,
fast_l1_hash: HashChain<'a>,
fast_l2_hash: HashChain<'a>,
fast_l3_hash: HashChain<'a>,
slow_hash: HashChain<'a, ZlibRotatingHash>,
fast_l1_hash: HashChain<'a, ZlibRotatingHash>,
fast_l2_hash: HashChain<'a, ZlibRotatingHash>,
fast_l3_hash: HashChain<'a, ZlibRotatingHash>,
blocks: &'a Vec<PreflateTokenBlock>,
info: CompLevelInfo,
wsize: u16,
Expand All @@ -39,15 +39,16 @@ struct CompLevelEstimatorState<'a> {
impl<'a> CompLevelEstimatorState<'a> {
pub fn new(
wbits: u32,
mbits: u32,
hash_shift: u32,
hash_mask: u16,
plain_text: &'a [u8],
blocks: &'a Vec<PreflateTokenBlock>,
) -> Self {
CompLevelEstimatorState::<'a> {
slow_hash: HashChain::<'a>::new(plain_text, mbits),
fast_l1_hash: HashChain::<'a>::new(plain_text, mbits),
fast_l2_hash: HashChain::<'a>::new(plain_text, mbits),
fast_l3_hash: HashChain::<'a>::new(plain_text, mbits),
slow_hash: HashChain::<'a>::new(plain_text, hash_shift, hash_mask),
fast_l1_hash: HashChain::<'a>::new(plain_text, hash_shift, hash_mask),
fast_l2_hash: HashChain::<'a>::new(plain_text, hash_shift, hash_mask),
fast_l3_hash: HashChain::<'a>::new(plain_text, hash_shift, hash_mask),
blocks,
info: CompLevelInfo {
possible_compression_levels: 0b_111111110,
Expand Down Expand Up @@ -238,7 +239,7 @@ impl<'a> CompLevelEstimatorState<'a> {
}

fn update_or_skip_single_fast_hash(
hash: &mut HashChain,
hash: &mut HashChain<ZlibRotatingHash>,
len: u32,
config: &PreflateParserConfig,
) {
Expand All @@ -251,9 +252,9 @@ impl<'a> CompLevelEstimatorState<'a> {

fn check_match_single_fast_hash(
token: &PreflateTokenReference,
hash: &HashChain,
hash: &HashChain<ZlibRotatingHash>,
config: &PreflateParserConfig,
hash_head: RotatingHash,
hash_head: ZlibRotatingHash,
window_size: u32,
) -> bool {
let mdepth = hash.match_depth(hash_head, token, window_size);
Expand All @@ -270,12 +271,13 @@ impl<'a> CompLevelEstimatorState<'a> {

pub fn estimate_preflate_comp_level(
wbits: u32,
mbits: u32,
hash_shift: u32,
hash_mask: u16,
plain_text: &[u8],
blocks: &Vec<PreflateTokenBlock>,
early_out: bool,
) -> CompLevelInfo {
let mut state = CompLevelEstimatorState::new(wbits, mbits, plain_text, blocks);
let mut state = CompLevelEstimatorState::new(wbits, hash_shift, hash_mask, plain_text, blocks);
state.check_dump(early_out);
state.recommend();
state.info
Expand Down
56 changes: 26 additions & 30 deletions src/hash_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
use default_boxed::DefaultBoxed;

use crate::{
bit_helper::DebugHash, preflate_constants::MIN_MATCH, preflate_input::PreflateInput,
preflate_token::PreflateTokenReference,
bit_helper::DebugHash, preflate_input::PreflateInput, preflate_token::PreflateTokenReference,
};

pub struct HashIterator<'a> {
Expand Down Expand Up @@ -61,47 +60,49 @@ struct HashTable {
prev: [u16; 65536],
}

pub struct HashChain<'a> {
pub struct HashChain<'a, H: RotatingHashTrait> {
input: PreflateInput<'a>,
hash_table: Box<HashTable>,
hash_shift: u32,
running_hash: RotatingHash,
running_hash: H,
hash_mask: u16,
total_shift: i32,
}

#[derive(Default, Debug, Copy, Clone)]
pub struct RotatingHash {
pub struct ZlibRotatingHash {
hash: u16,
}

impl RotatingHash {
pub fn hash(&self, mask: u16) -> u16 {
/// Rolling-hash abstraction that `HashChain` is generic over
/// (`HashChain<'a, H: RotatingHashTrait>`), so the hash function can be swapped
/// without changing the chain logic.
pub trait RotatingHashTrait {
/// Returns the current hash value restricted by `mask`.
///
/// The `ZlibRotatingHash` implementation computes `self.hash & mask`.
fn hash(&self, mask: u16) -> u16;
/// Returns a new hash value with byte `c` mixed in; `self` is only borrowed and
/// is not modified.
///
/// The `ZlibRotatingHash` implementation computes
/// `(self.hash << hash_shift) ^ u16::from(c)`.
fn append(&self, c: u8, hash_shift: u32) -> Self;
}

impl RotatingHashTrait for ZlibRotatingHash {
fn hash(&self, mask: u16) -> u16 {
self.hash & mask
}

pub fn append(&self, c: u8, hash_shift: u32) -> RotatingHash {
RotatingHash {
fn append(&self, c: u8, hash_shift: u32) -> ZlibRotatingHash {
ZlibRotatingHash {
hash: (self.hash << hash_shift) ^ u16::from(c),
}
}
}

impl<'a> HashChain<'a> {
pub fn new(i: &'a [u8], mem_level: u32) -> Self {
let hash_bits = mem_level + 7;
let hash_mask = ((1u32 << hash_bits) - 1) as u16;

impl<'a, H: RotatingHashTrait + Default> HashChain<'a, H> {
pub fn new(i: &'a [u8], hash_shift: u32, hash_mask: u16) -> Self {
// Important: total_shift starts at -8 because an offset of 0 marks the end of a hash
// chain. Starting at -8 means every valid offset is >= 8; otherwise the very first
// hash offset would be 0, would look like the end of the chain, and would be missed.
let mut hash_chain_ext = HashChain {
input: PreflateInput::new(i),
total_shift: -8,
hash_shift: (hash_bits + MIN_MATCH - 1) / MIN_MATCH,
hash_shift,
hash_mask,
hash_table: HashTable::default_boxed(),
running_hash: RotatingHash::default(),
running_hash: H::default(),
};

if i.len() > 2 {
Expand All @@ -122,11 +123,11 @@ impl<'a> HashChain<'a> {
checksum.update(self.total_shift);
}

fn next_hash(&self, b: u8) -> RotatingHash {
fn next_hash(&self, b: u8) -> H {
self.running_hash.append(b, self.hash_shift)
}

fn next_hash_double(&self, b1: u8, b2: u8) -> RotatingHash {
fn next_hash_double(&self, b1: u8, b2: u8) -> H {
self.running_hash
.append(b1, self.hash_shift)
.append(b2, self.hash_shift)
Expand Down Expand Up @@ -157,7 +158,7 @@ impl<'a> HashChain<'a> {
/// used for debugging only
#[allow(dead_code)]
pub fn verify_hash(&self, dist: Option<PreflateTokenReference>) {
let mut hash = RotatingHash::default();
let mut hash = H::default();
let mut start_pos = self.total_shift as i32;

let mut chains: Vec<Vec<u16>> = Vec::new();
Expand Down Expand Up @@ -219,20 +220,15 @@ impl<'a> HashChain<'a> {
assert!(!mismatch);
}

pub fn get_head(&self, hash: RotatingHash) -> u32 {
pub fn get_head(&self, hash: H) -> u32 {
self.hash_table.head[hash.hash(self.hash_mask) as usize].into()
}

/// Returns the value stored in the hash table's `chain_depth` slot for `node`.
///
/// `node` is used directly as the index into `chain_depth`; an index outside
/// the table panics, as with any out-of-bounds index.
pub fn get_node_depth(&self, node: u32) -> u32 {
    let slot = node as usize;
    let depths = &self.hash_table.chain_depth;
    depths[slot]
}

pub fn iterate_from_head(
&self,
hash: RotatingHash,
ref_pos: u32,
max_dist: u32,
) -> HashIterator {
pub fn iterate_from_head(&self, hash: H, ref_pos: u32, max_dist: u32) -> HashIterator {
let head = self.get_head(hash);
HashIterator::new(
&self.hash_table.prev,
Expand All @@ -246,15 +242,15 @@ impl<'a> HashChain<'a> {
&self.input
}

pub fn cur_hash(&self) -> RotatingHash {
pub fn cur_hash(&self) -> H {
self.next_hash(self.input.cur_char(2))
}

pub fn cur_plus_1_hash(&self) -> RotatingHash {
pub fn cur_plus_1_hash(&self) -> H {
self.next_hash_double(self.input.cur_char(2), self.input.cur_char(3))
}

pub fn hash_equal(&self, a: RotatingHash, b: RotatingHash) -> bool {
pub fn hash_equal(&self, a: H, b: H) -> bool {
a.hash(self.hash_mask) == b.hash(self.hash_mask)
}

Expand Down Expand Up @@ -330,7 +326,7 @@ impl<'a> HashChain<'a> {

pub fn match_depth(
&self,
hash: RotatingHash,
hash: H,
target_reference: &PreflateTokenReference,
window_size: u32,
) -> u32 {
Expand Down
24 changes: 9 additions & 15 deletions src/predictor_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*--------------------------------------------------------------------------------------------*/

use crate::bit_helper::DebugHash;
use crate::hash_chain::{HashChain, RotatingHash};
use crate::hash_chain::{HashChain, RotatingHashTrait};
use crate::preflate_constants::{MAX_MATCH, MIN_LOOKAHEAD, MIN_MATCH};
use crate::preflate_parameter_estimator::PreflateParameters;
use crate::preflate_token::PreflateTokenReference;
Expand All @@ -26,16 +26,16 @@ pub struct PreflateRematchInfo {
pub condensed_hops: u32,
}

pub struct PredictorState<'a> {
hash: HashChain<'a>,
pub struct PredictorState<'a, H: RotatingHashTrait> {
hash: HashChain<'a, H>,
params: PreflateParameters,
window_bytes: u32,
}

impl<'a> PredictorState<'a> {
impl<'a, H: RotatingHashTrait + Default> PredictorState<'a, H> {
pub fn new(uncompressed: &'a [u8], params: &PreflateParameters) -> Self {
Self {
hash: HashChain::new(uncompressed, params.mem_level),
hash: HashChain::new(uncompressed, params.hash_shift, params.hash_mask),
window_bytes: 1 << params.window_bits,
params: *params,
}
Expand Down Expand Up @@ -82,15 +82,15 @@ impl<'a> PredictorState<'a> {
self.hash.input().remaining()
}

pub fn hash_equal(&self, a: RotatingHash, b: RotatingHash) -> bool {
pub fn hash_equal(&self, a: H, b: H) -> bool {
self.hash.hash_equal(a, b)
}

pub fn calculate_hash(&self) -> RotatingHash {
pub fn calculate_hash(&self) -> H {
self.hash.cur_hash()
}

pub fn calculate_hash_next(&self) -> RotatingHash {
pub fn calculate_hash_next(&self) -> H {
self.hash.cur_plus_1_hash()
}

Expand All @@ -115,13 +115,7 @@ impl<'a> PredictorState<'a> {
match_len
}

pub fn match_token(
&self,
hash: RotatingHash,
prev_len: u32,
offset: u32,
max_depth: u32,
) -> MatchResult {
pub fn match_token(&self, hash: H, prev_len: u32, offset: u32, max_depth: u32) -> MatchResult {
let start_pos = self.current_input_pos() + offset;
let max_len = std::cmp::min(self.total_input_size() - start_pos, MAX_MATCH);
if max_len < std::cmp::max(prev_len + 1, MIN_MATCH) {
Expand Down
40 changes: 33 additions & 7 deletions src/preflate_parameter_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use crate::{
bit_helper::bit_length,
complevel_estimator::estimate_preflate_comp_level,
preflate_constants,
preflate_constants::{self, MIN_MATCH},
preflate_parse_config::*,
preflate_stream_info::{extract_preflate_info, PreflateStreamInfo},
preflate_token::PreflateTokenBlock,
Expand Down Expand Up @@ -35,7 +35,9 @@ pub struct PreflateParameters {
pub huff_strategy: PreflateHuffStrategy,
pub zlib_compatible: bool,
pub window_bits: u32,
pub mem_level: u32,
pub hash_shift: u32,
pub hash_mask: u16,
pub max_token_count: u16,
pub far_len3_matches_detected: bool,
pub very_far_matches_detected: bool,
pub matches_to_start_detected: bool,
Expand All @@ -53,7 +55,9 @@ impl PreflateParameters {
let huff_strategy = decoder.decode_value(4);
let zlib_compatible = decoder.decode_value(1) != 0;
let window_bits = decoder.decode_value(8);
let mem_level = decoder.decode_value(8);
let hash_shift = decoder.decode_value(8);
let hash_mask = decoder.decode_value(16);
let max_token_count = decoder.decode_value(16);
let far_len3_matches_detected = decoder.decode_value(1) != 0;
let very_far_matches_detected = decoder.decode_value(1) != 0;
let matches_to_start_detected = decoder.decode_value(1) != 0;
Expand All @@ -80,7 +84,9 @@ impl PreflateParameters {
},
zlib_compatible,
window_bits: window_bits.into(),
mem_level: mem_level.into(),
hash_shift: hash_shift.into(),
hash_mask: hash_mask,
max_token_count: max_token_count,
far_len3_matches_detected,
very_far_matches_detected,
matches_to_start_detected,
Expand All @@ -98,7 +104,9 @@ impl PreflateParameters {
encoder.encode_value(self.huff_strategy as u16, 4);
encoder.encode_value(u16::try_from(self.zlib_compatible).unwrap(), 1);
encoder.encode_value(u16::try_from(self.window_bits).unwrap(), 8);
encoder.encode_value(u16::try_from(self.mem_level).unwrap(), 8);
encoder.encode_value(u16::try_from(self.hash_shift).unwrap(), 8);
encoder.encode_value(u16::try_from(self.hash_mask).unwrap(), 16);
encoder.encode_value(u16::try_from(self.max_token_count).unwrap(), 16);
encoder.encode_value(u16::try_from(self.far_len3_matches_detected).unwrap(), 1);
encoder.encode_value(u16::try_from(self.very_far_matches_detected).unwrap(), 1);
encoder.encode_value(u16::try_from(self.matches_to_start_detected).unwrap(), 1);
Expand Down Expand Up @@ -161,7 +169,23 @@ pub fn estimate_preflate_parameters(
let window_bits = estimate_preflate_window_bits(info.max_dist);
let mem_level = estimate_preflate_mem_level(info.max_tokens_per_block);

let cl = estimate_preflate_comp_level(window_bits, mem_level, unpacked_output, blocks, false);
let hash_bits = mem_level + 7;
let hash_shift = (hash_bits + 2) / 3;
let hash_mask = ((1u32 << hash_bits) - 1) as u16;

//let hash_shift = 5;
//let hash_mask = 32767;

let max_token_count = (1 << (6 + mem_level)) - 1;

let cl = estimate_preflate_comp_level(
window_bits,
hash_shift,
hash_mask,
unpacked_output,
blocks,
false,
);

let config;
let comp_level = cl.recommended_compression_level;
Expand All @@ -181,7 +205,9 @@ pub fn estimate_preflate_parameters(

PreflateParameters {
window_bits,
mem_level,
hash_shift,
hash_mask,
max_token_count,
strategy: estimate_preflate_strategy(&info),
huff_strategy: estimate_preflate_huff_strategy(&info),
zlib_compatible: cl.zlib_compatible,
Expand Down
Loading

0 comments on commit 1f2c889

Please sign in to comment.