Skip to content

Commit eb9d2c9

Browse files
authored
major refactor and simplification (#11)
* work in progress * work in progress * work * work * work * work * work * work * added comments
1 parent 811c6b3 commit eb9d2c9

10 files changed

+1038
-773
lines changed

src/complevel_estimator.rs

+42-128
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
/// Getting the parameters correct means that the resulting diff between the deflate stream
99
/// and the predicted deflate stream will be as small as possible.
1010
use crate::{
11-
hash_algorithm::{
12-
HashAlgorithm, LibdeflateRotatingHash4, MiniZHash, RotatingHashTrait, ZlibNGHash,
13-
ZlibRotatingHash, MINIZ_LEVEL1_HASH_SIZE_MASK,
14-
},
15-
hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH},
11+
hash_algorithm::HashAlgorithm,
12+
hash_chain::DictionaryAddPolicy,
13+
hash_chain_holder::{new_hash_chain_holder, HashChainHolderTrait},
1614
preflate_constants,
1715
preflate_input::PreflateInput,
16+
preflate_parameter_estimator::PreflateStrategy,
1817
preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS},
1918
preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference},
2019
skip_length_estimator::estimate_skip_length,
20+
token_predictor::TokenPredictorParameters,
2121
};
2222

2323
#[derive(Default)]
@@ -30,73 +30,17 @@ pub struct CompLevelInfo {
3030
pub max_dist_3_matches: u16,
3131
pub min_len: u32,
3232
pub add_policy: DictionaryAddPolicy,
33-
pub hash_mask: u16,
34-
pub hash_shift: u32,
3533
pub hash_algorithm: HashAlgorithm,
3634
pub good_length: u32,
3735
pub max_lazy: u32,
3836
pub nice_length: u32,
3937
pub max_chain: u32,
4038
}
4139

42-
/// vtable for invoking the hash chain functions on specific implementation
43-
/// of hash algorithm
44-
trait HashChainInvoke {
45-
fn invoke_update_hash(
46-
&mut self,
47-
len: u32,
48-
input: &PreflateInput,
49-
add_policy: DictionaryAddPolicy,
50-
);
51-
52-
fn invoke_match_depth(
53-
&mut self,
54-
token: PreflateTokenReference,
55-
window_size: u32,
56-
input: &PreflateInput,
57-
) -> u32;
58-
}
59-
60-
/// holds the hashchain for a specific hash algorithm
61-
struct HashChainHolder<H: RotatingHashTrait> {
62-
hash_chain: HashChain<H>,
63-
}
64-
65-
impl<H: RotatingHashTrait + 'static> HashChainHolder<H> {
66-
fn new(hash_shift: u32, hash_mask: u16, input: &PreflateInput<'_>) -> Box<dyn HashChainInvoke> {
67-
Box::new(HashChainHolder::<H> {
68-
hash_chain: HashChain::<H>::new(hash_shift, hash_mask, input),
69-
})
70-
}
71-
}
72-
73-
impl<H: RotatingHashTrait> HashChainInvoke for HashChainHolder<H> {
74-
fn invoke_update_hash(
75-
&mut self,
76-
len: u32,
77-
input: &PreflateInput,
78-
add_policy: DictionaryAddPolicy,
79-
) {
80-
self.hash_chain
81-
.update_hash_with_policy::<true>(len, input, add_policy)
82-
}
83-
84-
fn invoke_match_depth(
85-
&mut self,
86-
token: PreflateTokenReference,
87-
window_size: u32,
88-
input: &PreflateInput,
89-
) -> u32 {
90-
self.hash_chain.match_depth(&token, window_size, input)
91-
}
92-
}
93-
9440
struct CandidateInfo {
9541
hash_algorithm: HashAlgorithm,
96-
hash_mask: u16,
97-
hash_shift: u32,
9842
add_policy: DictionaryAddPolicy,
99-
hash_chain: Box<dyn HashChainInvoke>,
43+
hash_chain: Box<dyn HashChainHolderTrait>,
10044

10145
longest_dist_at_hop_0: u32,
10246
longest_dist_at_hop_1_plus: u32,
@@ -105,31 +49,31 @@ struct CandidateInfo {
10549

10650
impl CandidateInfo {
10751
fn new(
108-
hash_mask: u16,
109-
hash_shift: u32,
11052
add_policy: DictionaryAddPolicy,
11153
hash_algorithm: HashAlgorithm,
112-
input: &PreflateInput,
54+
window_bits: u32,
11355
) -> Self {
114-
CandidateInfo {
115-
hash_mask,
116-
hash_shift,
56+
let params = TokenPredictorParameters {
57+
hash_algorithm,
58+
add_policy,
59+
matches_to_start_detected: false,
60+
very_far_matches_detected: false,
61+
window_bits,
62+
strategy: PreflateStrategy::Default,
63+
nice_length: 0,
64+
max_token_count: 0,
65+
zlib_compatible: false,
66+
max_dist_3_matches: 0,
67+
good_length: 0,
68+
max_lazy: 0,
69+
max_chain: 0,
70+
min_len: 0,
71+
};
72+
73+
Self {
11774
add_policy,
11875
hash_algorithm,
119-
hash_chain: match hash_algorithm {
120-
HashAlgorithm::Zlib => {
121-
HashChainHolder::<ZlibRotatingHash>::new(hash_shift, hash_mask, input)
122-
}
123-
HashAlgorithm::MiniZFast => {
124-
HashChainHolder::<MiniZHash>::new(hash_shift, hash_mask, input)
125-
}
126-
HashAlgorithm::Libdeflate4 => {
127-
HashChainHolder::<LibdeflateRotatingHash4>::new(hash_shift, hash_mask, input)
128-
}
129-
HashAlgorithm::ZlibNG => {
130-
HashChainHolder::<ZlibNGHash>::new(hash_shift, hash_mask, input)
131-
}
132-
},
76+
hash_chain: new_hash_chain_holder(&params),
13377
longest_dist_at_hop_0: 0,
13478
longest_dist_at_hop_1_plus: 0,
13579
max_chain_found: 0,
@@ -142,9 +86,7 @@ impl CandidateInfo {
14286
window_size: u32,
14387
input: &PreflateInput,
14488
) -> bool {
145-
let mdepth = self
146-
.hash_chain
147-
.invoke_match_depth(token, window_size, input);
89+
let mdepth = self.hash_chain.match_depth(token, window_size, input);
14890

14991
// remove element if the match was impossible due to matching the
15092
// hash depth or because in fast mode we can't match partial words
@@ -187,14 +129,6 @@ impl CandidateInfo {
187129
self.max_chain_found
188130
}
189131

190-
fn hash_mask(&self) -> u16 {
191-
self.hash_mask
192-
}
193-
194-
fn hash_shift(&self) -> u32 {
195-
self.hash_shift
196-
}
197-
198132
fn hash_algorithm(&self) -> HashAlgorithm {
199133
self.hash_algorithm
200134
}
@@ -243,39 +177,34 @@ impl<'a> CompLevelEstimatorState<'a> {
243177
let mut candidates: Vec<Box<CandidateInfo>> = Vec::new();
244178

245179
candidates.push(Box::new(CandidateInfo::new(
246-
MINIZ_LEVEL1_HASH_SIZE_MASK,
247-
0,
248180
add_policy,
249181
HashAlgorithm::MiniZFast,
250-
&input,
182+
wbits,
251183
)));
252184

253185
for (hash_shift, hash_mask) in [(5, 32767), (4, 2047)] {
254186
candidates.push(Box::new(CandidateInfo::new(
255-
hash_mask,
256-
hash_shift,
257187
add_policy,
258-
HashAlgorithm::Zlib,
259-
&input,
188+
HashAlgorithm::Zlib {
189+
hash_mask,
190+
hash_shift,
191+
},
192+
wbits,
260193
)));
261194
}
262195

263196
// Libdeflate4 candidate
264197
candidates.push(Box::new(CandidateInfo::new(
265-
0xffff,
266-
0,
267198
add_policy,
268199
HashAlgorithm::Libdeflate4,
269-
&input,
200+
wbits,
270201
)));
271202

272203
// ZlibNG candidate
273204
candidates.push(Box::new(CandidateInfo::new(
274-
0xffff,
275-
0,
276205
add_policy,
277206
HashAlgorithm::ZlibNG,
278-
&input,
207+
wbits,
279208
)));
280209

281210
CompLevelEstimatorState {
@@ -291,25 +220,14 @@ impl<'a> CompLevelEstimatorState<'a> {
291220
}
292221
}
293222

294-
fn update_hash(&mut self, mut length: u32, override_add_policy: bool) {
295-
while length > 0 {
296-
let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH);
297-
298-
for i in &mut self.candidates {
299-
i.hash_chain.invoke_update_hash(
300-
batch_len,
301-
&self.input,
302-
if override_add_policy {
303-
DictionaryAddPolicy::AddAll
304-
} else {
305-
i.add_policy
306-
},
307-
);
308-
}
309-
310-
self.input.advance(batch_len);
311-
length -= batch_len;
223+
fn update_hash(&mut self, length: u32, override_add_policy: bool) {
224+
for i in &mut self.candidates {
225+
let mut inputc = self.input.clone();
226+
i.hash_chain
227+
.update_hash_with_depth(length, &mut inputc, override_add_policy);
312228
}
229+
230+
self.input.advance(length);
313231
}
314232

315233
fn check_match(&mut self, token: PreflateTokenReference) {
@@ -371,8 +289,6 @@ impl<'a> CompLevelEstimatorState<'a> {
371289
let mut max_lazy = 258;
372290
let mut nice_length = 258;
373291

374-
let hash_mask = candidate.hash_mask();
375-
let hash_shift = candidate.hash_shift();
376292
let add_policy = candidate.add_policy;
377293
let max_chain = candidate.max_chain_found() + 1;
378294
let hash_algorithm = candidate.hash_algorithm();
@@ -419,8 +335,6 @@ impl<'a> CompLevelEstimatorState<'a> {
419335
matches_to_start_detected: self.match_to_start,
420336
very_far_matches_detected: very_far_matches,
421337
max_dist_3_matches: self.longest_len_3_dist as u16,
422-
hash_mask,
423-
hash_shift,
424338
add_policy,
425339
good_length,
426340
max_lazy,

0 commit comments

Comments
 (0)