Skip to content

Commit

Permalink
improving parameter prediction
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Nov 26, 2023
1 parent 8e90276 commit 7aaf339
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 106 deletions.
4 changes: 0 additions & 4 deletions src/deflate_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ impl<R: Read> DeflateReader<R> {
self.input.get(padding_bit_count.into()).unwrap() as u8
}

pub fn get_plain_text(&self) -> &[u8] {
&self.plain_text
}

/// moves ownership out of block reader
pub fn move_plain_text(&mut self) -> Vec<u8> {
std::mem::take(&mut self.plain_text)
Expand Down
90 changes: 71 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,27 @@ use cabac::{
vp8::{VP8Reader, VP8Writer},
};
use preflate_error::PreflateError;
use preflate_parameter_estimator::{estimate_preflate_parameters, PreflateParameters};
use process::parse_deflate;
use std::io::Cursor;

use crate::{
cabac_codec::{PredictionDecoderCabac, PredictionEncoderCabac},
process::{read_deflate, write_deflate},
process::{decode_mispredictions, encode_mispredictions},
statistical_codec::PredictionEncoder,
};

/// result of decompress_deflate_stream
pub struct DecompressResult {
/// the plaintext that was decompressed from the stream
pub plain_text: Vec<u8>,

/// the extra data that is needed to reconstruct the deflate stream exactly as it was written
pub cabac_encoded: Vec<u8>,
pub prediction_corrections: Vec<u8>,

/// the number of bytes that were processed from the compressed stream (this will be exactly the
/// data that will be recreated using the cabac_encoded data)
pub compressed_processed: usize,
pub compressed_size: usize,
}

/// decompresses a deflate stream and returns the plaintext and cabac_encoded data that can be used to reconstruct it
Expand All @@ -62,27 +66,37 @@ pub fn decompress_deflate_stream(

let mut cabac_encoder =
PredictionEncoderCabac::new(VP8Writer::new(&mut cabac_encoded).unwrap());
let (compressed_processed, _params, plain_text, _original_blocks) =
read_deflate(compressed_data, &mut cabac_encoder, 0)?;

let contents = parse_deflate(compressed_data, 1)?;

let params = estimate_preflate_parameters(&contents.plain_text, &contents.blocks);

params.write(&mut cabac_encoder);
encode_mispredictions(&contents, &params, &mut cabac_encoder)?;

cabac_encoder.finish();

if verify {
let mut cabac_decoder =
PredictionDecoderCabac::new(VP8Reader::new(Cursor::new(&cabac_encoded)).unwrap());
let (recompressed, _recreated_blocks) = write_deflate(&plain_text, &mut cabac_decoder)?;

if recompressed[..] != compressed_data[..compressed_processed] {
let reread_params = PreflateParameters::read(&mut cabac_decoder);
assert_eq!(params, reread_params);

let (recompressed, _recreated_blocks) =
decode_mispredictions(&reread_params, &contents.plain_text, &mut cabac_decoder)?;

if recompressed[..] != compressed_data[..contents.compressed_size] {
return Err(PreflateError::Mismatch(anyhow::anyhow!(
"recompressed data does not match original"
)));
}
}

Ok(DecompressResult {
plain_text,
cabac_encoded,
compressed_processed,
plain_text: contents.plain_text,
prediction_corrections: cabac_encoded,
compressed_size: contents.compressed_size,
})
}

Expand All @@ -93,7 +107,10 @@ pub fn recompress_deflate_stream(
) -> Result<Vec<u8>, PreflateError> {
let mut cabac_decoder =
PredictionDecoderCabac::new(VP8Reader::new(Cursor::new(&cabac_encoded)).unwrap());
let (recompressed, _recreated_blocks) = write_deflate(plain_text, &mut cabac_decoder)?;

let params = PreflateParameters::read(&mut cabac_decoder);
let (recompressed, _recreated_blocks) =
decode_mispredictions(&params, plain_text, &mut cabac_decoder)?;
Ok(recompressed)
}

Expand All @@ -107,16 +124,24 @@ pub fn decompress_deflate_stream_assert(

let mut cabac_encoder =
PredictionEncoderCabac::new(DebugWriter::new(&mut cabac_encoded).unwrap());
let (compressed_processed, _params, plain_text, _original_blocks) =
read_deflate(compressed_data, &mut cabac_encoder, 0)?;

assert_eq!(compressed_processed, compressed_data.len());
let contents = parse_deflate(compressed_data, 1)?;

let params = estimate_preflate_parameters(&contents.plain_text, &contents.blocks);

params.write(&mut cabac_encoder);
encode_mispredictions(&contents, &params, &mut cabac_encoder)?;

assert_eq!(contents.compressed_size, compressed_data.len());
cabac_encoder.finish();

if verify {
let mut cabac_decoder =
PredictionDecoderCabac::new(DebugReader::new(Cursor::new(&cabac_encoded)).unwrap());
let (recompressed, _recreated_blocks) = write_deflate(&plain_text, &mut cabac_decoder)?;

let params = PreflateParameters::read(&mut cabac_decoder);
let (recompressed, _recreated_blocks) =
decode_mispredictions(&params, &contents.plain_text, &mut cabac_decoder)?;

if recompressed[..] != compressed_data[..] {
return Err(PreflateError::Mismatch(anyhow::anyhow!(
Expand All @@ -126,9 +151,9 @@ pub fn decompress_deflate_stream_assert(
}

Ok(DecompressResult {
plain_text,
cabac_encoded,
compressed_processed,
plain_text: contents.plain_text,
prediction_corrections: cabac_encoded,
compressed_size: contents.compressed_size,
})
}

Expand All @@ -140,6 +165,33 @@ pub fn recompress_deflate_stream_assert(
) -> Result<Vec<u8>, PreflateError> {
let mut cabac_decoder =
PredictionDecoderCabac::new(DebugReader::new(Cursor::new(&cabac_encoded)).unwrap());
let (recompressed, _recreated_blocks) = write_deflate(plain_text, &mut cabac_decoder)?;

let params = PreflateParameters::read(&mut cabac_decoder);

let (recompressed, _recreated_blocks) =
decode_mispredictions(&params, plain_text, &mut cabac_decoder)?;
Ok(recompressed)
}

#[test]
fn verify_roundtrip() {
    use crate::process::read_file;

    // Round-trip a known level-1 zlib deflate stream: decompress to plaintext
    // plus prediction corrections, then rebuild and compare byte-for-byte.
    let compressed = read_file("compressed_zlib_level1.deflate");

    let result = decompress_deflate_stream(&compressed, true).unwrap();
    let reconstructed =
        recompress_deflate_stream(&result.plain_text, &result.prediction_corrections).unwrap();
    assert_eq!(compressed, reconstructed);
}

#[test]
fn verify_roundtrip_assert() {
    use crate::process::read_file;

    // Same round-trip as `verify_roundtrip`, but through the debug
    // (assert-instrumented) codec variants of the encode/decode entry points.
    let compressed = read_file("compressed_zlib_level1.deflate");

    let result = decompress_deflate_stream_assert(&compressed, true).unwrap();
    let reconstructed =
        recompress_deflate_stream_assert(&result.plain_text, &result.prediction_corrections)
            .unwrap();
    assert_eq!(compressed, reconstructed);
}
3 changes: 2 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ fn main_with_result() -> anyhow::Result<()> {

let result = preflate_rs::decompress_deflate_stream(&content, true)?;

let recomp = preflate_rs::recompress_deflate_stream(&result.plain_text, &result.cabac_encoded)?;
let recomp =
preflate_rs::recompress_deflate_stream(&result.plain_text, &result.prediction_corrections)?;

if content[..] != recomp[..] {
return Err(anyhow::anyhow!("recompressed data does not match original"));
Expand Down
6 changes: 3 additions & 3 deletions src/preflate_parameter_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ use crate::{
statistical_codec::{PredictionDecoder, PredictionEncoder},
};

#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
/// Compression strategy the original deflate encoder is presumed to have
/// used; `Eq`/`PartialEq` allow the round-trip verification to compare the
/// re-read parameters against the estimated ones.
pub enum PreflateStrategy {
    /// Normal deflate: full match search with dynamic Huffman coding.
    Default,
    /// Run-length-only matching (zlib's Z_RLE-style strategy) — TODO confirm
    /// this mirrors zlib's strategy enum.
    RleOnly,
    /// Huffman coding only, no LZ77 matches (zlib's Z_HUFFMAN_ONLY-style).
    HuffOnly,
    /// Stored (uncompressed) blocks only.
    Store,
}

#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
/// Huffman-table strategy observed in the original stream; derives `Eq` so
/// the decoder-side re-read parameters can be asserted equal to the
/// encoder-side estimate.
pub enum PreflateHuffStrategy {
    /// Every block uses dynamically-built Huffman tables.
    Dynamic,
    /// A mix of dynamic and static Huffman blocks.
    Mixed,
    /// Every block uses the fixed (static) Huffman tables.
    Static,
}

#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct PreflateParameters {
pub strategy: PreflateStrategy,
pub huff_strategy: PreflateHuffStrategy,
Expand Down
15 changes: 6 additions & 9 deletions src/preflate_parse_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,18 @@ pub struct PreflateParserConfig {
pub max_chain: u32,
}

pub const FAST_PREFLATE_PARSER_SETTINGS: [PreflateParserConfig; 4] = [
// max speed used by miniz, always match the first entry
PreflateParserConfig {
good_length: 4,
max_lazy: 2,
nice_length: 8,
max_chain: 200,
},
pub const FAST_PREFLATE_PARSER_SETTINGS: [PreflateParserConfig; 3] = [
// these three levels are used by zlib

// max speed, no lazy matches (the lazy field means
// the maximum length that is added to the dictionary during
// a match)
PreflateParserConfig {
good_length: 4,
max_lazy: 4,
nice_length: 8,
max_chain: 4,
}, // max speed, no lazy matches
},
PreflateParserConfig {
good_length: 4,
max_lazy: 5,
Expand Down
Loading

0 comments on commit 7aaf339

Please sign in to comment.