Skip to content

Commit

Permalink
Improve documentation and structure for code review (#19)
Browse files Browse the repository at this point in the history
* move files to logical modules and improve docs

* add more docs

* work
  • Loading branch information
mcroomp authored Jan 30, 2025
1 parent b8d374f commit 5f9b868
Show file tree
Hide file tree
Showing 25 changed files with 374 additions and 358 deletions.
1 change: 1 addition & 0 deletions src/bit_reader.rs → src/deflate/bit_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub trait ReadBits {
fn get(&mut self, cbit: u32) -> Result<u32>;
}

/// BitReader reads a variable number of bits from a byte stream.
pub struct BitReader<R> {
binary_reader: R,
bits_read: u32,
Expand Down
3 changes: 2 additions & 1 deletion src/bit_writer.rs → src/deflate/bit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

/// Used to write a variable number of bits to a byte buffer.
#[derive(Default)]
pub struct BitWriter {
pub bit_buffer: u32,
Expand Down Expand Up @@ -61,7 +62,7 @@ fn write_simple() {
/// write various bit patterns and see if the result matches the input
#[test]
fn write_roundtrip() {
use crate::bit_reader::BitReader;
use super::bit_reader::BitReader;

let mut b = BitWriter::default();
let mut data_buffer = Vec::new();
Expand Down
File renamed without changes.
51 changes: 25 additions & 26 deletions src/deflate_reader.rs → src/deflate/deflate_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
*--------------------------------------------------------------------------------------------*/

use crate::{
deflate::deflate_token::{DeflateHuffmanType, DeflateToken, DeflateTokenReference},
preflate_error::{err_exit_code, AddContext, ExitCode, Result},
preflate_token::{PreflateHuffmanType, PreflateToken, PreflateTokenReference},
};

use std::io::Read;

use crate::{
use super::{deflate_constants, deflate_token::DeflateTokenBlock};

use super::{
bit_reader::BitReader,
huffman_encoding::{HuffmanOriginalEncoding, HuffmanReader},
preflate_constants,
preflate_token::PreflateTokenBlock,
};

/// Used to read binary data in deflate format and convert it to plaintext and a list of tokenized blocks
/// Used to read binary data in DEFLATE format and convert it to plaintext and a list of tokenized blocks
/// containing the literals and distance codes that were used to compress the file
pub struct DeflateReader<R> {
input: BitReader<R>,
Expand Down Expand Up @@ -64,7 +64,7 @@ impl<R: Read> DeflateReader<R> {
}
}

pub fn read_block(&mut self, last: &mut bool) -> Result<PreflateTokenBlock> {
pub fn read_block(&mut self, last: &mut bool) -> Result<DeflateTokenBlock> {
*last = self.read_bit()?;
let mode = self.read_bits(2)?;

Expand All @@ -89,7 +89,7 @@ impl<R: Read> DeflateReader<R> {
self.write_literal(b);
}

Ok(PreflateTokenBlock::Stored {
Ok(DeflateTokenBlock::Stored {
uncompressed,
padding_bits,
})
Expand All @@ -102,17 +102,17 @@ impl<R: Read> DeflateReader<R> {
let decoder = HuffmanReader::create_fixed()?;
if let Err(e) = self.decode_block(&decoder, &mut tokens) {
if e.exit_code() == ExitCode::ShortRead {
Ok(PreflateTokenBlock::Huffman {
Ok(DeflateTokenBlock::Huffman {
tokens,
huffman_type: PreflateHuffmanType::Static { incomplete: true },
huffman_type: DeflateHuffmanType::Static { incomplete: true },
})
} else {
Err(e)
}
} else {
Ok(PreflateTokenBlock::Huffman {
Ok(DeflateTokenBlock::Huffman {
tokens,
huffman_type: PreflateHuffmanType::Static { incomplete: false },
huffman_type: DeflateHuffmanType::Static { incomplete: false },
})
}
}
Expand All @@ -125,9 +125,9 @@ impl<R: Read> DeflateReader<R> {
let mut tokens = Vec::new();
self.decode_block(&decoder, &mut tokens).context()?;

Ok(PreflateTokenBlock::Huffman {
Ok(DeflateTokenBlock::Huffman {
tokens,
huffman_type: PreflateHuffmanType::Dynamic { huffman_encoding },
huffman_type: DeflateHuffmanType::Dynamic { huffman_encoding },
})
}

Expand All @@ -138,7 +138,7 @@ impl<R: Read> DeflateReader<R> {
fn decode_block(
&mut self,
decoder: &HuffmanReader,
tokens: &mut Vec<PreflateToken>,
tokens: &mut Vec<DeflateToken>,
) -> Result<()> {
let mut earliest_reference = i32::MAX;
let mut cur_pos = 0;
Expand All @@ -147,40 +147,39 @@ impl<R: Read> DeflateReader<R> {
let lit_len: u32 = decoder.fetch_next_literal_code(&mut self.input)?.into();
if lit_len < 256 {
self.write_literal(lit_len as u8);
tokens.push(PreflateToken::Literal(lit_len as u8));
tokens.push(DeflateToken::Literal(lit_len as u8));
cur_pos += 1;
} else if lit_len == 256 {
return Ok(());
} else {
let lcode: u32 = lit_len - preflate_constants::NONLEN_CODE_COUNT as u32;
if lcode >= preflate_constants::LEN_CODE_COUNT as u32 {
let lcode: u32 = lit_len - deflate_constants::NONLEN_CODE_COUNT as u32;
if lcode >= deflate_constants::LEN_CODE_COUNT as u32 {
return err_exit_code(ExitCode::InvalidDeflate, "Invalid length code");
}
let len: u32 = preflate_constants::MIN_MATCH
+ preflate_constants::LENGTH_BASE_TABLE[lcode as usize] as u32
let len: u32 = deflate_constants::MIN_MATCH
+ deflate_constants::LENGTH_BASE_TABLE[lcode as usize] as u32
+ self
.read_bits(preflate_constants::LENGTH_EXTRA_TABLE[lcode as usize].into())?;
.read_bits(deflate_constants::LENGTH_EXTRA_TABLE[lcode as usize].into())?;

// A length of 258 can be encoded two ways: as code 284 with all 5 extra bits set (non-standard) or as code 285 with 0 extra bits (standard)
let irregular258 =
len == 258 && lcode != preflate_constants::LEN_CODE_COUNT as u32 - 1;
len == 258 && lcode != deflate_constants::LEN_CODE_COUNT as u32 - 1;

let dcode = decoder.fetch_next_distance_char(&mut self.input)? as u32;
if dcode >= preflate_constants::DIST_CODE_COUNT as u32 {
if dcode >= deflate_constants::DIST_CODE_COUNT as u32 {
return err_exit_code(ExitCode::InvalidDeflate, "Invalid distance code");
}

let dist = 1
+ preflate_constants::DIST_BASE_TABLE[dcode as usize] as u32
+ self
.read_bits(preflate_constants::DIST_EXTRA_TABLE[dcode as usize].into())?;
+ deflate_constants::DIST_BASE_TABLE[dcode as usize] as u32
+ self.read_bits(deflate_constants::DIST_EXTRA_TABLE[dcode as usize].into())?;

if dist as usize > self.plain_text.len() {
return err_exit_code(ExitCode::InvalidDeflate, "Invalid distance");
}

self.write_reference(dist, len);
tokens.push(PreflateToken::Reference(PreflateTokenReference::new(
tokens.push(DeflateToken::Reference(DeflateTokenReference::new(
len,
dist,
irregular258,
Expand Down
53 changes: 30 additions & 23 deletions src/preflate_token.rs → src/deflate/deflate_token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,35 @@
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

use crate::{
huffman_encoding::HuffmanOriginalEncoding,
preflate_constants::{
quantize_distance, quantize_length, DIST_CODE_COUNT, LITLENDIST_CODE_COUNT,
NONLEN_CODE_COUNT,
},
use crate::deflate::huffman_encoding::HuffmanOriginalEncoding;

use super::deflate_constants::{
quantize_distance, quantize_length, DIST_CODE_COUNT, LITLENDIST_CODE_COUNT, NONLEN_CODE_COUNT,
};

/// In a DEFLATE stream, tokens are either literals (bytes) or references to previous bytes
/// with a distance and length.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct PreflateTokenReference {
len: u8,
dist: u16,
irregular258: bool,
pub enum DeflateToken {
Literal(u8),
Reference(DeflateTokenReference),
}

/// In the case of a distance and length, the length is the number of bytes to copy from the
/// previous bytes, and the distance is the number of bytes back to start copying from.
///
/// The `irregular258` field indicates that a length of 258 was encoded in a
/// suboptimal way (the RFC allows for two different ways to encode 258).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum PreflateToken {
Literal(u8),
Reference(PreflateTokenReference),
pub struct DeflateTokenReference {
len: u8,
dist: u16,
irregular258: bool,
}

impl PreflateTokenReference {
pub fn new(len: u32, dist: u32, irregular258: bool) -> PreflateTokenReference {
PreflateTokenReference {
impl DeflateTokenReference {
pub fn new(len: u32, dist: u32, irregular258: bool) -> DeflateTokenReference {
DeflateTokenReference {
len: (len - 3) as u8,
dist: dist as u16,
irregular258,
Expand Down Expand Up @@ -56,7 +61,7 @@ pub const BT_DYNAMICHUFF: u32 = 0;
pub const BT_STATICHUFF: u32 = 2;

#[derive(Debug, PartialEq)]
pub enum PreflateHuffmanType {
pub enum DeflateHuffmanType {
Dynamic {
huffman_encoding: HuffmanOriginalEncoding,
},
Expand All @@ -66,17 +71,19 @@ pub enum PreflateHuffmanType {
}

#[derive(Debug)]
pub enum PreflateTokenBlock {
pub enum DeflateTokenBlock {
Huffman {
tokens: Vec<PreflateToken>,
huffman_type: PreflateHuffmanType,
tokens: Vec<DeflateToken>,
huffman_type: DeflateHuffmanType,
},
Stored {
uncompressed: Vec<u8>,
padding_bits: u8,
},
}

/// Used to track the frequencies of tokens in the DEFLATE stream,
/// which are later used to build the Huffman encoding.
#[derive(Debug)]
pub struct TokenFrequency {
pub literal_codes: [u16; LITLENDIST_CODE_COUNT],
Expand All @@ -98,12 +105,12 @@ impl Default for TokenFrequency {
}

impl TokenFrequency {
pub fn commit_token(&mut self, token: &PreflateToken) {
pub fn commit_token(&mut self, token: &DeflateToken) {
match token {
PreflateToken::Literal(lit) => {
DeflateToken::Literal(lit) => {
self.literal_codes[*lit as usize] += 1;
}
PreflateToken::Reference(t) => {
DeflateToken::Reference(t) => {
self.literal_codes[NONLEN_CODE_COUNT + quantize_length(t.len())] += 1;
self.distance_codes[quantize_distance(t.dist())] += 1;
}
Expand Down
30 changes: 16 additions & 14 deletions src/deflate_writer.rs → src/deflate/deflate_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@

use crate::preflate_error::Result;

use crate::preflate_token::PreflateHuffmanType;
use crate::{
bit_writer::BitWriter,
huffman_encoding::HuffmanWriter,
preflate_constants::{
use super::deflate_token::DeflateHuffmanType;
use super::{
deflate_constants::{
quantize_distance, quantize_length, DIST_BASE_TABLE, DIST_EXTRA_TABLE, LENGTH_BASE_TABLE,
LENGTH_EXTRA_TABLE, LITLEN_CODE_COUNT, MIN_MATCH, NONLEN_CODE_COUNT,
},
preflate_token::{PreflateToken, PreflateTokenBlock},
deflate_token::{DeflateToken, DeflateTokenBlock},
};

use super::bit_writer::BitWriter;
use super::huffman_encoding::HuffmanWriter;

/// Takes a tokenized block and writes it to the original compressed output.
pub struct DeflateWriter {
/// bit writer to write partial bits to output
bitwriter: BitWriter,
Expand All @@ -39,10 +41,10 @@ impl DeflateWriter {
o
}

pub fn encode_block(&mut self, block: &PreflateTokenBlock, last: bool) -> Result<()> {
pub fn encode_block(&mut self, block: &DeflateTokenBlock, last: bool) -> Result<()> {
self.bitwriter.write(last as u32, 1, &mut self.output);
match block {
PreflateTokenBlock::Stored {
DeflateTokenBlock::Stored {
uncompressed,
padding_bits,
} => {
Expand All @@ -57,16 +59,16 @@ impl DeflateWriter {

self.output.extend_from_slice(&uncompressed);
}
PreflateTokenBlock::Huffman {
DeflateTokenBlock::Huffman {
tokens,
huffman_type,
} => match huffman_type {
PreflateHuffmanType::Static { .. } => {
DeflateHuffmanType::Static { .. } => {
self.bitwriter.write(1, 2, &mut self.output);
let huffman_writer = HuffmanWriter::start_fixed_huffman_table();
self.encode_block_with_decoder(tokens, &huffman_writer);
}
PreflateHuffmanType::Dynamic {
DeflateHuffmanType::Dynamic {
huffman_encoding, ..
} => {
let huffman_writer = HuffmanWriter::start_dynamic_huffman_table(
Expand All @@ -90,19 +92,19 @@ impl DeflateWriter {

fn encode_block_with_decoder(
&mut self,
tokens: &Vec<PreflateToken>,
tokens: &Vec<DeflateToken>,
huffman_writer: &HuffmanWriter,
) {
for token in tokens {
match token {
PreflateToken::Literal(lit) => {
DeflateToken::Literal(lit) => {
huffman_writer.write_literal(
&mut self.bitwriter,
&mut self.output,
u16::from(*lit),
);
}
PreflateToken::Reference(reference) => {
DeflateToken::Reference(reference) => {
if reference.get_irregular258() {
huffman_writer.write_literal(
&mut self.bitwriter,
Expand Down
7 changes: 7 additions & 0 deletions src/huffman_calc.rs → src/deflate/huffman_calc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ pub enum HufftreeBitCalc {
Miniz,
}

/// Calculates the Huffman bit lengths for a given distribution of symbols.
///
/// There is no one-size-fits-all solution for calculating Huffman bit lengths and
/// each library has its own.
///
/// If we can get the right algorithm, it will minimize the number of corrections
/// that we later need to write when encoding the data back to the compressed format.
pub fn calc_bit_lengths(
bit_calc: HufftreeBitCalc,
sym_count: &[u16],
Expand Down
Loading

0 comments on commit 5f9b868

Please sign in to comment.