Skip to content

Commit

Permalink
fixed readahead of bits
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Nov 22, 2023
1 parent 88f5cd0 commit def3585
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 173 deletions.
95 changes: 95 additions & 0 deletions src/bit_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Apache License, Version 2.0. See LICENSE.txt in the project root for license information.
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

use std::io::Read;

use byteorder::ReadBytesExt;

/// Abstraction over a source from which a caller can pull a requested number of bits.
///
/// Implemented in this file by `BitReader`; other implementers are not visible here.
pub trait ReadBits {
/// Reads `cbit` bits from the source and returns them packed into a `u32`,
/// or an error if the bits could not be produced.
///
/// NOTE(review): the bit order and the accepted range of `cbit` are defined by the
/// implementer; `BitReader` accepts 0 to 32 bits and fills the result starting at
/// its least-significant bit.
fn get(&mut self, cbit: u32) -> anyhow::Result<u32>;
}

/// Reads a byte stream bit by bit, buffering at most one byte at a time.
///
/// Bits are consumed from the least-significant end of each byte.
pub struct BitReader<R> {
// Underlying byte source; one byte is pulled from it whenever the bit buffer is empty.
binary_reader: R,
// Bits of the most recently read byte that have not been consumed yet, kept in the
// low-order positions (consumed bits are shifted out to the right).
bits_read: u32,
// Number of valid bits remaining in `bits_read`; 0 means the buffer is empty,
// and it is set to 8 right after a byte is loaded.
bit_count: u32,
}

/// Exposes `BitReader` through the `ReadBits` trait so that code written against the
/// trait can consume it.
impl<R: Read> ReadBits for BitReader<R> {
    /// Forwards to the inherent `BitReader::get`; inherent associated functions take
    /// precedence over trait methods in path resolution, so this does not recurse.
    fn get(&mut self, cbit: u32) -> anyhow::Result<u32> {
        Self::get(self, cbit)
    }
}

impl<R: Read> BitReader<R> {
pub fn new(binary_reader: R) -> Self {
BitReader {
binary_reader,
bits_read: 0,
bit_count: 0,
}
}

/// Clear out the buffer and reset the position to the byte after the "current" position. Tricky since we may have more than 8 bits buffered.
pub fn flush_buffer_to_byte_boundary(&mut self) {
self.bit_count = 0;
}

pub fn bit_position_in_current_byte(&self) -> u32 {
8 - self.bit_count
}

pub fn read_byte(&mut self) -> anyhow::Result<u8> {
if self.bit_count != 0 {
return Err(anyhow::Error::msg("BitReader Error: Attempt to read bytes without first calling FlushBufferToByteBoundary"));
}

let result = self.binary_reader.read_u8()?;
Ok(result)
}

/// Read cbit bits from the input stream return
/// Only supports read of 1 to 32 bits.
pub fn get(&mut self, cbit: u32) -> anyhow::Result<u32> {
let mut wret: u32 = 0;
let mut cbits_added = 0;

if cbit == 0 {
return Ok(wret);
}

if cbit > 32 {
return Err(anyhow::Error::msg(
"BitReader Error: Attempt to read more than 32 bits",
));
}

while cbits_added < cbit {
let cbits_needed = cbit - cbits_added;

// Ensure the buffer is has at least 1 bit in it.
if self.bit_count == 0 {
self.bits_read = self.binary_reader.read_u8()? as u32;
self.bit_count = 8;
}

// Calc number of bits we can take from the buffer
let cbits_from_buffer = std::cmp::min(cbits_needed, self.bit_count);

// make room in return buffer for bits and insert them in the buffer
wret |= (self.bits_read & !(u32::MAX << cbits_from_buffer)) << cbits_added;

// Update the buffer state to reflect the bits that have been read
self.bits_read >>= cbits_from_buffer;
self.bit_count -= cbits_from_buffer;

// Update the running count of bits added so far.
cbits_added += cbits_from_buffer;
}

Ok(wret)
}
}
4 changes: 2 additions & 2 deletions src/bit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ fn write_simple() {
/// write various bit patterns and see if the result matches the input
#[test]
fn write_roundtrip() {
use crate::zip_bit_reader::ZipBitReader;
use crate::bit_reader::BitReader;

let mut b = BitWriter::default();
let mut data_buffer = Vec::new();
Expand Down Expand Up @@ -92,7 +92,7 @@ fn write_roundtrip() {

let len = data_buffer.len() as i64;
let mut cursor = std::io::Cursor::new(data_buffer);
let mut reader = ZipBitReader::new(&mut cursor, len);
let mut reader = BitReader::new(&mut cursor);

for &(bits, len) in pattern.iter() {
assert_eq!(reader.get(len).unwrap(), bits);
Expand Down
16 changes: 8 additions & 8 deletions src/deflate_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@

use anyhow::Context;

use std::io::{Read, Seek};
use std::io::Read;

use crate::{
bit_reader::BitReader,
huffman_encoding::{HuffmanOriginalEncoding, HuffmanReader},
preflate_constants,
preflate_token::{BlockType, PreflateTokenBlock},
zip_bit_reader::ZipBitReader,
};

/// Used to read binary data in deflate format and convert it to plaintext and a list of tokenized blocks
/// containing the literals and distance codes that were used to compress the file
pub struct DeflateReader<'a, R> {
input: ZipBitReader<'a, R>,
pub struct DeflateReader<R> {
input: BitReader<R>,
plain_text: Vec<u8>,
}

impl<'a, R: Read + Seek> DeflateReader<'a, R> {
pub fn new(compressed_text: &'a mut R, max_readable_bytes: i64) -> Self {
impl<R: Read> DeflateReader<R> {
pub fn new(compressed_text: R) -> Self {
DeflateReader {
input: ZipBitReader::new(compressed_text, max_readable_bytes),
input: BitReader::new(compressed_text),
plain_text: Vec::new(),
}
}
Expand Down Expand Up @@ -86,7 +86,7 @@ impl<'a, R: Read + Seek> DeflateReader<'a, R> {
blk.uncompressed_len = len;
blk.context_len = 0;

self.input.flush_buffer_to_byte_boundary()?;
self.input.flush_buffer_to_byte_boundary();

for _i in 0..len {
let b = self.input.read_byte()?;
Expand Down
10 changes: 5 additions & 5 deletions src/huffman_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
use anyhow::Result;

use crate::{
bit_reader::ReadBits,
bit_writer::BitWriter,
huffman_helper::{calc_huffman_codes, calculate_huffman_code_tree, decode_symbol},
preflate_constants::TREE_CODE_ORDER_TABLE,
zip_bit_reader::ReadBits,
};

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
Expand Down Expand Up @@ -354,7 +354,7 @@ impl HuffmanWriter {

#[test]
fn roundtrip_huffman_bitreadwrite() {
use crate::zip_bit_reader::ZipBitReader;
use crate::bit_reader::BitReader;
use std::io::Cursor;

let code_lengths = [1, 0, 3, 3, 4, 4, 3, 0];
Expand All @@ -374,7 +374,7 @@ fn roundtrip_huffman_bitreadwrite() {

let data_buffer_size = data_buffer.len();
let mut reader = Cursor::new(&data_buffer);
let mut bit_reader = ZipBitReader::new(&mut reader, data_buffer_size as i64);
let mut bit_reader = BitReader::new(&mut reader);

let huffman_tree = calculate_huffman_code_tree(&code_lengths).unwrap();

Expand Down Expand Up @@ -456,7 +456,7 @@ fn roundtrip_huffman_table() {

#[cfg(test)]
fn rountrip_test(encoding: HuffmanOriginalEncoding) {
use crate::zip_bit_reader::ZipBitReader;
use crate::bit_reader::BitReader;
use std::io::Cursor;

let mut output_buffer = Vec::new();
Expand All @@ -472,7 +472,7 @@ fn rountrip_test(encoding: HuffmanOriginalEncoding) {

// now re-read the encoding
let mut reader = Cursor::new(&output_buffer);
let mut bit_reader = ZipBitReader::new(&mut reader, output_buffer.len() as i64);
let mut bit_reader = BitReader::new(&mut reader);
let encoding2 = HuffmanOriginalEncoding::read(&mut bit_reader).unwrap();
assert_eq!(encoding, encoding2);

Expand Down
2 changes: 1 addition & 1 deletion src/huffman_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

use crate::zip_bit_reader::ReadBits;
use crate::bit_reader::ReadBits;
use std::vec;

/// Calculates Huffman code array given an array of Huffman Code Lengths using the RFC 1951 algorithm
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*--------------------------------------------------------------------------------------------*/

mod bit_helper;
mod bit_reader;
mod bit_writer;
mod cabac_codec;
mod complevel_estimator;
Expand All @@ -26,7 +27,6 @@ mod process;
mod statistical_codec;
mod token_predictor;
mod tree_predictor;
mod zip_bit_reader;

use anyhow::{self};
use cabac::{
Expand Down
2 changes: 1 addition & 1 deletion src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub fn read_deflate<E: PredictionEncoder>(
deflate_info_dump_level: u32,
) -> Result<(usize, PreflateParameters, Vec<u8>, Vec<PreflateTokenBlock>), PreflateError> {
let mut input_stream = Cursor::new(compressed_data);
let mut block_decoder = DeflateReader::new(&mut input_stream, compressed_data.len() as i64);
let mut block_decoder = DeflateReader::new(&mut input_stream);

let mut blocks = Vec::new();
let mut last = false;
Expand Down
152 changes: 0 additions & 152 deletions src/zip_bit_reader.rs

This file was deleted.

Loading

0 comments on commit def3585

Please sign in to comment.