diff --git a/src/hash_algorithm.rs b/src/hash_algorithm.rs index 1084c56..9fd5ec4 100644 --- a/src/hash_algorithm.rs +++ b/src/hash_algorithm.rs @@ -19,6 +19,81 @@ pub enum HashAlgorithm { RandomVector, Crc32cHash, } + +const HASH_ALGORITHM_NONE: u16 = 0; +const HASH_ALGORITHM_ZLIB: u16 = 1; +const HASH_ALGORITHM_MINIZ_FAST: u16 = 2; +const HASH_ALGORITHM_LIBDEFLATE4: u16 = 3; +const HASH_ALGORITHM_LIBDEFLATE4_FAST: u16 = 4; +const HASH_ALGORITHM_ZLIBNG: u16 = 5; +const HASH_ALGORITHM_RANDOMVECTOR: u16 = 6; +const HASH_ALGORITHM_CRC32C: u16 = 7; + +impl HashAlgorithm { + pub fn to_u16(self) -> u16 { + match self { + HashAlgorithm::None => HASH_ALGORITHM_NONE, + HashAlgorithm::Zlib { + hash_mask, + hash_shift, + } => { + HASH_ALGORITHM_ZLIB + | ((hash_mask.trailing_ones() as u16) << 8) + | ((hash_shift as u16) << 12) + } + HashAlgorithm::MiniZFast => HASH_ALGORITHM_MINIZ_FAST, + HashAlgorithm::Libdeflate4Fast => HASH_ALGORITHM_LIBDEFLATE4_FAST, + HashAlgorithm::Libdeflate4 => HASH_ALGORITHM_LIBDEFLATE4, + HashAlgorithm::ZlibNG => HASH_ALGORITHM_ZLIBNG, + HashAlgorithm::RandomVector => HASH_ALGORITHM_RANDOMVECTOR, + HashAlgorithm::Crc32cHash => HASH_ALGORITHM_CRC32C, + } + } + + pub fn from_u16(v: u16) -> Option { + match v & 0xff { + HASH_ALGORITHM_NONE => Some(HashAlgorithm::None), + HASH_ALGORITHM_ZLIB => { + let hash_mask = (1 << ((v >> 8) & 0xf)) - 1; + let hash_shift = (v >> 12) & 0xf; + Some(HashAlgorithm::Zlib { + hash_mask, + hash_shift: hash_shift.into(), + }) + } + HASH_ALGORITHM_MINIZ_FAST => Some(HashAlgorithm::MiniZFast), + HASH_ALGORITHM_LIBDEFLATE4_FAST => Some(HashAlgorithm::Libdeflate4Fast), + HASH_ALGORITHM_LIBDEFLATE4 => Some(HashAlgorithm::Libdeflate4), + HASH_ALGORITHM_ZLIBNG => Some(HashAlgorithm::ZlibNG), + HASH_ALGORITHM_RANDOMVECTOR => Some(HashAlgorithm::RandomVector), + HASH_ALGORITHM_CRC32C => Some(HashAlgorithm::Crc32cHash), + _ => None, + } + } +} + +#[test] +fn roundtrip_hash_algorithm_to_int() { + let test_hashes = [ + 
HashAlgorithm::Zlib { + hash_mask: 0x7ff, + hash_shift: 3, + }, + HashAlgorithm::MiniZFast, + HashAlgorithm::Libdeflate4Fast, + HashAlgorithm::Libdeflate4, + HashAlgorithm::ZlibNG, + HashAlgorithm::RandomVector, + HashAlgorithm::Crc32cHash, + ]; + + for &hash in test_hashes.iter() { + let hash_int = hash.to_u16(); + let hash2 = HashAlgorithm::from_u16(hash_int).unwrap(); + assert_eq!(hash, hash2); + } +} + pub trait HashImplementation: Default + Copy + Clone { const NUM_HASH_BYTES: usize; diff --git a/src/idat_parse.rs b/src/idat_parse.rs index 87bd418..854a24b 100644 --- a/src/idat_parse.rs +++ b/src/idat_parse.rs @@ -23,7 +23,7 @@ pub struct IdatContents { } impl IdatContents { - pub fn read_from_bytestream(r: &mut impl Read) -> std::io::Result { + pub fn read_from_bytestream(r: &mut impl Read) -> Result { let mut chunk_sizes = Vec::new(); loop { diff --git a/src/lib.rs b/src/lib.rs index 8733294..8c87a14 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,120 +34,131 @@ mod statistical_codec; mod token_predictor; mod tree_predictor; +pub mod unmanaged_api; + +use hash_algorithm::HashAlgorithm; pub use preflate_container::{ compress_zstd, decompress_deflate_stream, decompress_zstd, expand_zlib_chunks, recompress_deflate_stream, recreated_zlib_chunks, }; -pub use preflate_error::PreflateError; - -use std::{io::Cursor, panic::catch_unwind}; - -/// C ABI interface for compressing Zip file, exposed from DLL. -#[no_mangle] -pub unsafe extern "C" fn WrapperCompressZip( - input_buffer: *const u8, - input_buffer_size: u64, - output_buffer: *mut u8, - output_buffer_size: u64, - result_size: *mut u64, -) -> i32 { - match catch_unwind(|| { - let input_buffer = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize); - let output_buffer = - std::slice::from_raw_parts_mut(output_buffer, output_buffer_size as usize); - - let plain_text = expand_zlib_chunks(&input_buffer, 0)?; - - *result_size = zstd::bulk::compress_to_buffer(&plain_text, output_buffer, 9)? 
as u64; - - Result::<(), PreflateError>::Ok(()) - }) { - Ok(x) => { - if let Err(_) = x { - return -1; - } - return 0; - } - Err(_) => { - return -2; - } - } +use preflate_error::ExitCode; +pub use preflate_error::{PreflateError, Result}; + +use std::io::{Cursor, Write}; + +pub struct PreflateCompressionContext { + content: Vec, + result: Option>, + result_pos: usize, + compression_stats: CompressionStats, } -/// C ABI interface for decompressing Zip, exposed from DLL -#[no_mangle] -pub unsafe extern "C" fn WrapperDecompressZip( - input_buffer: *const u8, - input_buffer_size: u64, - output_buffer: *mut u8, - output_buffer_size: u64, - result_size: *mut u64, -) -> i32 { - match catch_unwind(|| { - let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize); - let output = std::slice::from_raw_parts_mut(output_buffer, output_buffer_size as usize); - - let compressed_data = - zstd::bulk::decompress(input, 1024 * 1024 * 128).map_err(PreflateError::from)?; - - let mut source = Cursor::new(&compressed_data); - let mut destination = Cursor::new(output); - - recreated_zlib_chunks(&mut source, &mut destination)?; - *result_size = destination.position(); - - Result::<(), PreflateError>::Ok(()) - }) { - Ok(x) => { - if let Err(_) = x { - return -1; - } - return 0; - } - Err(_) => { - return -2; +#[derive(Debug, Copy, Clone, Default)] +pub struct CompressionStats { + compressed_size: u64, + uncompressed_size: u64, + overhead_bytes: u64, + hash_algorithm: HashAlgorithm, +} + +impl PreflateCompressionContext { + pub fn new() -> Self { + PreflateCompressionContext { + content: Vec::new(), + compression_stats: CompressionStats::default(), + result: None, + result_pos: 0, } } -} -#[test] -fn extern_interface() { - use crate::process::read_file; - let input = read_file("samplezip.zip"); + fn process_buffer( + &mut self, + input: &[u8], + input_complete: bool, + writer: &mut impl Write, + max_output_write: usize, + ) -> Result { + 
self.content.extend_from_slice(input); + + if input_complete { + if self.result.is_some() { + if input.len() > 0 { + return Err(PreflateError::new( + ExitCode::InvalidParameter, + "more data provided after input_complete signaled", + )); + } + } else { + self.result = Some(compress_zstd( + &self.content, + 9, + &mut self.compression_stats, + )?); + } + } - let mut compressed = Vec::new(); + if let Some(result) = &mut self.result { + let amount_to_write = std::cmp::min(max_output_write, result.len() - self.result_pos); - compressed.resize(input.len() + 10000, 0); + writer.write(&result[self.result_pos..self.result_pos + amount_to_write])?; + self.result_pos += amount_to_write; + Ok(self.result_pos == result.len()) + } else { + Ok(false) + } + } - let mut result_size: u64 = 0; + pub fn stats(&self) -> CompressionStats { + self.compression_stats + } +} - unsafe { - let retval = WrapperCompressZip( - input[..].as_ptr(), - input.len() as u64, - compressed[..].as_mut_ptr(), - compressed.len() as u64, - (&mut result_size) as *mut u64, - ); +struct PreflateDecompressionContext { + capacity: usize, + content: Vec, + result: Option>, + result_pos: usize, +} - assert_eq!(retval, 0); +impl PreflateDecompressionContext { + fn new(capacity: usize) -> Self { + PreflateDecompressionContext { + content: Vec::new(), + result: None, + result_pos: 0, + capacity, + } } - let mut original = Vec::new(); - original.resize(input.len() + 10000, 0); + fn process_buffer( + &mut self, + input: &[u8], + input_complete: bool, + writer: &mut impl Write, + max_output_write: usize, + ) -> Result { + self.content.extend_from_slice(input); + if input_complete { + if self.result.is_some() { + if input.len() > 0 { + return Err(PreflateError::new( + ExitCode::InvalidParameter, + "more data provided after input_complete signaled", + )); + } + } else { + self.result = Some(decompress_zstd(&self.content, self.capacity)?); + } + } - let mut original_size: u64 = 0; - unsafe { - let retval = 
WrapperDecompressZip( - compressed[..].as_ptr(), - result_size, - original[..].as_mut_ptr(), - original.len() as u64, - (&mut original_size) as *mut u64, - ); + if let Some(result) = &mut self.result { + let amount_to_write = std::cmp::min(max_output_write, result.len() - self.result_pos); - assert_eq!(retval, 0); + writer.write(&result[self.result_pos..self.result_pos + amount_to_write])?; + self.result_pos += amount_to_write; + Ok(self.result_pos == result.len()) + } else { + Ok(false) + } } - assert_eq!(input.len() as u64, original_size); - assert_eq!(input[..], original[..(original_size as usize)]); } diff --git a/src/main.rs b/src/main.rs index d3d24ef..4e35428 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use std::{ path::{Path, PathBuf}, }; -use preflate_rs::{compress_zstd, decompress_zstd}; +use preflate_rs::{compress_zstd, decompress_zstd, CompressionStats}; fn enumerate_directory_recursively(path: &Path) -> Result, std::io::Error> { let mut results = Vec::new(); @@ -49,7 +49,8 @@ fn main() { let zstdlen = zstd::bulk::compress(&file, 9).unwrap(); - let preflatecompressed = compress_zstd(&file, 1).unwrap(); + let mut stats = CompressionStats::default(); + let preflatecompressed = compress_zstd(&file, 1, &mut stats).unwrap(); totalseen += zstdlen.len() as u64; totalzstd += preflatecompressed.len() as u64; diff --git a/src/preflate_container.rs b/src/preflate_container.rs index 576288b..fb5518e 100644 --- a/src/preflate_container.rs +++ b/src/preflate_container.rs @@ -11,6 +11,7 @@ use crate::{ process::{decode_mispredictions, encode_mispredictions, parse_deflate}, scan_deflate::{split_into_deflate_streams, BlockChunk}, statistical_codec::PredictionEncoder, + CompressionStats, }; const COMPRESSED_WRAPPER_VERSION_1: u8 = 1; @@ -78,6 +79,7 @@ fn test_variant_roundtrip() { fn write_chunk_block( block: BlockChunk, literal_data: &[u8], + compression_stats: &mut CompressionStats, destination: &mut impl Write, ) -> std::io::Result { match block { @@ 
-96,6 +98,8 @@ fn write_chunk_block( write_varint(destination, res.prediction_corrections.len() as u32)?; destination.write_all(&res.prediction_corrections)?; + compression_stats.overhead_bytes += res.prediction_corrections.len() as u64; + compression_stats.hash_algorithm = res.parameters.predictor.hash_algorithm; Ok(res.compressed_size) } @@ -107,6 +111,9 @@ fn write_chunk_block( write_varint(destination, res.prediction_corrections.len() as u32)?; destination.write_all(&res.prediction_corrections)?; + compression_stats.overhead_bytes += res.prediction_corrections.len() as u64; + compression_stats.hash_algorithm = res.parameters.predictor.hash_algorithm; + Ok(idat.total_chunk_length) } } @@ -171,7 +178,8 @@ fn read_chunk_block( fn roundtrip_chunk_block_literal() { let mut buffer = Vec::new(); - write_chunk_block(BlockChunk::Literal(5), b"hello", &mut buffer).unwrap(); + let mut stats = CompressionStats::default(); + write_chunk_block(BlockChunk::Literal(5), b"hello", &mut stats, &mut buffer).unwrap(); let mut read_cursor = std::io::Cursor::new(buffer); let mut destination = Vec::new(); @@ -187,7 +195,14 @@ fn roundtrip_chunk_block_deflate() { let mut buffer = Vec::new(); - write_chunk_block(BlockChunk::DeflateStream(results), &[], &mut buffer).unwrap(); + let mut stats = CompressionStats::default(); + write_chunk_block( + BlockChunk::DeflateStream(results), + &[], + &mut stats, + &mut buffer, + ) + .unwrap(); let mut read_cursor = std::io::Cursor::new(buffer); let mut destination = Vec::new(); @@ -208,9 +223,11 @@ fn roundtrip_chunk_block_png() { let mut buffer = Vec::new(); + let mut stats = CompressionStats::default(); write_chunk_block( BlockChunk::IDATDeflate(idat_contents, results), &[], + &mut stats, &mut buffer, ) .unwrap(); @@ -228,6 +245,7 @@ fn roundtrip_chunk_block_png() { pub fn expand_zlib_chunks( compressed_data: &[u8], loglevel: u32, + compression_stats: &mut CompressionStats, ) -> std::result::Result, PreflateError> { let mut locations_found = 
Vec::new(); @@ -241,7 +259,12 @@ pub fn expand_zlib_chunks( let mut index = 0; for loc in locations_found { - index += write_chunk_block(loc, &compressed_data[index..], &mut plain_text)?; + index += write_chunk_block( + loc, + &compressed_data[index..], + compression_stats, + &mut plain_text, + )?; } Ok(plain_text) @@ -274,7 +297,8 @@ pub fn recreated_zlib_chunks( fn roundtrip_deflate_chunks(filename: &str) { let f = crate::process::read_file(filename); - let expanded = expand_zlib_chunks(&f, 1).unwrap(); + let mut stats = CompressionStats::default(); + let expanded = expand_zlib_chunks(&f, 1, &mut stats).unwrap(); let mut read_cursor = std::io::Cursor::new(expanded); @@ -487,7 +511,8 @@ fn verify_zip_compress() { use crate::process::read_file; let v = read_file("samplezip.zip"); - let expanded = expand_zlib_chunks(&v, 1).unwrap(); + let mut stats = CompressionStats::default(); + let expanded = expand_zlib_chunks(&v, 1, &mut stats).unwrap(); let mut recompressed = Vec::new(); recreated_zlib_chunks(&mut Cursor::new(expanded), &mut recompressed).unwrap(); @@ -528,9 +553,17 @@ fn verify_file(filename: &str) { /// expands the Zlib compressed streams in the data and then recompresses the result /// with Zstd with the maximum level. -pub fn compress_zstd(zlib_compressed_data: &[u8], loglevel: u32) -> Result, PreflateError> { - let plain_text = expand_zlib_chunks(zlib_compressed_data, loglevel)?; - Ok(zstd::bulk::compress(&plain_text, 9)?) 
+pub fn compress_zstd( + zlib_compressed_data: &[u8], + loglevel: u32, + compression_stats: &mut CompressionStats, +) -> Result, PreflateError> { + let plain_text = expand_zlib_chunks(zlib_compressed_data, loglevel, compression_stats)?; + compression_stats.uncompressed_size = plain_text.len() as u64; + let r = zstd::bulk::compress(&plain_text, 9)?; + compression_stats.compressed_size = r.len() as u64; + + Ok(r) } /// decompresses the Zstd compressed data and then recompresses the result back @@ -548,7 +581,8 @@ fn verify_zip_compress_zstd() { use crate::process::read_file; let v = read_file("samplezip.zip"); - let compressed = compress_zstd(&v, 1).unwrap(); + let mut stats = CompressionStats::default(); + let compressed = compress_zstd(&v, 1, &mut stats).unwrap(); let recreated = decompress_zstd(&compressed, 256 * 1024 * 1024).unwrap(); diff --git a/src/preflate_error.rs b/src/preflate_error.rs index c88172c..49be555 100644 --- a/src/preflate_error.rs +++ b/src/preflate_error.rs @@ -40,6 +40,11 @@ pub enum ExitCode { /// had from recompressing the data since the amount of correction /// data would be larger than the original data. 
NoCompressionCandidates = 22, + + InvalidParameter = 23, + + // panic in rust code + AssertionFailure = 24, } impl Display for ExitCode { diff --git a/src/preflate_parameter_estimator.rs b/src/preflate_parameter_estimator.rs index 500bac6..426c916 100644 --- a/src/preflate_parameter_estimator.rs +++ b/src/preflate_parameter_estimator.rs @@ -43,15 +43,6 @@ pub struct PreflateParameters { const FILE_VERSION: u16 = 1; -const HASH_ALGORITHM_NONE: u16 = 0; -const HASH_ALGORITHM_ZLIB: u16 = 1; -const HASH_ALGORITHM_MINIZ_FAST: u16 = 2; -const HASH_ALGORITHM_LIBDEFLATE4: u16 = 3; -const HASH_ALGORITHM_LIBDEFLATE4_FAST: u16 = 4; -const HASH_ALGORITHM_ZLIBNG: u16 = 5; -const HASH_ALGORITHM_RANDOMVECTOR: u16 = 6; -const HASH_ALGORITHM_CRC32C: u16 = 7; - impl PreflateParameters { pub fn read(decoder: &mut impl PredictionDecoder) -> core::result::Result { assert_eq!(FILE_VERSION, decoder.decode_value(8)); @@ -59,17 +50,7 @@ impl PreflateParameters { let huff_strategy = decoder.decode_value(4); let zlib_compatible = decoder.decode_value(1) != 0; let window_bits = decoder.decode_value(8); - let hash_algorithm = decoder.decode_value(4); - - let hash_shift; - let hash_mask; - if hash_algorithm == HASH_ALGORITHM_ZLIB { - hash_shift = decoder.decode_value(8); - hash_mask = decoder.decode_value(16); - } else { - hash_shift = 0; - hash_mask = 0; - } + let hash_algorithm = HashAlgorithm::from_u16(decoder.decode_value(16)); let max_token_count = decoder.decode_value(16); let max_dist_3_matches = decoder.decode_value(16); @@ -137,18 +118,8 @@ impl PreflateParameters { max_chain: max_chain.into(), min_len: min_len.into(), hash_algorithm: match hash_algorithm { - HASH_ALGORITHM_NONE => HashAlgorithm::None, - HASH_ALGORITHM_ZLIB => HashAlgorithm::Zlib { - hash_shift: hash_shift.into(), - hash_mask, - }, - HASH_ALGORITHM_MINIZ_FAST => HashAlgorithm::MiniZFast, - HASH_ALGORITHM_LIBDEFLATE4 => HashAlgorithm::Libdeflate4, - HASH_ALGORITHM_LIBDEFLATE4_FAST => HashAlgorithm::Libdeflate4Fast, - 
HASH_ALGORITHM_ZLIBNG => HashAlgorithm::ZlibNG, - HASH_ALGORITHM_RANDOMVECTOR => HashAlgorithm::RandomVector, - HASH_ALGORITHM_CRC32C => HashAlgorithm::Crc32cHash, - _ => { + Some(h) => h, + None => { return Err(PreflateError::new( ExitCode::InvalidParameterHeader, "invalid hash algorithm", @@ -177,37 +148,7 @@ impl PreflateParameters { encoder.encode_value(u16::from(self.predictor.zlib_compatible), 1); encoder.encode_value(u16::try_from(self.predictor.window_bits).unwrap(), 8); - match self.predictor.hash_algorithm { - HashAlgorithm::None => { - encoder.encode_value(HASH_ALGORITHM_NONE, 4); - } - HashAlgorithm::Zlib { - hash_shift, - hash_mask, - } => { - encoder.encode_value(HASH_ALGORITHM_ZLIB, 4); - encoder.encode_value(u16::try_from(hash_shift).unwrap(), 8); - encoder.encode_value(hash_mask, 16); - } - HashAlgorithm::MiniZFast => { - encoder.encode_value(HASH_ALGORITHM_MINIZ_FAST, 4); - } - HashAlgorithm::Libdeflate4Fast => { - encoder.encode_value(HASH_ALGORITHM_LIBDEFLATE4_FAST, 4); - } - HashAlgorithm::Libdeflate4 => { - encoder.encode_value(HASH_ALGORITHM_LIBDEFLATE4, 4); - } - HashAlgorithm::ZlibNG => { - encoder.encode_value(HASH_ALGORITHM_ZLIBNG, 4); - } - HashAlgorithm::RandomVector => { - encoder.encode_value(HASH_ALGORITHM_RANDOMVECTOR, 4); - } - HashAlgorithm::Crc32cHash => { - encoder.encode_value(HASH_ALGORITHM_CRC32C, 4); - } - } + encoder.encode_value(self.predictor.hash_algorithm.to_u16(), 16); encoder.encode_value(self.predictor.max_token_count, 16); encoder.encode_value(self.predictor.max_dist_3_matches, 16); diff --git a/src/unmanaged_api.rs b/src/unmanaged_api.rs new file mode 100644 index 0000000..78ecb8e --- /dev/null +++ b/src/unmanaged_api.rs @@ -0,0 +1,351 @@ +use std::{ + io::Cursor, + panic::{catch_unwind, AssertUnwindSafe}, +}; + +use crate::{ + preflate_error::ExitCode, PreflateCompressionContext, PreflateDecompressionContext, + PreflateError, +}; + +/// Helper function to catch panics and convert them into the appropriate 
PreflateError
fn catch_unwind_result<R>(
    f: impl FnOnce() -> Result<R>,
) -> Result<R> {
    match catch_unwind(AssertUnwindSafe(f)) {
        Ok(r) => r.map_err(|e| e.into()),
        Err(err) => {
            if let Some(message) = err.downcast_ref::<&str>() {
                Err(PreflateError::new(ExitCode::AssertionFailure, *message))
            } else if let Some(message) = err.downcast_ref::<String>() {
                Err(PreflateError::new(
                    ExitCode::AssertionFailure,
                    message.as_str(),
                ))
            } else {
                Err(PreflateError::new(
                    ExitCode::AssertionFailure,
                    "unknown panic",
                ))
            }
        }
    }
}

/// copies a string into a limited length zero terminated utf8 buffer
fn copy_cstring_utf8_to_buffer(str: &str, target_error_string: &mut [u8]) {
    if target_error_string.len() == 0 {
        return;
    }

    // copy error string into the buffer as utf8
    let b = std::ffi::CString::new(str).unwrap();
    let b = b.as_bytes();

    let copy_len = std::cmp::min(b.len(), target_error_string.len() - 1);

    // copy string into buffer as much as fits
    target_error_string[0..copy_len].copy_from_slice(&b[0..copy_len]);

    // always null terminated
    target_error_string[copy_len] = 0;
}

#[test]
fn test_copy_cstring_utf8_to_buffer() {
    // test utf8
    let mut buffer = [0u8; 10];
    copy_cstring_utf8_to_buffer("h\u{00E1}llo", &mut buffer);
    assert_eq!(buffer, [b'h', 0xc3, 0xa1, b'l', b'l', b'o', 0, 0, 0, 0]);

    // test null termination
    let mut buffer = [0u8; 10];
    copy_cstring_utf8_to_buffer("helloeveryone", &mut buffer);
    assert_eq!(
        buffer,
        [b'h', b'e', b'l', b'l', b'o', b'e', b'v', b'e', b'r', 0]
    );
}

#[no_mangle]
pub unsafe extern "C" fn create_compression_context(_flags: u32) -> *mut std::ffi::c_void {
    let context = Box::new((12345678u32, PreflateCompressionContext::new()));
    Box::into_raw(context) as *mut std::ffi::c_void
}

#[no_mangle]
pub unsafe extern "C" fn free_compression_context(context: *mut std::ffi::c_void) {
    let x = Box::from_raw(context as *mut (u32, PreflateCompressionContext));
    assert_eq!(x.0, 
12345678, "invalid context passed in"); + // let Box destroy the object +} + +/// Compresses a file using the preflate algorithm. +/// +/// Returns -1 if more data is needed or if there is more data available, or 0 if done successfully. +/// Returns > 0 if there is an error +#[no_mangle] +pub unsafe extern "C" fn compress_buffer( + context: *mut std::ffi::c_void, + input_buffer: *const u8, + input_buffer_size: u64, + input_complete: bool, + output_buffer: *mut u8, + output_buffer_size: u64, + result_size: *mut u64, + error_string: *mut std::os::raw::c_uchar, + error_string_buffer_len: u64, +) -> i32 { + match catch_unwind_result(|| { + let context = context as *mut (u32, PreflateCompressionContext); + let (magic, context) = &mut *context; + assert_eq!(*magic, 12345678, "invalid context passed in"); + + let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize); + let output = std::slice::from_raw_parts_mut(output_buffer, output_buffer_size as usize); + + let mut writer = Cursor::new(output); + let done = context.process_buffer( + input, + input_complete, + &mut writer, + output_buffer_size as usize, + )?; + + *result_size = writer.position().into(); + Ok(done) + }) { + Ok(done) => { + if done { + 0 + } else { + -1 + } + } + Err(e) => { + copy_cstring_utf8_to_buffer( + e.message(), + std::slice::from_raw_parts_mut(error_string, error_string_buffer_len as usize), + ); + e.exit_code().as_integer_error_code() + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn get_compression_stats( + context: *mut std::ffi::c_void, + compressed_size: *mut u64, + uncompressed_size: *mut u64, + overhead_bytes: *mut u64, + hash_algorithm: *mut u32, +) { + let context = context as *mut (u32, PreflateCompressionContext); + let (magic, context) = &*context; + assert_eq!(*magic, 12345678, "invalid context passed in"); + + *compressed_size = context.compression_stats.compressed_size; + *uncompressed_size = context.compression_stats.uncompressed_size; + *overhead_bytes = 
context.compression_stats.overhead_bytes;
    *hash_algorithm = context.compression_stats.hash_algorithm.to_u16() as u32;
}

#[no_mangle]
pub unsafe extern "C" fn create_decompression_context(
    _flags: u32,
    capacity: u64,
) -> *mut std::ffi::c_void {
    let context = Box::new((
        87654321u32,
        PreflateDecompressionContext::new(capacity as usize),
    ));
    Box::into_raw(context) as *mut std::ffi::c_void
}

#[no_mangle]
pub unsafe extern "C" fn free_decompression_context(context: *mut std::ffi::c_void) {
    let x = Box::from_raw(context as *mut (u32, PreflateDecompressionContext));
    assert_eq!(x.0, 87654321, "invalid context passed in");
    // let Box destroy the object
}

/// Decompresses a file using the preflate algorithm.
///
/// Returns -1 if more data is needed or if there is more data available, or 0 if done successfully.
/// Returns > 0 if there is an error
#[no_mangle]
pub unsafe extern "C" fn decompress_buffer(
    context: *mut std::ffi::c_void,
    input_buffer: *const u8,
    input_buffer_size: u64,
    input_complete: bool,
    output_buffer: *mut u8,
    output_buffer_size: u64,
    result_size: *mut u64,
    error_string: *mut std::os::raw::c_uchar,
    error_string_buffer_len: u64,
) -> i32 {
    match catch_unwind_result(|| {
        let context = context as *mut (u32, PreflateDecompressionContext);
        let (magic, context) = &mut *context;
        assert_eq!(*magic, 87654321, "invalid context passed in");

        let input = std::slice::from_raw_parts(input_buffer, input_buffer_size as usize);
        let output = std::slice::from_raw_parts_mut(output_buffer, output_buffer_size as usize);

        let mut writer = Cursor::new(output);
        let done = context.process_buffer(
            input,
            input_complete,
            &mut writer,
            output_buffer_size as usize,
        )?;

        *result_size = writer.position().into();
        Ok(done)
    }) {
        Ok(done) => {
            if done {
                0
            } else {
                -1
            }
        }
        Err(e) => {
            copy_cstring_utf8_to_buffer(
                e.message(),
                std::slice::from_raw_parts_mut(error_string, 
error_string_buffer_len as usize), + ); + e.exit_code().as_integer_error_code() + } + } +} + +#[test] +fn extern_interface() { + use crate::process::read_file; + let input = read_file("samplezip.zip"); + + let mut compressed = Vec::new(); + let empty = Vec::new(); + + unsafe { + let compression_context = create_compression_context(0); + + let mut compressed_chunk = Vec::new(); + compressed_chunk.resize(10000, 0); + + input.chunks(10000).for_each(|chunk| { + let mut result_size: u64 = 0; + + let retval = compress_buffer( + compression_context, + chunk.as_ptr(), + chunk.len() as u64, + false, + compressed_chunk.as_mut_ptr(), + compressed_chunk.len() as u64, + (&mut result_size) as *mut u64, + std::ptr::null_mut(), + 0, + ); + assert_eq!(retval, -1); + + compressed.extend_from_slice(&compressed_chunk[..(result_size as usize)]); + }); + + loop { + let mut result_size: u64 = 0; + + let retval = compress_buffer( + compression_context, + empty.as_ptr(), + 0, + true, + compressed_chunk.as_mut_ptr(), + compressed_chunk.len() as u64, + (&mut result_size) as *mut u64, + std::ptr::null_mut(), + 0, + ); + + compressed.extend_from_slice(&compressed_chunk[..(result_size as usize)]); + + if retval == 0 { + break; + } + } + + let mut overhead_bytes = 0; + let mut uncompressed_size = 0; + let mut compressed_size = 0; + let mut hash_algorithm = 0; + + get_compression_stats( + compression_context, + &mut compressed_size, + &mut uncompressed_size, + &mut overhead_bytes, + &mut hash_algorithm, + ); + + println!("stats: overhead={overhead_bytes}, uncompressed={uncompressed_size}, compressed={compressed_size} hash_algorithm={hash_algorithm}"); + + free_compression_context(compression_context); + } + + let mut original = Vec::new(); + + unsafe { + let decompression_context = create_decompression_context(0, 1024 * 1024 * 50); + + let mut decompressed_chunk = Vec::new(); + decompressed_chunk.resize(10000, 0); + + compressed.chunks(10000).for_each(|chunk| { + let mut result_size: u64 = 0; + + 
let retval = decompress_buffer( + decompression_context, + chunk.as_ptr(), + chunk.len() as u64, + false, + decompressed_chunk.as_mut_ptr(), + decompressed_chunk.len() as u64, + (&mut result_size) as *mut u64, + std::ptr::null_mut(), + 0, + ); + assert_eq!(retval, -1); + + original.extend_from_slice(&decompressed_chunk[..(result_size as usize)]); + }); + + loop { + let mut result_size: u64 = 0; + + let retval = decompress_buffer( + decompression_context, + empty.as_ptr(), + 0, + true, + decompressed_chunk.as_mut_ptr(), + decompressed_chunk.len() as u64, + (&mut result_size) as *mut u64, + std::ptr::null_mut(), + 0, + ); + + original.extend_from_slice(&decompressed_chunk[..(result_size as usize)]); + + if retval == 0 { + break; + } + } + + free_decompression_context(decompression_context); + } + + assert_eq!(input.len() as u64, original.len() as u64); + assert_eq!(input[..], original[..]); +} diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 100ccd3..c64cbf9 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -71,7 +71,8 @@ fn test_docx() { fn test_container(filename: &str) { let v = read_file(filename); - let c = compress_zstd(&v, 1).unwrap(); + let mut stats = preflate_rs::CompressionStats::default(); + let c = compress_zstd(&v, 1, &mut stats).unwrap(); let r = decompress_zstd(&c, 1024 * 1024 * 128).unwrap(); assert!(v == r);