Skip to content

Commit 8c5aed6

Browse files
authored
add IDAT scanning (#14)
* idat progress * work
1 parent 3c651ae commit 8c5aed6

File tree

6 files changed

+181
-28
lines changed

6 files changed

+181
-28
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ byteorder = "1.4"
2626
cabac = "0.6.0"
2727
default-boxed = "0.2"
2828
zstd = "0.13.0"
29+
crc32fast = "1.3"
2930

3031
[dev-dependencies]
31-
crc32fast = "1.3"
3232
libz-sys = "1.1"
3333
libdeflate-sys = "1.19"
3434
libz-ng-sys="1.1.12"

package/PreflateRs.nuspec

+8-12
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,20 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<package xmlns="http://schemas.microsoft.com/packaging/2011/08/nuspec.xsd">
33
<metadata>
4-
<id>Lepton.Jpeg.Rust</id>
5-
<version>0.3.4.3</version>
6-
<title>Lepton JPEG Compression Rust version binaries and libraries</title>
4+
<id>PreflateRs</id>
5+
<version>0.0.0.1</version>
6+
<title>PreflateRs Compression Rust binaries and libraries</title>
77
<authors>kristofr</authors>
88
<owners>kristofr</owners>
99
<requireLicenseAcceptance>false</requireLicenseAcceptance>
10-
<description>Lepton Rust binaries and libraries</description>
10+
<description>Preflate Rust binaries and libraries</description>
1111
<tags>lepton</tags>
1212
</metadata>
1313
<files>
14-
<file src="..\target\debug\lepton_jpeg_util.exe" target="exe\debug\x64" />
15-
<file src="..\target\debug\lepton_jpeg_util.pdb" target="exe\debug\x64" />
16-
<file src="..\target\debug\lepton_jpeg.dll" target="lib\debug\x64" />
17-
<file src="..\target\debug\lepton_jpeg.pdb" target="lib\debug\x64" />
14+
<file src="..\target\debug\preflate_rs.dll" target="lib\debug\x64" />
15+
<file src="..\target\debug\preflate_rs.pdb" target="lib\debug\x64" />
1816

19-
<file src="..\target\release\lepton_jpeg_util.exe" target="exe\release\x64" />
20-
<file src="..\target\release\lepton_jpeg_util.pdb" target="exe\release\x64" />
21-
<file src="..\target\release\lepton_jpeg.dll" target="lib\release\x64" />
22-
<file src="..\target\release\lepton_jpeg.pdb" target="lib\release\x64" />
17+
<file src="..\target\release\preflate_rs.dll" target="lib\release\x64" />
18+
<file src="..\target\release\preflate_rs.pdb" target="lib\release\x64" />
2319
</files>
2420
</package>

samples/treegdi.png

167 KB
Loading

src/hash_chain.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ impl InternalPositionRel {
7878
}
7979
}
8080

81-
#[derive(Default, Copy, Clone, Eq, PartialEq, Debug)]
81+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
8282
struct InternalPositionAbs {
8383
pos: u32,
8484
}
@@ -97,14 +97,20 @@ impl InternalPosition for InternalPositionAbs {
9797
}
9898

9999
fn is_valid(&self) -> bool {
100-
self.pos > 0
100+
self.pos != 0xffffffff
101101
}
102102

103103
fn dist(&self, pos: Self) -> u32 {
104104
u32::from(self.pos - pos.pos)
105105
}
106106
}
107107

108+
// Hand-written `Default` (the derive was removed in this diff) so that a
// default-constructed position is the "invalid" sentinel rather than 0.
impl Default for InternalPositionAbs {
109+
/// Returns a position holding `0xffffffff`, the one value for which
/// `is_valid` returns `false` (it tests `self.pos != 0xffffffff`).
fn default() -> Self {
110+
Self { pos: 0xffffffff }
111+
}
112+
}
113+
108114
impl InternalPositionAbs {
109115
fn new(pos: u32) -> Self {
110116
Self { pos }

src/process.rs

+56-8
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use crate::{
1212
huffman_calc::HufftreeBitCalc,
1313
preflate_error::PreflateError,
1414
preflate_parameter_estimator::PreflateParameters,
15-
preflate_token::{BlockType, PreflateTokenBlock},
15+
preflate_token::{BlockType, PreflateToken, PreflateTokenBlock},
1616
statistical_codec::{
1717
CodecCorrection, CodecMisprediction, PredictionDecoder, PredictionEncoder,
1818
},
@@ -70,6 +70,12 @@ pub fn parse_deflate(
7070
let eof_padding = block_decoder.read_eof_padding();
7171
let plain_text = block_decoder.move_plain_text();
7272
let compressed_size = input_stream.position() as usize;
73+
74+
/*// write to file
75+
let mut f = std::fs::File::create("c:\\temp\\treegdi.deflate")
76+
.unwrap();
77+
std::io::Write::write_all(&mut f, &compressed_data[0..compressed_size]).unwrap();*/
78+
7379
Ok(DeflateContents {
7480
compressed_size,
7581
plain_text,
@@ -370,10 +376,13 @@ fn test_treepngdeflate() {
370376
use crate::hash_chain::HashChain;
371377
use crate::hash_chain::UPDATE_MODE_ALL;
372378

373-
let compressed_data: &[u8] = &read_file("treepng.deflate");
379+
let compressed_data: &[u8] = &read_file("treegdi.deflate");
374380

375381
let contents = parse_deflate(compressed_data, 1).unwrap();
376382

383+
let decoder = miniz_oxide::inflate::decompress_to_vec(compressed_data).unwrap();
384+
assert_eq!(&decoder[..], &contents.plain_text[..]);
385+
377386
let mut input = crate::preflate_input::PreflateInput::new(&contents.plain_text);
378387
let mut chain: crate::hash_chain::HashChainAbs<RandomVectorHash> =
379388
RandomVectorHash::new_hash_chain(RandomVectorHash {});
@@ -382,22 +391,52 @@ fn test_treepngdeflate() {
382391

383392
let h = r.get_hash(&contents.plain_text);
384393

385-
//println!("hashx: {:?}", h);
394+
println!("hashx: {:?}", h);
386395

387396
let mut maxdepth = 0;
397+
let mut mismatches = 0;
398+
let mut prev = PreflateToken::Literal;
399+
400+
/*let mut o = 0;
401+
for i in 0..20
402+
{
403+
let t = &contents.blocks[0].tokens[i];
404+
println!("{} token: {}, {:?}", o, i, t);
405+
match t {
406+
crate::preflate_token::PreflateToken::Literal => o += 1,
407+
crate::preflate_token::PreflateToken::Reference(r) => {
408+
o += r.len();
409+
}
410+
}
411+
}*/
412+
413+
for block_no in 0..contents.blocks.len() {
414+
let b = &contents.blocks[block_no];
415+
println!("block: {} {}", block_no, b.tokens.len());
388416

389-
for b in &contents.blocks {
390417
for i in 0..b.tokens.len() {
391418
let t = &b.tokens[i];
419+
420+
let pos = input.pos();
421+
let chars = input.cur_chars(0);
422+
let depth;
423+
let mut chars = chars[0..chars.len().min(10)].to_vec();
424+
392425
match t {
393426
crate::preflate_token::PreflateToken::Literal => {
394427
chain.update_hash::<true, UPDATE_MODE_ALL>(1, &input);
395428
input.advance(1);
429+
depth = 0;
430+
chars.resize(1, 0);
396431
}
397432
crate::preflate_token::PreflateToken::Reference(r) => {
398-
let depth = chain.match_depth(&r, 32768, &input);
433+
depth = chain.match_depth(&r, 32768, &input);
434+
chars.resize(r.len().min(10) as usize, 0);
399435
if depth > 5 {
400-
println!("token: {}, depth {} reference: {:?}", i, depth, r);
436+
mismatches += 1;
437+
if mismatches > 20 {
438+
return;
439+
}
401440

402441
//println!("back: {:?}", &input.cur_chars(-82)[0..82]);
403442

@@ -406,15 +445,24 @@ fn test_treepngdeflate() {
406445
depth,
407446
input.pos(),
408447
&input.cur_chars(0)[0..16]
409-
);
410-
chain.match_depth(&r, 32768, &input);*/
448+
);*/
449+
chain.match_depth(&r, 32768, &input);
411450
}
412451

413452
chain.update_hash::<true, UPDATE_MODE_ALL>(r.len(), &input);
414453

415454
input.advance(r.len());
416455
}
417456
}
457+
458+
if (block_no == 1 && i > 6900 && i < 7100) {
459+
println!(
460+
"offset: {} token: {}/{}, depth {} reference: {:?} chars {:?}",
461+
pos, block_no, i, depth, t, chars
462+
);
463+
}
464+
465+
prev = t.clone();
418466
}
419467
}
420468

src/scan_deflate.rs

+108-5
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@ use crate::{decompress_deflate_stream, DecompressResult};
55
use byteorder::{LittleEndian, ReadBytesExt};
66
use std::io::{Read, Seek, SeekFrom};
77

8-
#[derive(Hash, Eq, PartialEq, Copy, Clone, Debug)]
8+
use anyhow::Result;
9+
10+
/// Kind of container signature recognised by `next_signature` while scanning
/// a byte buffer for embedded deflate streams.
#[derive(Hash, Eq, PartialEq, Clone, Debug)]
911
pub enum Signature {
1012
/// Zlib stream header.
/// NOTE(review): the `u8` is set per matched header value (e.g. `8` for
/// `0xDA78`); its exact meaning is not visible here — presumably a
/// compression-level hint, confirm against `next_signature`.
Zlib(u8),
1113
/// Zip local file header, matched on the value `0x4B50`.
ZipLocalFileHeader,
1214
/// Gzip header, matched on the value `0x8B1F`.
Gzip,
15+
/// PNG IDAT chunk type, matched on the value `0x4449`. The Zlib stream of a
/// PNG is split across one or more consecutive IDAT chunks. This variant
/// carries no data; chunk offsets are collected separately by `parse_idat`.
16+
IDAT,
1317
}
1418

1519
fn next_signature(src: &[u8], index: &mut usize) -> Option<Signature> {
@@ -27,6 +31,7 @@ fn next_signature(src: &[u8], index: &mut usize) -> Option<Signature> {
2731
0xDA78 => Signature::Zlib(8),
2832
0x4B50 => Signature::ZipLocalFileHeader,
2933
0x8B1F => Signature::Gzip,
34+
0x4449 => Signature::IDAT,
3035
_ => continue,
3136
};
3237

@@ -58,20 +63,118 @@ pub fn search_for_deflate_streams(src: &[u8], locations_found: &mut Vec<DeflateS
5863
start,
5964
data: res,
6065
});
61-
} else {
62-
index += 2;
66+
continue;
6367
}
6468
}
6569

6670
Signature::ZipLocalFileHeader => {
67-
if find_zip_stream(src, &mut index, locations_found).is_err() {
68-
index += 2;
71+
if find_zip_stream(src, &mut index, locations_found).is_ok() {
72+
continue;
73+
}
74+
}
75+
76+
Signature::IDAT => {
77+
if index >= 4 {
78+
if let Ok(r) = parse_idat(&src[index - 4..], 0) {
79+
if let Ok(res) = decompress_deflate_stream(&r.payload[2..], true) {
80+
println!("success! {:?}", r.idat_boundaries);
81+
println!(
82+
"results {:?}, {}, {:?}",
83+
res.compressed_size,
84+
res.prediction_corrections.len(),
85+
res.parameters
86+
);
87+
88+
println!(
89+
"recompressed: {}",
90+
zstd::bulk::compress(&res.plain_text, 9).unwrap().len()
91+
);
92+
}
93+
}
6994
}
7095
}
7196
}
97+
98+
// wasn't able to match any of the known signatures, so skip the current byte
99+
index += 1;
72100
}
73101
}
74102

103+
/// Result of `parse_idat`: the data gathered from a run of consecutive PNG
/// IDAT chunks.
struct IdatContents {
104+
/// Data bytes of every accepted IDAT chunk, concatenated in order
/// (chunk length, type and CRC fields are not included).
payload: Vec<u8>,
105+
/// Offset of each accepted chunk (the position of its length field),
/// relative to the start of the slice handed to `parse_idat`.
idat_boundaries: Vec<u32>,
106+
}
107+
108+
fn parse_idat(compressed_data: &[u8], deflate_info_dump_level: u32) -> Result<IdatContents> {
109+
if compressed_data.len() < 12 || &compressed_data[4..8] != b"IDAT" {
110+
return Err(anyhow::Error::msg("No IDAT chunk found"));
111+
}
112+
113+
let mut payload = Vec::new();
114+
115+
// PNG file
116+
let mut idat_boundaries = Vec::new();
117+
let mut pos = 0;
118+
119+
while pos < compressed_data.len() {
120+
// png chunks start with the length of the chunk
121+
let chunk_len = u32::from_be_bytes([
122+
compressed_data[pos],
123+
compressed_data[pos + 1],
124+
compressed_data[pos + 2],
125+
compressed_data[pos + 3],
126+
]) as usize;
127+
128+
// now look at the chunk type. We only want IDAT chunks
129+
// and they have to be consecutive, so stop once we see
130+
// something weird
131+
let chunk_type = &compressed_data[pos + 4..pos + 8];
132+
if chunk_type != b"IDAT" || pos + chunk_len + 12 > compressed_data.len() {
133+
break;
134+
}
135+
136+
let chunk = &compressed_data[pos + 8..pos + chunk_len + 8];
137+
payload.extend_from_slice(chunk);
138+
139+
let mut crc = crc32fast::Hasher::new();
140+
crc.update(&chunk_type);
141+
crc.update(chunk);
142+
143+
if crc.finalize()
144+
!= u32::from_be_bytes([
145+
compressed_data[pos + chunk_len + 8],
146+
compressed_data[pos + chunk_len + 9],
147+
compressed_data[pos + chunk_len + 10],
148+
compressed_data[pos + chunk_len + 11],
149+
])
150+
{
151+
return Err(anyhow::Error::msg("CRC mismatch"));
152+
}
153+
154+
idat_boundaries.push(pos as u32);
155+
pos += chunk_len + 12;
156+
}
157+
158+
if deflate_info_dump_level > 0 {
159+
println!("IDAT boundaries: {:?}", idat_boundaries);
160+
}
161+
162+
Ok(IdatContents {
163+
payload,
164+
idat_boundaries,
165+
})
166+
}
167+
168+
// Smoke test: scans the `treegdi.png` sample for deflate streams and prints
// whatever was found. There are no assertions, so it only fails if reading the
// file or the scan itself panics.
#[test]
169+
fn parse_png() {
170+
// NOTE(review): `read_file` presumably resolves the name against the
// repository's samples directory — confirm in `process.rs`.
let f = crate::process::read_file("treegdi.png");
171+
172+
let mut locations_found = Vec::new();
173+
search_for_deflate_streams(&f, &mut locations_found);
174+
175+
println!("locations found: {:?}", locations_found);
176+
}
177+
75178
const ZIP_LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50;
76179

77180
#[derive(Default)]

0 commit comments

Comments
 (0)