|
| 1 | +use structopt::StructOpt; |
| 2 | +use std::fs; |
| 3 | +use std::collections::HashMap; |
| 4 | +use rug::{Float, ops::AssignRound, float::Round}; |
| 5 | +use std::mem; |
| 6 | + |
| 7 | +#[derive(StructOpt, Debug)] |
| 8 | +#[structopt(name = "basic")] |
| 9 | +struct Opt { |
| 10 | + #[structopt(name = "FILE")] |
| 11 | + file: String |
| 12 | +} |
| 13 | + |
| 14 | + |
/// Counts how many times each character occurs in `data`.
///
/// Returns a map from character to its number of occurrences. Characters that
/// never appear are absent from the map, so an empty input yields an empty map.
fn find_frequencies(data: &str) -> HashMap<char, u32> {
    let mut frequencies: HashMap<char, u32> = HashMap::new();

    for c in data.chars() {
        // One entry lookup both inserts an unseen character at zero and hands
        // back the slot to increment, instead of a `get` followed by `entry`.
        *frequencies.entry(c).or_insert(0) += 1;
    }

    frequencies
}
| 30 | + |
| 31 | +fn compression_ratio(before: &String, after: &Float) -> f64 { |
| 32 | + let before_size = mem::size_of_val(&before[..]); |
| 33 | + let after_size = after.prec() + 32; |
| 34 | + println!("before : {:?}, after : {:?}", before_size, after_size); |
| 35 | + return after_size as f64 / before_size as f64; |
| 36 | + |
| 37 | +} |
| 38 | + |
| 39 | +fn find_bounds(freqs: &HashMap<char, u32>, filesize: u32, step_precision: u32) -> HashMap<char, (Float, Float)> { |
| 40 | + let mut bounds : HashMap<char, (Float, Float)> = HashMap::new(); |
| 41 | + |
| 42 | + let mut prevhigh = Float::with_val(step_precision, 0.0); |
| 43 | + for (&c, &count) in freqs { |
| 44 | + let low = prevhigh; |
| 45 | + let high = Float::with_val(step_precision, low.clone() + (count as f64 / filesize as f64)); |
| 46 | + prevhigh = Float::with_val(step_precision, high.clone()); |
| 47 | + |
| 48 | + bounds.insert(c, (low, high)); |
| 49 | + } |
| 50 | + |
| 51 | + return bounds; |
| 52 | +} |
| 53 | + |
| 54 | +fn arithmetic_encode(file: &String, bounds: &HashMap<char, (Float, Float)>, step_precision: u32) -> Float { |
| 55 | + |
| 56 | + // Start with 1-bit precision |
| 57 | + let mut high : Float = Float::with_val(32, 1); |
| 58 | + let mut low : Float = Float::with_val(32, 0); |
| 59 | + |
| 60 | + for c in file.chars() { |
| 61 | + |
| 62 | + // Increase the precision of the floats |
| 63 | + high.set_prec(high.prec() + step_precision); |
| 64 | + low.set_prec(low.prec() + step_precision); |
| 65 | + |
| 66 | + //println!("{:?} : {:?}, {:?}, {:?}, {:?}, ", c, low.prec(), high.prec(), high, low); |
| 67 | + |
| 68 | + let range = high.clone() - low.clone(); |
| 69 | + |
| 70 | + //println!("{:?} : {:?}, {:?}, {:?}, {:?}, {:?}, ", c, low.prec(), high.prec(), range, high, low); |
| 71 | + |
| 72 | + high.assign_round(low.clone() + (range.clone() * bounds.get(&c).unwrap().1.clone()), Round::Down); |
| 73 | + low.assign_round(low.clone() + (range.clone() * bounds.get(&c).unwrap().0.clone()), Round::Up); |
| 74 | + //println!("{:?} : {:?}, {:?}, {:?}, {:?}, {:?}, ", c, low.prec(), high.prec(), range, high, low); |
| 75 | + } |
| 76 | + |
| 77 | + return (high + low)/2; |
| 78 | +} |
| 79 | + |
| 80 | +fn arithmetic_decode(encoded : &Float, bounds: &HashMap<char, (Float, Float)>, filesize : u32, step_precision: u32) -> String { |
| 81 | + let mut result : String = String::new(); |
| 82 | + let mut data = encoded.clone(); |
| 83 | + |
| 84 | + // Initial precision is 32, so get back to that |
| 85 | + while data.prec() > 32 { |
| 86 | + for(c, bounds) in bounds { |
| 87 | + if bounds.1 > data && bounds.0 < data { |
| 88 | + println!("{:?} : {:?} at prec {:?}, bounds are {:?} - {:?}", c, data, data.prec(), bounds.0.clone(), bounds.1.clone()); |
| 89 | + result.push(*c); |
| 90 | + data.assign_round((data.clone() - bounds.0.clone()) / (bounds.1.clone() - bounds.0.clone()), Round::Up); |
| 91 | + data.set_prec(data.prec() - step_precision); |
| 92 | + break; |
| 93 | + } |
| 94 | + } |
| 95 | + } |
| 96 | + |
| 97 | + return result; |
| 98 | +} |
| 99 | + |
1 | 100 | fn main() {
|
2 |
| - println!("Hello, world!"); |
| 101 | + // Parse arguments |
| 102 | + let opt = Opt::from_args(); |
| 103 | + |
| 104 | + println!("Reading file"); |
| 105 | + let contents = fs::read_to_string(opt.file).unwrap(); |
| 106 | + let filesize = contents.len() as u32; |
| 107 | + |
| 108 | + // https://stackoverflow.com/questions/7150035/ and add a small pad :) |
| 109 | + let step_precision = (filesize as f64).log2().ceil() as u32; |
| 110 | + println!("Step precision : {:?}", step_precision); |
| 111 | + |
| 112 | + // Find frequency |
| 113 | + let frequencies = find_frequencies(&contents); |
| 114 | + let bounds = find_bounds(&frequencies, filesize, step_precision); |
| 115 | + println!("{:?}\n{:?}", frequencies.clone(), bounds.clone()); |
| 116 | + |
| 117 | + // Encode file |
| 118 | + let encoded = arithmetic_encode(&contents, &bounds, step_precision); |
| 119 | + println!("encoder result : {:?}", encoded); |
| 120 | + |
| 121 | + // Compute compression ratio |
| 122 | + let ratio = compression_ratio(&contents, &encoded); |
| 123 | + println!("Compression ratio : {:?}", ratio); |
| 124 | + |
| 125 | + // Decode file |
| 126 | + let decoded = arithmetic_decode(&encoded, &bounds, filesize, step_precision); |
| 127 | + println!("decoder result : {:?}", decoded); |
| 128 | + |
| 129 | + println!("Saving result"); |
| 130 | + fs::write("output.txt", &decoded[..]).unwrap(); |
| 131 | + |
| 132 | + |
3 | 133 | }
|
0 commit comments