depolymerization

wire gateway for Bitcoin/Ethereum
Log | Files | Refs | Submodules | README | LICENSE

commit 99f474072cebe75081d835c02838553b734b87fc
parent 676a7ee88d82f734c729270a7ccefad957b90549
Author: Antoine A <>
Date:   Thu, 25 Nov 2021 20:55:03 +0100

pack bits for real

Diffstat:
M uri-pack/src/main.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/uri-pack/src/main.rs b/uri-pack/src/main.rs @@ -154,29 +154,72 @@ pub fn encode_str(str: &str) -> Result<Vec<u8>, EncodeErr> { assert!(str.as_bytes().iter().all(|c| supported_char(*c as char))); + // Amount of pending bits stored in buffer. + let mut buffer_bits = 0u8; + // Holds pending bits beginning from the most significant bits + let mut buffer: u8 = 0; + let mut write_bits = |nb: u8, mut nb_bits: u8| { + while nb_bits > 0 { + let writable = (8 - buffer_bits).min(nb_bits); + let remove_right = nb_bits - writable; + let remove_left = 8 - writable; + let mask = ((nb >> remove_right) << (remove_left)) >> buffer_bits; + buffer = buffer | mask; + buffer_bits += writable; + nb_bits -= writable; + // Write filled byte + if buffer_bits == 8 { + vec.push(buffer); + buffer = 0; + buffer_bits = 0; + } + } + }; + for c in str.bytes() { - let nb = match encode_ascii(c) { - Encoded::Simple(nb) => nb, + match encode_ascii(c) { + Encoded::Simple(nb) => write_bits(nb, 5), Encoded::Extended(nb) => { - vec.push(EXTENDED); - nb + write_bits(EXTENDED, 5); + write_bits(nb, 6); } - }; - vec.push(nb); + } + } + + if buffer_bits > 0 { + vec.push(buffer); } return Ok(vec); } -pub fn decode_str(bytes: &[u8]) -> Result<String, DecodeErr> { - let mut buf = String::new(); +pub fn decode_str(bytes: &[u8], len: usize) -> Result<String, DecodeErr> { + let mut buf = String::with_capacity(len); let mut iter = bytes.iter(); + // Amount of pending bits stored in buffer. 
+ let mut buffer_bits = 0u8; + // Holds pending bits beginning from the most significant bits + let mut buffer: u8 = 0; + let mut read_nb = |mut nb_bits: u8| -> u8 { + let mut nb = 8; + while nb_bits > 0 { + if buffer_bits == 0 { + buffer = *iter.next().unwrap(); + buffer_bits = 8; + } + let readable = buffer_bits.min(nb_bits); + let mask = (buffer << 8 - buffer_bits) >> (8 - readable); + nb = (nb << readable) | mask; + buffer_bits -= readable; + nb_bits -= readable; + } + return nb; + }; - while let Some(next) = iter.next() { - let encoded = if *next == EXTENDED { - Encoded::Extended(*iter.next().unwrap()) - } else { - Encoded::Simple(*next) + for _ in 0..len { + let encoded = match read_nb(5) { + EXTENDED => Encoded::Extended(read_nb(6)), + nb => Encoded::Simple(nb), }; buf.push(decode_ascii(encoded) as char); } @@ -205,7 +248,18 @@ mod test { } #[test] - fn url_test() { + fn url_simple() { + let mut majestic = + csv::Reader::from_reader(include_str!("majestic_million.csv").as_bytes()); + for record in majestic.records() { + let domain = &record.unwrap()[2]; + let decoded = decode_str(&encode_str(domain).unwrap(), domain.len()).unwrap(); + assert_eq!(domain, decoded); + } + } + + #[test] + fn url_complex() { let mut json = Value::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); for entry in json.as_array_mut().unwrap() { @@ -218,8 +272,8 @@ mod test { continue; // extended ascii } let encoded = encode_str(&href).expect(&format!("Failed to encode {}", &href)); - let decoded = - decode_str(&encoded).expect(&format!("Failed to decode encoded {}", &href)); + let decoded = decode_str(&encoded, href.len()) + .expect(&format!("Failed to decode encoded {}", &href)); assert_eq!(href, decoded); } }