depolymerization

wire gateway for Bitcoin/Ethereum
Log | Files | Refs | Submodules | README | LICENSE

commit f0a82b3a64457d18b29ae497688cfbe9890443e2
parent 0200926d8458b6e8f8f8275e569df11c4c476fb7
Author: Antoine A <>
Date:   Thu,  2 Dec 2021 13:06:27 +0100

Use a u5 terminator in uri-pack

Diffstat:
M uri-pack/benches/pack.rs | 15 +++++----------
M uri-pack/src/lib.rs | 62 ++++++++++++++++++++++++++++++++++++++++----------------------
M uri-pack/src/main.rs | 2 +-
M wire-gateway/src/error_codes.rs | 3 +--
M wire-gateway/src/main.rs | 6 ++----
5 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/uri-pack/benches/pack.rs b/uri-pack/benches/pack.rs @@ -1,23 +1,18 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use uri_pack::{pack_uri, unpack_uri}; +use uri_pack::{pack_uri, unpack_uri, PACKED}; fn rand_compat(size: usize) -> String { String::from_utf8( - std::iter::repeat_with(|| fastrand::u8(b' '..=b'~')) + std::iter::repeat_with(|| fastrand::u8(b'!'..=b'~')) .take(size) .collect(), ) .unwrap() } -const COMMON: [u8; 31] = [ - b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', - b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_', b'%', -]; - fn rand_simple(size: usize) -> String { String::from_utf8( - std::iter::repeat_with(|| COMMON[fastrand::usize(..COMMON.len())]) + std::iter::repeat_with(|| PACKED[fastrand::usize(..PACKED.len())]) .take(size) .collect(), ) @@ -38,7 +33,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_with_input(BenchmarkId::new("unpack rand", size), size, |b, &size| { b.iter_batched( || pack_uri(&rand_compat(size)).unwrap(), - |(packed, len)| unpack_uri(&packed, len), + |packed| unpack_uri(&packed), criterion::BatchSize::SmallInput, ) }); @@ -52,7 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_with_input(BenchmarkId::new("unpack simple", size), size, |b, &size| { b.iter_batched( || pack_uri(&rand_simple(size)).unwrap(), - |(packed, len)| unpack_uri(&packed, len), + |packed| unpack_uri(&packed), criterion::BatchSize::SmallInput, ) }); diff --git a/uri-pack/src/lib.rs b/uri-pack/src/lib.rs @@ -2,12 +2,11 @@ /// Panic if char not supported fn pack_ascii(c: u8) -> u8 { [ - 94, 67, 68, 69, 70, 30, 71, 72, 73, 74, 75, 76, 77, 28, 26, 27, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 66, 78, 79, 80, 81, 82, 83, 84, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 85, 86, 87, 88, 29, 89, 0, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 90, 91, 92, - 93, - ][(c - b' ') as usize] + 67, 68, 69, 70, 29, 71, 72, 73, 74, 75, 76, 77, 28, 26, 27, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 78, 79, 80, 81, 82, 83, 84, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 85, 86, 87, 88, 30, 89, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 90, 91, 92, 93, + ][(c - b'!') as usize] } /// Unpack an URI ascii char @@ -15,22 +14,29 @@ fn pack_ascii(c: u8) -> u8 { fn unpack_ascii(c: u8) -> u8 { [ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', - b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_', - b'%', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', + b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'%', + b'_', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'!', b'"', b'#', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[', b'\\', b']', b'^', b'`', - b'{', b'|', b'}', b'~', b' ', + b'{', b'|', b'}', b'~', ][c as usize] } -/// Check if an ascii char is packable +/// Check if an ascii char is supported by the encoding fn supported_ascii(c: &u8) -> bool { - (b' '..=b'~').contains(c) + (b'!'..=b'~').contains(c) } /// Extended packing limit -const EXTENDED: u8 = 31; +pub const EXTENDED: u8 = 30; +/// EOF u5 encoding +pub const TERMINATOR: u8 = 31; +/// Ascii char that can be packed into 5 bits +pub const PACKED: [u8; 30] = [ + b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', + b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'%', +]; 
#[derive(Debug, Clone, Copy, thiserror::Error)] pub enum EncodeErr { @@ -51,7 +57,7 @@ pub enum DecodeErr { } /// Pack an uri string into an optimized binary format -pub fn pack_uri(uri: &str) -> Result<(Vec<u8>, usize), EncodeErr> { +pub fn pack_uri(uri: &str) -> Result<Vec<u8>, EncodeErr> { let len = uri.as_bytes().len(); let mut vec = Vec::with_capacity(len); @@ -116,18 +122,19 @@ pub fn pack_uri(uri: &str) -> Result<(Vec<u8>, usize), EncodeErr> { write_bits(nb - EXTENDED, 6); } } + write_bits(TERMINATOR, 5); // Push pending buffer if not empty if buff_bits > 0 { vec.push(buff); } - return Ok((vec, len)); + return Ok(vec); } /// Unpack an uri string from its optimized binary format -pub fn unpack_uri(bytes: &[u8], len: usize) -> Result<String, DecodeErr> { - let mut buf = String::with_capacity(len); +pub fn unpack_uri(bytes: &[u8]) -> Result<String, DecodeErr> { + let mut buf = String::with_capacity(bytes.len()); let mut iter = bytes.iter(); // Holds pending bits beginning from the most significant bits @@ -175,8 +182,9 @@ pub fn unpack_uri(bytes: &[u8], len: usize) -> Result<String, DecodeErr> { return Ok(nb); }; - for _ in 0..len { + loop { let encoded = match read_nb(5)? { + TERMINATOR => break, EXTENDED => read_nb(6)? 
+ EXTENDED, nb => nb, }; @@ -192,7 +200,17 @@ mod test { use serde_json::Value; - use crate::{pack_ascii, pack_uri, supported_ascii, unpack_ascii, unpack_uri}; + use crate::{ + pack_ascii, pack_uri, supported_ascii, unpack_ascii, unpack_uri, EXTENDED, PACKED, + }; + + #[test] + /// Check support every packable ascii character is packed + fn packed() { + for c in PACKED { + assert!(pack_ascii(c) < EXTENDED); + } + } #[test] /// Check support every ascii graphic character and space @@ -217,8 +235,8 @@ mod test { csv::Reader::from_reader(include_str!("majestic_million.csv").as_bytes()); for record in majestic.records() { let domain = &record.unwrap()[2]; - let (encoded, len) = pack_uri(domain).unwrap(); - let decoded = unpack_uri(&encoded, len).unwrap(); + let encoded = pack_uri(domain).unwrap(); + let decoded = unpack_uri(&encoded).unwrap(); assert_eq!(domain, decoded); } } @@ -236,9 +254,9 @@ mod test { if href.chars().any(|c| !c.is_ascii_graphic() || c != ' ') { continue; // extended ascii } - let (encoded, len) = pack_uri(&href).expect(&format!("Failed to encode {}", &href)); + let encoded = pack_uri(&href).expect(&format!("Failed to encode {}", &href)); let decoded = - unpack_uri(&encoded, len).expect(&format!("Failed to decode encoded {}", &href)); + unpack_uri(&encoded).expect(&format!("Failed to decode encoded {}", &href)); assert_eq!(href, decoded); } } diff --git a/uri-pack/src/main.rs b/uri-pack/src/main.rs @@ -14,7 +14,7 @@ fn main() { ascii_counter[*ascii as usize] += 1; } let before = domain.as_bytes().len(); - let after = pack_uri(domain).unwrap().0.len(); + let after = pack_uri(domain).unwrap().len(); before_len += before; after_len += after; if before == after { diff --git a/wire-gateway/src/error_codes.rs b/wire-gateway/src/error_codes.rs @@ -23,8 +23,7 @@ /// Error codes used by GNU Taler #[derive(Debug, Copy, Clone, PartialEq, Eq)] -#[allow(non_camel_case_types)] -#[allow(dead_code)] +#[allow(non_camel_case_types, dead_code)] #[repr(u32)] pub enum 
ErrorCode { /** diff --git a/wire-gateway/src/main.rs b/wire-gateway/src/main.rs @@ -80,16 +80,14 @@ fn encode_info(wtid: &[u8; 32], url: &Url) -> Vec<u8> { let mut buffer = Vec::new(); buffer.extend_from_slice(wtid); let parts = format!("{}{}", url.domain().unwrap_or(""), url.path()); - let (packed, len) = uri_pack::pack_uri(&parts).unwrap(); + let packed = uri_pack::pack_uri(&parts).unwrap(); buffer.push((url.scheme() == "http:") as u8); - buffer.push(len as u8); buffer.extend_from_slice(&packed); return buffer; } fn decode_info(bytes: &[u8]) -> ([u8; 32], Url) { - let len = bytes[33] as usize; - let mut packed = uri_pack::unpack_uri(&bytes[34..], len).unwrap(); + let mut packed = uri_pack::unpack_uri(&bytes[33..]).unwrap(); packed.insert_str(0, "://"); if bytes[32] == 0 { packed.insert(0, 's');