commit f0a82b3a64457d18b29ae497688cfbe9890443e2
parent 0200926d8458b6e8f8f8275e569df11c4c476fb7
Author: Antoine A <>
Date: Thu, 2 Dec 2021 13:06:27 +0100
Use a u5 terminator in uri-pack
Diffstat:
5 files changed, 49 insertions(+), 39 deletions(-)
diff --git a/uri-pack/benches/pack.rs b/uri-pack/benches/pack.rs
@@ -1,23 +1,18 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
-use uri_pack::{pack_uri, unpack_uri};
+use uri_pack::{pack_uri, unpack_uri, PACKED};
fn rand_compat(size: usize) -> String {
String::from_utf8(
- std::iter::repeat_with(|| fastrand::u8(b' '..=b'~'))
+ std::iter::repeat_with(|| fastrand::u8(b'!'..=b'~'))
.take(size)
.collect(),
)
.unwrap()
}
-const COMMON: [u8; 31] = [
- b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
- b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_', b'%',
-];
-
fn rand_simple(size: usize) -> String {
String::from_utf8(
- std::iter::repeat_with(|| COMMON[fastrand::usize(..COMMON.len())])
+ std::iter::repeat_with(|| PACKED[fastrand::usize(..PACKED.len())])
.take(size)
.collect(),
)
@@ -38,7 +33,7 @@ fn criterion_benchmark(c: &mut Criterion) {
group.bench_with_input(BenchmarkId::new("unpack rand", size), size, |b, &size| {
b.iter_batched(
|| pack_uri(&rand_compat(size)).unwrap(),
- |(packed, len)| unpack_uri(&packed, len),
+ |packed| unpack_uri(&packed),
criterion::BatchSize::SmallInput,
)
});
@@ -52,7 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) {
group.bench_with_input(BenchmarkId::new("unpack simple", size), size, |b, &size| {
b.iter_batched(
|| pack_uri(&rand_simple(size)).unwrap(),
- |(packed, len)| unpack_uri(&packed, len),
+ |packed| unpack_uri(&packed),
criterion::BatchSize::SmallInput,
)
});
diff --git a/uri-pack/src/lib.rs b/uri-pack/src/lib.rs
@@ -2,12 +2,11 @@
/// Panic if char not supported
fn pack_ascii(c: u8) -> u8 {
[
- 94, 67, 68, 69, 70, 30, 71, 72, 73, 74, 75, 76, 77, 28, 26, 27, 57, 58, 59, 60, 61, 62, 63,
- 64, 65, 66, 78, 79, 80, 81, 82, 83, 84, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
- 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 85, 86, 87, 88, 29, 89, 0, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 90, 91, 92,
- 93,
- ][(c - b' ') as usize]
+ 67, 68, 69, 70, 29, 71, 72, 73, 74, 75, 76, 77, 28, 26, 27, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 78, 79, 80, 81, 82, 83, 84, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 85, 86, 87, 88, 30, 89, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 90, 91, 92, 93,
+ ][(c - b'!') as usize]
}
/// Unpack an URI ascii char
@@ -15,22 +14,29 @@ fn pack_ascii(c: u8) -> u8 {
fn unpack_ascii(c: u8) -> u8 {
[
b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
- b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_',
- b'%', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
+ b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'%',
+ b'_', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'0', b'1', b'2',
b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'!', b'"', b'#', b'$', b'&', b'\'', b'(', b')',
b'*', b'+', b',', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[', b'\\', b']', b'^', b'`',
- b'{', b'|', b'}', b'~', b' ',
+ b'{', b'|', b'}', b'~',
][c as usize]
}
-/// Check if an ascii char is packable
+/// Check if an ascii char is supported by the encoding
fn supported_ascii(c: &u8) -> bool {
- (b' '..=b'~').contains(c)
+ (b'!'..=b'~').contains(c)
}
/// Extended packing limit
-const EXTENDED: u8 = 31;
+pub const EXTENDED: u8 = 30;
+/// 5-bit (u5) code marking the end of a packed uri
+pub const TERMINATOR: u8 = 31;
+/// Ascii chars that can each be packed into 5 bits
+pub const PACKED: [u8; 30] = [
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
+ b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'%',
+];
#[derive(Debug, Clone, Copy, thiserror::Error)]
pub enum EncodeErr {
@@ -51,7 +57,7 @@ pub enum DecodeErr {
}
/// Pack an uri string into an optimized binary format
-pub fn pack_uri(uri: &str) -> Result<(Vec<u8>, usize), EncodeErr> {
+pub fn pack_uri(uri: &str) -> Result<Vec<u8>, EncodeErr> {
let len = uri.as_bytes().len();
let mut vec = Vec::with_capacity(len);
@@ -116,18 +122,19 @@ pub fn pack_uri(uri: &str) -> Result<(Vec<u8>, usize), EncodeErr> {
write_bits(nb - EXTENDED, 6);
}
}
+ write_bits(TERMINATOR, 5);
// Push pending buffer if not empty
if buff_bits > 0 {
vec.push(buff);
}
- return Ok((vec, len));
+ return Ok(vec);
}
/// Unpack an uri string from its optimized binary format
-pub fn unpack_uri(bytes: &[u8], len: usize) -> Result<String, DecodeErr> {
- let mut buf = String::with_capacity(len);
+pub fn unpack_uri(bytes: &[u8]) -> Result<String, DecodeErr> {
+ let mut buf = String::with_capacity(bytes.len());
let mut iter = bytes.iter();
// Holds pending bits beginning from the most significant bits
@@ -175,8 +182,9 @@ pub fn unpack_uri(bytes: &[u8], len: usize) -> Result<String, DecodeErr> {
return Ok(nb);
};
- for _ in 0..len {
+ loop {
let encoded = match read_nb(5)? {
+ TERMINATOR => break,
EXTENDED => read_nb(6)? + EXTENDED,
nb => nb,
};
@@ -192,7 +200,17 @@ mod test {
use serde_json::Value;
- use crate::{pack_ascii, pack_uri, supported_ascii, unpack_ascii, unpack_uri};
+ use crate::{
+ pack_ascii, pack_uri, supported_ascii, unpack_ascii, unpack_uri, EXTENDED, PACKED,
+ };
+
+ #[test]
+ /// Check that every char in PACKED is packed below the EXTENDED limit
+ fn packed() {
+ for c in PACKED {
+ assert!(pack_ascii(c) < EXTENDED);
+ }
+ }
#[test]
/// Check support every ascii graphic character and space
@@ -217,8 +235,8 @@ mod test {
csv::Reader::from_reader(include_str!("majestic_million.csv").as_bytes());
for record in majestic.records() {
let domain = &record.unwrap()[2];
- let (encoded, len) = pack_uri(domain).unwrap();
- let decoded = unpack_uri(&encoded, len).unwrap();
+ let encoded = pack_uri(domain).unwrap();
+ let decoded = unpack_uri(&encoded).unwrap();
assert_eq!(domain, decoded);
}
}
@@ -236,9 +254,9 @@ mod test {
if href.chars().any(|c| !c.is_ascii_graphic() || c != ' ') {
continue; // extended ascii
}
- let (encoded, len) = pack_uri(&href).expect(&format!("Failed to encode {}", &href));
+ let encoded = pack_uri(&href).expect(&format!("Failed to encode {}", &href));
let decoded =
- unpack_uri(&encoded, len).expect(&format!("Failed to decode encoded {}", &href));
+ unpack_uri(&encoded).expect(&format!("Failed to decode encoded {}", &href));
assert_eq!(href, decoded);
}
}
diff --git a/uri-pack/src/main.rs b/uri-pack/src/main.rs
@@ -14,7 +14,7 @@ fn main() {
ascii_counter[*ascii as usize] += 1;
}
let before = domain.as_bytes().len();
- let after = pack_uri(domain).unwrap().0.len();
+ let after = pack_uri(domain).unwrap().len();
before_len += before;
after_len += after;
if before == after {
diff --git a/wire-gateway/src/error_codes.rs b/wire-gateway/src/error_codes.rs
@@ -23,8 +23,7 @@
/// Error codes used by GNU Taler
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-#[allow(non_camel_case_types)]
-#[allow(dead_code)]
+#[allow(non_camel_case_types, dead_code)]
#[repr(u32)]
pub enum ErrorCode {
/**
diff --git a/wire-gateway/src/main.rs b/wire-gateway/src/main.rs
@@ -80,16 +80,14 @@ fn encode_info(wtid: &[u8; 32], url: &Url) -> Vec<u8> {
let mut buffer = Vec::new();
buffer.extend_from_slice(wtid);
let parts = format!("{}{}", url.domain().unwrap_or(""), url.path());
- let (packed, len) = uri_pack::pack_uri(&parts).unwrap();
+ let packed = uri_pack::pack_uri(&parts).unwrap();
buffer.push((url.scheme() == "http:") as u8);
- buffer.push(len as u8);
buffer.extend_from_slice(&packed);
return buffer;
}
fn decode_info(bytes: &[u8]) -> ([u8; 32], Url) {
- let len = bytes[33] as usize;
- let mut packed = uri_pack::unpack_uri(&bytes[34..], len).unwrap();
+ let mut packed = uri_pack::unpack_uri(&bytes[33..]).unwrap();
packed.insert_str(0, "://");
if bytes[32] == 0 {
packed.insert(0, 's');