taler-rust

GNU Taler code in Rust. Largely core banking integrations.
Log | Files | Refs | Submodules | README | LICENSE

commit b94ab22e7b48693d804a00c11166ab19a6860dd9
parent 746650e94729c23fe3765a3f561525cfb35d59c4
Author: Antoine A <>
Date:   Tue, 24 Dec 2024 12:44:23 +0100

taler-common: optimize base32 and subject parsing

Diffstat:
MCargo.lock | 7-------
Mtaler-common/Cargo.toml | 1-
Mtaler-common/src/api_common.rs | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Mtaler-common/src/lib.rs | 2+-
Mtaler-common/src/subject.rs | 31++++++++++++++++---------------
5 files changed, 91 insertions(+), 30 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -220,12 +220,6 @@ dependencies = [ ] [[package]] -name = "base32" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076" - -[[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2415,7 +2409,6 @@ dependencies = [ name = "taler-common" version = "0.1.0" dependencies = [ - "base32", "criterion", "ed25519-dalek", "fastrand", diff --git a/taler-common/Cargo.toml b/taler-common/Cargo.toml @@ -4,7 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -base32 = "0.5.1" serde_with = "3.11.0" rand = "0.8" fastrand = "2.2.0" diff --git a/taler-common/src/api_common.rs b/taler-common/src/api_common.rs @@ -136,20 +136,88 @@ impl<const L: usize> Deref for Base32<L> { } } +#[rustfmt::skip] +/* + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C, + D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, + X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k, + l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, +*/ +const CROCKFORD_INV: [i8; 75] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, + 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, + 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, + 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31, +]; + impl<const L: usize> FromStr for Base32<L> { type Err = Base32Error<L>; fn from_str(s: &str) -> Result<Self, Self::Err> { - let bytes = base32::decode(base32::Alphabet::Crockford, s).ok_or(Base32Error::Format)?; - let exact: [u8; L] = bytes - .try_into() - .map_err(|vec: Vec<u8>| Base32Error::Length(vec.len()))?; - Ok(Self(exact)) + let encoded = s.as_bytes(); + + // Check decode length + let output_length = encoded.len() * 5 / 8; + if output_length != L { + return Err(Base32Error::Length(output_length)); + } + + let mut decoded = [0u8; L]; + let mut bits = 0; + let mut buf = 0u16; + let mut cursor = 0usize; + + for b in encoded { + // Read input + match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() { + Some(-1) | None => return Err(Base32Error::Format), + Some(lookup) => { + // Add 5 bits + buf = (buf << 5) | (lookup as u16); + bits += 5; + + // Write byte if full + if bits >= 8 { + bits -= 8; + unsafe { + // SAFETY we know this algorithm never produce more than L bytes + *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8; + } + cursor += 1; + } + } + } + } + Ok(Self(decoded)) } } pub fn base32(bytes: &[u8]) -> String { - base32::encode(base32::Alphabet::Crockford, bytes) + const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ"; + let mut encoded = Vec::with_capacity((bytes.len() + 3) / 4 * 5); + let mut bits = 0; + let mut buf = 0u32; + + for &byte in bytes { + // Add a byte + buf = (buf << 8) | byte as u32; + bits += 8; + + // Write 5 bits + while bits >= 5 { + bits -= 5; + let index = (buf >> bits) & 0b11111; // Extract top 5 bits + encoded.push(CROCKFORD_ALPHABET[index as usize]); + } + } + + if bits > 0 { + // Handle remaining bits (padding) + let index = (buf << (5 - bits)) & 0b11111; // Shift to fill 5 bits + encoded.push(CROCKFORD_ALPHABET[index as usize]); + } + + unsafe { String::from_utf8_unchecked(encoded) } } impl<const L: usize> Display for Base32<L> { diff --git a/taler-common/src/lib.rs b/taler-common/src/lib.rs @@ -18,8 +18,8 @@ pub mod api_common; pub mod api_params; pub mod api_wire; pub mod error_code; -pub mod types; pub mod subject; +pub mod types; pub mod config { // TODO } diff --git a/taler-common/src/subject.rs b/taler-common/src/subject.rs @@ -39,7 +39,6 @@ pub enum IncomingSubjectResult { pub struct SubjectParse { part_regex: Regex, key_regex: Regex, - whole_regex: Regex, } impl SubjectParse { @@ -47,7 +46,6 @@ impl SubjectParse { Self { part_regex: Regex::new("[:a-z0-9A-Z]*").expect("part_pattern regex"), key_regex: Regex::new("(KYC:)?([a-z0-9A-Z]{52})").expect("key_regex regex"), - whole_regex: Regex::new("^(KYC:)?([a-z0-9A-Z]{52})$").expect("whole_pattern regex"), } } @@ -58,7 +56,7 @@ impl SubjectParse { * one from parts. **/ pub fn parse_incoming_unstructured(&self, subject: &str) -> Option<IncomingSubjectResult> { - /** Parse an incoming subject */ + /** Parse an incoming subject */ fn parse_single(str: &str) -> Option<IncomingSubject> { // Check key type let (kind, key) = if let Some(key) = str.strip_prefix("KYC:") { @@ -77,22 +75,25 @@ impl SubjectParse { } /** Parse a unique incoming subject */ - fn parse_unique<A: AsRef<str> + Debug>( - candidates: impl Iterator<Item = A>, + fn parse_unique( + mut candidates: impl Iterator<Item = IncomingSubject>, ) -> Option<IncomingSubjectResult> { - let mut keys = candidates.filter_map(|s| parse_single(s.as_ref())); // Check none - let first = keys.next()?; + let first = candidates.next()?; // Check many - if keys.next().is_some() { + if candidates.next().is_some() { return Some(IncomingSubjectResult::Ambiguous); } Some(IncomingSubjectResult::Success(first)) } // Check whole key - if let Some(res) = parse_unique(self.key_regex.find_iter(subject).map(|it| it.as_str())) { + if let Some(res) = parse_unique( + self.key_regex + .find_iter(subject) + .filter_map(|it| parse_single(it.as_str())), + ) { return Some(res); } @@ -103,6 +104,7 @@ impl SubjectParse { .map(|it| it.as_str()) .collect(); // We start by assembling big chunks first as we need to detect keys with prefix before other ones + let mut concatenated = String::with_capacity(56); for window_size in (2..=parts.len()).rev() { if let Some(res) = parse_unique(parts.windows(window_size).filter_map(|parts| { // Fast skipping path @@ -112,13 +114,12 @@ impl SubjectParse { return None; } - // TODO lending iterator to reduce allocations - let concatenated: String = parts.iter().copied().collect(); - if self.whole_regex.is_match(&concatenated) { - Some(concatenated) - } else { - None + // Reuse a single allocation + concatenated.clear(); + for part in parts { + concatenated.push_str(part); } + parse_single(&concatenated) })) { return Some(res); }