commit b94ab22e7b48693d804a00c11166ab19a6860dd9
parent 746650e94729c23fe3765a3f561525cfb35d59c4
Author: Antoine A <>
Date: Tue, 24 Dec 2024 12:44:23 +0100
taler-common: optimize base32 and subject parsing
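Replace the external base32 crate with a hand-rolled Crockford codec
(table-driven decoding with a 16-bit bit buffer, encoding with a 32-bit
bit buffer) and simplify subject parsing: drop the redundant whole_regex
in favour of parse_single and reuse one String allocation when candidate
keys are reassembled from subject parts.

A minimal round-trip sketch of the new codec (not part of this change;
the module path, Base32's Deref target of [u8; 32] and a Debug impl on
Base32Error are assumptions read off the diff below):

    use taler_common::api_common::{base32, Base32};

    #[test]
    fn crockford_round_trip() {
        let key = [0x2Au8; 32];
        let encoded = base32(&key); // 52 Crockford characters for 32 bytes
        let decoded: Base32<32> = encoded.parse().expect("round trip");
        assert_eq!(*decoded, key); // Deref exposes the raw bytes
        // Decoding is case-insensitive and accepts the aliases o -> 0 and i/l -> 1.
    }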
Diffstat:
5 files changed, 91 insertions(+), 30 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -220,12 +220,6 @@ dependencies = [
]
[[package]]
-name = "base32"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076"
-
-[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2415,7 +2409,6 @@ dependencies = [
name = "taler-common"
version = "0.1.0"
dependencies = [
- "base32",
"criterion",
"ed25519-dalek",
"fastrand",
diff --git a/taler-common/Cargo.toml b/taler-common/Cargo.toml
@@ -4,7 +4,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
-base32 = "0.5.1"
serde_with = "3.11.0"
rand = "0.8"
fastrand = "2.2.0"
diff --git a/taler-common/src/api_common.rs b/taler-common/src/api_common.rs
@@ -136,20 +136,88 @@ impl<const L: usize> Deref for Base32<L> {
}
}
+#[rustfmt::skip]
+/*
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C,
+ D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W,
+ X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k,
+ l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,
+*/
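+// Inverse Crockford lookup indexed by (ASCII byte - b'0'); -1 marks an invalid character.
+// Per Crockford's alphabet, O/o decode to 0 and I/i/L/l decode to 1, while U/u is rejected.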
+const CROCKFORD_INV: [i8; 75] = [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
+ 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28,
+ 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19,
+ 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31,
+];
+
impl<const L: usize> FromStr for Base32<L> {
type Err = Base32Error<L>;
fn from_str(s: &str) -> Result<Self, Self::Err> {
- let bytes = base32::decode(base32::Alphabet::Crockford, s).ok_or(Base32Error::Format)?;
- let exact: [u8; L] = bytes
- .try_into()
- .map_err(|vec: Vec<u8>| Base32Error::Length(vec.len()))?;
- Ok(Self(exact))
+ let encoded = s.as_bytes();
+
+ // Check decode length
+ let output_length = encoded.len() * 5 / 8;
+ if output_length != L {
+ return Err(Base32Error::Length(output_length));
+ }
+
+ let mut decoded = [0u8; L];
+ let mut bits = 0;
+ let mut buf = 0u16;
+ let mut cursor = 0usize;
+
+ for b in encoded {
+ // Read input
+ match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() {
+ Some(-1) | None => return Err(Base32Error::Format),
+ Some(lookup) => {
+ // Add 5 bits
+ buf = (buf << 5) | (lookup as u16);
+ bits += 5;
+
+ // Write byte if full
+ if bits >= 8 {
+ bits -= 8;
+ unsafe {
+                            // SAFETY: the length check above guarantees this loop writes exactly L bytes, so cursor < L here
+ *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8;
+ }
+ cursor += 1;
+ }
+ }
+ }
+ }
+ Ok(Self(decoded))
}
}
pub fn base32(bytes: &[u8]) -> String {
- base32::encode(base32::Alphabet::Crockford, bytes)
+ const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
+    let mut encoded = Vec::with_capacity((bytes.len() * 8 + 4) / 5); // ceil(len * 8 / 5) output characters
+ let mut bits = 0;
+ let mut buf = 0u32;
+
+ for &byte in bytes {
+ // Add a byte
+ buf = (buf << 8) | byte as u32;
+ bits += 8;
+
+ // Write 5 bits
+ while bits >= 5 {
+ bits -= 5;
+ let index = (buf >> bits) & 0b11111; // Extract top 5 bits
+ encoded.push(CROCKFORD_ALPHABET[index as usize]);
+ }
+ }
+
+ if bits > 0 {
+ // Handle remaining bits (padding)
+ let index = (buf << (5 - bits)) & 0b11111; // Shift to fill 5 bits
+ encoded.push(CROCKFORD_ALPHABET[index as usize]);
+ }
+
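+    // SAFETY: every pushed byte comes from CROCKFORD_ALPHABET, which is pure ASCII, hence valid UTF-8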
+ unsafe { String::from_utf8_unchecked(encoded) }
}
impl<const L: usize> Display for Base32<L> {
diff --git a/taler-common/src/lib.rs b/taler-common/src/lib.rs
@@ -18,8 +18,8 @@ pub mod api_common;
pub mod api_params;
pub mod api_wire;
pub mod error_code;
-pub mod types;
pub mod subject;
+pub mod types;
pub mod config {
// TODO
}
diff --git a/taler-common/src/subject.rs b/taler-common/src/subject.rs
@@ -39,7 +39,6 @@ pub enum IncomingSubjectResult {
pub struct SubjectParse {
part_regex: Regex,
key_regex: Regex,
- whole_regex: Regex,
}
impl SubjectParse {
@@ -47,7 +46,6 @@ impl SubjectParse {
Self {
part_regex: Regex::new("[:a-z0-9A-Z]*").expect("part_pattern regex"),
key_regex: Regex::new("(KYC:)?([a-z0-9A-Z]{52})").expect("key_regex regex"),
- whole_regex: Regex::new("^(KYC:)?([a-z0-9A-Z]{52})$").expect("whole_pattern regex"),
}
}
@@ -58,7 +56,7 @@ impl SubjectParse {
* one from parts.
**/
pub fn parse_incoming_unstructured(&self, subject: &str) -> Option<IncomingSubjectResult> {
- /** Parse an incoming subject */
+ /** Parse an incoming subject */
fn parse_single(str: &str) -> Option<IncomingSubject> {
// Check key type
let (kind, key) = if let Some(key) = str.strip_prefix("KYC:") {
@@ -77,22 +75,25 @@ impl SubjectParse {
}
/** Parse a unique incoming subject */
- fn parse_unique<A: AsRef<str> + Debug>(
- candidates: impl Iterator<Item = A>,
+ fn parse_unique(
+ mut candidates: impl Iterator<Item = IncomingSubject>,
) -> Option<IncomingSubjectResult> {
- let mut keys = candidates.filter_map(|s| parse_single(s.as_ref()));
// Check none
- let first = keys.next()?;
+ let first = candidates.next()?;
// Check many
- if keys.next().is_some() {
+ if candidates.next().is_some() {
return Some(IncomingSubjectResult::Ambiguous);
}
Some(IncomingSubjectResult::Success(first))
}
// Check whole key
- if let Some(res) = parse_unique(self.key_regex.find_iter(subject).map(|it| it.as_str())) {
+ if let Some(res) = parse_unique(
+ self.key_regex
+ .find_iter(subject)
+ .filter_map(|it| parse_single(it.as_str())),
+ ) {
return Some(res);
}
@@ -103,6 +104,7 @@ impl SubjectParse {
.map(|it| it.as_str())
.collect();
// We assemble the biggest chunks first, as prefixed keys must be detected before bare ones
+            let mut concatenated = String::with_capacity(56); // optional "KYC:" prefix + 52-character key
for window_size in (2..=parts.len()).rev() {
if let Some(res) = parse_unique(parts.windows(window_size).filter_map(|parts| {
// Fast skipping path
@@ -112,13 +114,12 @@ impl SubjectParse {
return None;
}
- // TODO lending iterator to reduce allocations
- let concatenated: String = parts.iter().copied().collect();
- if self.whole_regex.is_match(&concatenated) {
- Some(concatenated)
- } else {
- None
+ // Reuse a single allocation
+ concatenated.clear();
+ for part in parts {
+ concatenated.push_str(part);
}
+ parse_single(&concatenated)
})) {
return Some(res);
}