commit 77cd5c7b937e54cb7eb4045798187a4e7943c642
parent 2fb89d3901e0177c1e1130c6306b4a9a22a6f2ec
Author: Antoine A <>
Date: Wed, 25 Dec 2024 05:07:21 +0100
taler-common: optimize base32
Diffstat:
3 files changed, 150 insertions(+), 82 deletions(-)
diff --git a/taler-common/Cargo.toml b/taler-common/Cargo.toml
@@ -22,3 +22,7 @@ criterion = { version = "0.5" }
[[bench]]
name = "subject"
harness = false
+
+[[bench]]
+name = "base32"
+harness = false
diff --git a/taler-common/benches/base32.rs b/taler-common/benches/base32.rs
@@ -0,0 +1,58 @@
+/*
+ This file is part of TALER
+ Copyright (C) 2024 Taler Systems SA
+
+ TALER is free software; you can redistribute it and/or modify it under the
+ terms of the GNU Affero General Public License as published by the Free Software
+ Foundation; either version 3, or (at your option) any later version.
+
+ TALER is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License along with
+ TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
+*/
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use taler_common::types::base32::{decode, encode, Base32};
+
+fn parser(c: &mut Criterion) {
+ let mut rng = fastrand::Rng::with_seed(42);
+ let encode_random: Vec<_> = (0..30)
+ .map(|_| {
+ let mut bytes = [0; 64];
+ rng.fill(&mut bytes);
+ bytes
+ })
+ .collect();
+ let decode_valid: Vec<String> = (0..30).map(|_| Base32::<64>::rand().to_string()).collect();
+ let decode_randoms: Vec<String> = (0..30)
+ .map(|_| (0..56).map(|_| rng.char(..)).collect())
+ .collect();
+
+ c.bench_function("encode_random", |b| {
+ b.iter(|| {
+ for case in &encode_random {
+ encode(black_box(case.as_ref()));
+ }
+ })
+ });
+ c.bench_function("decode_valid", |b| {
+ b.iter(|| {
+ for case in &decode_valid {
+ decode::<32>(black_box(case.as_str())).ok();
+ }
+ })
+ });
+ c.bench_function("decode_randoms", |b| {
+ b.iter(|| {
+ for case in &decode_randoms {
+ decode::<32>(black_box(case.as_str())).ok();
+ }
+ })
+ });
+}
+
+criterion_group!(benches, parser);
+criterion_main!(benches);
diff --git a/taler-common/src/types/base32.rs b/taler-common/src/types/base32.rs
@@ -18,14 +18,97 @@ use std::{fmt::Display, ops::Deref, str::FromStr};
use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
+pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
+
+/** Encode bytes into a Crockford's base32 string */
+pub fn encode(bytes: &[u8]) -> String {
+ // Pre allocate for all chunks
+ let mut encoded = Vec::with_capacity((bytes.len() / 5 + 1) * 8);
+ // Encode chunks of 5B for 8 chars
+ for chunk in bytes.chunks(5) {
+ let mut buf = [0u8; 5];
+ for (i, &b) in chunk.iter().enumerate() {
+ buf[i] = b;
+ }
+ encoded.extend_from_slice(&[
+ CROCKFORD_ALPHABET[((buf[0] & 0xF8) >> 3) as usize],
+ CROCKFORD_ALPHABET[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize],
+ CROCKFORD_ALPHABET[((buf[1] & 0x3E) >> 1) as usize],
+ CROCKFORD_ALPHABET[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize],
+ CROCKFORD_ALPHABET[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize],
+ CROCKFORD_ALPHABET[((buf[3] & 0x7C) >> 2) as usize],
+ CROCKFORD_ALPHABET[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize],
+ CROCKFORD_ALPHABET[(buf[4] & 0x1F) as usize],
+ ]);
+ }
+ // Truncate incomplete ending chunk
+ encoded.truncate((bytes.len() * 8 + 4) / 5);
+
+ // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET
+ unsafe { String::from_utf8_unchecked(encoded) }
+}
+
#[derive(Debug, thiserror::Error)]
-pub enum Base32Error<const L: usize> {
- #[error("Invalid Crockford’s base32 format")]
+pub enum Base32Error<const N: usize> {
+ #[error("Invalid Crockford's base32 format")]
Format,
- #[error("Invalid length: expected {L} bytess got {0}")]
+ #[error("Invalid length: expected {N} bytess got {0}")]
Length(usize),
}
+#[rustfmt::skip]
+/*
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C,
+ D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W,
+ X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k,
+ l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,
+*/
+const CROCKFORD_INV: [i8; 75] = [
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
+ 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28,
+ 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19,
+ 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31,
+];
+
+/** Decode N bytes from a Crockford's base32 string */
+pub fn decode<const N: usize>(str: &str) -> Result<[u8; N], Base32Error<N>> {
+ let encoded = str.as_bytes();
+
+ // Check decode length
+ let output_length = encoded.len() * 5 / 8;
+ if output_length != N {
+ return Err(Base32Error::Length(output_length));
+ }
+
+ let mut decoded = [0u8; N];
+ let mut bits = 0;
+ let mut buf = 0u16;
+ let mut cursor = 0usize;
+
+ for b in encoded {
+ // Read input
+ match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() {
+ Some(-1) | None => return Err(Base32Error::Format),
+ Some(lookup) => {
+ // Add 5 bits
+ buf = (buf << 5) | (lookup as u16);
+ bits += 5;
+
+ // Write byte if full
+ if bits >= 8 {
+ bits -= 8;
+ unsafe {
+ // SAFETY: never produce more than L bytes
+ *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8;
+ }
+ cursor += 1;
+ }
+ }
+ }
+ }
+ Ok(decoded)
+}
+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Base32<const L: usize>([u8; L]);
@@ -51,94 +134,17 @@ impl<const L: usize> Deref for Base32<L> {
}
}
-#[rustfmt::skip]
-/*
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C,
- D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W,
- X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k,
- l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,
-*/
-const CROCKFORD_INV: [i8; 75] = [
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
- 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28,
- 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19,
- 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31,
-];
-
-pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
-
impl<const L: usize> FromStr for Base32<L> {
type Err = Base32Error<L>;
fn from_str(s: &str) -> Result<Self, Self::Err> {
- let encoded = s.as_bytes();
-
- // Check decode length
- let output_length = encoded.len() * 5 / 8;
- if output_length != L {
- return Err(Base32Error::Length(output_length));
- }
-
- let mut decoded = [0u8; L];
- let mut bits = 0;
- let mut buf = 0u16;
- let mut cursor = 0usize;
-
- for b in encoded {
- // Read input
- match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() {
- Some(-1) | None => return Err(Base32Error::Format),
- Some(lookup) => {
- // Add 5 bits
- buf = (buf << 5) | (lookup as u16);
- bits += 5;
-
- // Write byte if full
- if bits >= 8 {
- bits -= 8;
- unsafe {
- // SAFETY we know this algorithm never produce more than L bytes
- *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8;
- }
- cursor += 1;
- }
- }
- }
- }
- Ok(Self(decoded))
- }
-}
-
-pub fn base32(bytes: &[u8]) -> String {
- let mut encoded = Vec::with_capacity((bytes.len() + 3) / 4 * 5);
- let mut bits = 0;
- let mut buf = 0u32;
-
- for &byte in bytes {
- // Add a byte
- buf = (buf << 8) | byte as u32;
- bits += 8;
-
- // Write 5 bits
- while bits >= 5 {
- bits -= 5;
- let index = (buf >> bits) & 0b11111; // Extract top 5 bits
- encoded.push(CROCKFORD_ALPHABET[index as usize]);
- }
+ Ok(Self(decode(s)?))
}
-
- if bits > 0 {
- // Handle remaining bits (padding)
- let index = (buf << (5 - bits)) & 0b11111; // Shift to fill 5 bits
- encoded.push(CROCKFORD_ALPHABET[index as usize]);
- }
-
- unsafe { String::from_utf8_unchecked(encoded) }
}
impl<const L: usize> Display for Base32<L> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- f.write_str(&base32(&self.0))
+ f.write_str(&encode(&self.0))
}
}