taler-rust

GNU Taler code in Rust. Largely core banking integrations.
Log | Files | Refs | Submodules | README | LICENSE

commit 77cd5c7b937e54cb7eb4045798187a4e7943c642
parent 2fb89d3901e0177c1e1130c6306b4a9a22a6f2ec
Author: Antoine A <>
Date:   Wed, 25 Dec 2024 05:07:21 +0100

taler-common: optimize base32

Diffstat:
Mtaler-common/Cargo.toml | 4++++
Ataler-common/benches/base32.rs | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtaler-common/src/types/base32.rs | 170+++++++++++++++++++++++++++++++++++++++++--------------------------------------
3 files changed, 150 insertions(+), 82 deletions(-)

diff --git a/taler-common/Cargo.toml b/taler-common/Cargo.toml @@ -22,3 +22,7 @@ criterion = { version = "0.5" } [[bench]] name = "subject" harness = false + +[[bench]] +name = "base32" +harness = false diff --git a/taler-common/benches/base32.rs b/taler-common/benches/base32.rs @@ -0,0 +1,58 @@ +/* + This file is part of TALER + Copyright (C) 2024 Taler Systems SA + + TALER is free software; you can redistribute it and/or modify it under the + terms of the GNU Affero General Public License as published by the Free Software + Foundation; either version 3, or (at your option) any later version. + + TALER is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with + TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> +*/ + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use taler_common::types::base32::{decode, encode, Base32}; + +fn parser(c: &mut Criterion) { + let mut rng = fastrand::Rng::with_seed(42); + let encode_random: Vec<_> = (0..30) + .map(|_| { + let mut bytes = [0; 64]; + rng.fill(&mut bytes); + bytes + }) + .collect(); + let decode_valid: Vec<String> = (0..30).map(|_| Base32::<64>::rand().to_string()).collect(); + let decode_randoms: Vec<String> = (0..30) + .map(|_| (0..56).map(|_| rng.char(..)).collect()) + .collect(); + + c.bench_function("encode_random", |b| { + b.iter(|| { + for case in &encode_random { + encode(black_box(case.as_ref())); + } + }) + }); + c.bench_function("decode_valid", |b| { + b.iter(|| { + for case in &decode_valid { + decode::<32>(black_box(case.as_str())).ok(); + } + }) + }); + c.bench_function("decode_randoms", |b| { + b.iter(|| { + for case in &decode_randoms { + decode::<32>(black_box(case.as_str())).ok(); + } + }) + }); +} + +criterion_group!(benches, parser); +criterion_main!(benches); diff --git a/taler-common/src/types/base32.rs b/taler-common/src/types/base32.rs @@ -18,14 +18,97 @@ use std::{fmt::Display, ops::Deref, str::FromStr}; use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; +pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ"; + +/** Encode bytes into a Crockford's base32 string */ +pub fn encode(bytes: &[u8]) -> String { + // Pre allocate for all chunks + let mut encoded = Vec::with_capacity((bytes.len() / 5 + 1) * 8); + // Encode chunks of 5B for 8 chars + for chunk in bytes.chunks(5) { + let mut buf = [0u8; 5]; + for (i, &b) in chunk.iter().enumerate() { + buf[i] = b; + } + encoded.extend_from_slice(&[ + CROCKFORD_ALPHABET[((buf[0] & 0xF8) >> 3) as usize], + CROCKFORD_ALPHABET[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize], + CROCKFORD_ALPHABET[((buf[1] & 0x3E) >> 1) as usize], + CROCKFORD_ALPHABET[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize], + CROCKFORD_ALPHABET[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize], + CROCKFORD_ALPHABET[((buf[3] & 0x7C) >> 2) as usize], + CROCKFORD_ALPHABET[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize], + CROCKFORD_ALPHABET[(buf[4] & 0x1F) as usize], + ]); + } + // Truncate incomplete ending chunk + encoded.truncate((bytes.len() * 8 + 4) / 5); + + // SAFETY: only contains valid ASCII characters from CROCKFORD_ALPHABET + unsafe { String::from_utf8_unchecked(encoded) } +} + #[derive(Debug, thiserror::Error)] -pub enum Base32Error<const L: usize> { - #[error("Invalid Crockford’s base32 format")] +pub enum Base32Error<const N: usize> { + #[error("Invalid Crockford's base32 format")] Format, - #[error("Invalid length: expected {L} bytess got {0}")] + #[error("Invalid length: expected {N} bytess got {0}")] Length(usize), } +#[rustfmt::skip] +/* + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C, + D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, + X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k, + l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, +*/ +const CROCKFORD_INV: [i8; 75] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, + 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, + 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, + 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31, +]; + +/** Decode N bytes from a Crockford's base32 string */ +pub fn decode<const N: usize>(str: &str) -> Result<[u8; N], Base32Error<N>> { + let encoded = str.as_bytes(); + + // Check decode length + let output_length = encoded.len() * 5 / 8; + if output_length != N { + return Err(Base32Error::Length(output_length)); + } + + let mut decoded = [0u8; N]; + let mut bits = 0; + let mut buf = 0u16; + let mut cursor = 0usize; + + for b in encoded { + // Read input + match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() { + Some(-1) | None => return Err(Base32Error::Format), + Some(lookup) => { + // Add 5 bits + buf = (buf << 5) | (lookup as u16); + bits += 5; + + // Write byte if full + if bits >= 8 { + bits -= 8; + unsafe { + // SAFETY: never produce more than L bytes + *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8; + } + cursor += 1; + } + } + } + } + Ok(decoded) +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Base32<const L: usize>([u8; L]); @@ -51,94 +134,17 @@ impl<const L: usize> Deref for Base32<L> { } } -#[rustfmt::skip] -/* - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C, - D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, - X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k, - l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, -*/ -const CROCKFORD_INV: [i8; 75] = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, - 13, 14, 15, 16, 17, 1, 18, 19, 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, - 29, 30, 31, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, 16, 17, 1, 18, 19, - 1, 20, 21, 0, 22, 23, 24, 25, 26, -1, 27, 28, 29, 30, 31, -]; - -pub const CROCKFORD_ALPHABET: &[u8] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ"; - impl<const L: usize> FromStr for Base32<L> { type Err = Base32Error<L>; fn from_str(s: &str) -> Result<Self, Self::Err> { - let encoded = s.as_bytes(); - - // Check decode length - let output_length = encoded.len() * 5 / 8; - if output_length != L { - return Err(Base32Error::Length(output_length)); - } - - let mut decoded = [0u8; L]; - let mut bits = 0; - let mut buf = 0u16; - let mut cursor = 0usize; - - for b in encoded { - // Read input - match CROCKFORD_INV.get(b.wrapping_sub(b'0') as usize).copied() { - Some(-1) | None => return Err(Base32Error::Format), - Some(lookup) => { - // Add 5 bits - buf = (buf << 5) | (lookup as u16); - bits += 5; - - // Write byte if full - if bits >= 8 { - bits -= 8; - unsafe { - // SAFETY we know this algorithm never produce more than L bytes - *decoded.get_unchecked_mut(cursor) = (buf >> bits) as u8; - } - cursor += 1; - } - } - } - } - Ok(Self(decoded)) - } -} - -pub fn base32(bytes: &[u8]) -> String { - let mut encoded = Vec::with_capacity((bytes.len() + 3) / 4 * 5); - let mut bits = 0; - let mut buf = 0u32; - - for &byte in bytes { - // Add a byte - buf = (buf << 8) | byte as u32; - bits += 8; - - // Write 5 bits - while bits >= 5 { - bits -= 5; - let index = (buf >> bits) & 0b11111; // Extract top 5 bits - encoded.push(CROCKFORD_ALPHABET[index as usize]); - } + Ok(Self(decode(s)?)) } - - if bits > 0 { - // Handle remaining bits (padding) - let index = (buf << (5 - bits)) & 0b11111; // Shift to fill 5 bits - encoded.push(CROCKFORD_ALPHABET[index as usize]); - } - - unsafe { String::from_utf8_unchecked(encoded) } } impl<const L: usize> Display for Base32<L> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&base32(&self.0)) + f.write_str(&encode(&self.0)) } }