From 0200926d8458b6e8f8f8275e569df11c4c476fb7 Mon Sep 17 00:00:00 2001 From: Antoine A <> Date: Wed, 1 Dec 2021 22:25:37 +0100 Subject: Benchmark the happy path and improve common domain analysis --- uri-pack/benches/pack.rs | 32 ++++++++++++++++++++++++++++++-- uri-pack/src/main.rs | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/uri-pack/benches/pack.rs b/uri-pack/benches/pack.rs index e90b818..b7fb975 100644 --- a/uri-pack/benches/pack.rs +++ b/uri-pack/benches/pack.rs @@ -10,24 +10,52 @@ fn rand_compat(size: usize) -> String { .unwrap() } +const COMMON: [u8; 31] = [ + b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', + b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_', b'%', +]; + +fn rand_simple(size: usize) -> String { + String::from_utf8( + std::iter::repeat_with(|| COMMON[fastrand::usize(..COMMON.len())]) + .take(size) + .collect(), + ) + .unwrap() +} + fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("Uri"); for size in [50, 500, 4048].iter() { group.throughput(Throughput::Bytes(*size as u64)); - group.bench_with_input(BenchmarkId::new("pack", size), size, |b, &size| { + group.bench_with_input(BenchmarkId::new("pack rand", size), size, |b, &size| { b.iter_batched( || rand_compat(size), |uri| pack_uri(&uri).unwrap(), criterion::BatchSize::SmallInput, ) }); - group.bench_with_input(BenchmarkId::new("unpack", size), size, |b, &size| { + group.bench_with_input(BenchmarkId::new("unpack rand", size), size, |b, &size| { b.iter_batched( || pack_uri(&rand_compat(size)).unwrap(), |(packed, len)| unpack_uri(&packed, len), criterion::BatchSize::SmallInput, ) }); + group.bench_with_input(BenchmarkId::new("pack simple", size), size, |b, &size| { + b.iter_batched( + || rand_simple(size), + |uri| pack_uri(&uri).unwrap(), + criterion::BatchSize::SmallInput, + ) + }); + group.bench_with_input(BenchmarkId::new("unpack simple", size), size, |b, &size| { + b.iter_batched( + || pack_uri(&rand_simple(size)).unwrap(), + |(packed, len)| unpack_uri(&packed, len), + criterion::BatchSize::SmallInput, + ) + }); } group.finish(); } diff --git a/uri-pack/src/main.rs b/uri-pack/src/main.rs index 85ab3f3..e660fe4 100644 --- a/uri-pack/src/main.rs +++ b/uri-pack/src/main.rs @@ -3,33 +3,52 @@ use uri_pack::pack_uri; fn main() { let mut majestic = csv::Reader::from_reader(include_str!("majestic_million.csv").as_bytes()); let mut ascii_counter = [0u64; 255]; - let mut count = 0; - let mut before = 0; - let mut after = 0; + let mut before_len = 0; + let mut after_len = 0; + let mut bigger = 0; + let mut same = 0; + let mut smaller = 0; for record in majestic.records() { let domain = &record.unwrap()[2]; for ascii in domain.as_bytes() { ascii_counter[*ascii as usize] += 1; } - count += 1; - before += domain.as_bytes().len(); - after += pack_uri(domain).unwrap().0.len(); + let before = domain.as_bytes().len(); + let after = pack_uri(domain).unwrap().0.len(); + before_len += before; + after_len += after; + if before == after { + same += 1; + } else if before > after { + smaller += 1; + } else { + bigger += 1; + } } let sum: u64 = ascii_counter.iter().sum(); + let max_len = ascii_counter.iter().max().unwrap_or(&0).to_string().len(); for (ascii, count) in ascii_counter .into_iter() .enumerate() .filter(|(_, count)| *count > 0) { println!( - "{} {:.2}% {:>4$} {:=<5$}", + "{} {:>4$} {:.2}% {:=<5$}", ascii as u8 as char, - count as f32 / sum as f32 * 100., count, + count as f32 / sum as f32 * 100., "", - sum.to_string().len(), - (count * 100 / sum) as usize, + max_len, + (count * 200 / sum) as usize, ) } - println!("\nBefore: {} After: {}", before / count, after / count); + let count = bigger + smaller + same; + println!( + "\nBefore ~{}B After ~{}B\nBigger {:.2}% Same {:.2}% Smaller {:.2}%", + before_len / count, + after_len / count, + (bigger as f32 / count as f32 * 100.), + (same as f32 / count as f32 * 100.), + (smaller as f32 / count as f32 * 100.) + ); } -- cgit v1.2.3