summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Antoine A <> 2021-12-01 22:25:37 +0100
committer Antoine A <> 2021-12-01 22:25:37 +0100
commit 0200926d8458b6e8f8f8275e569df11c4c476fb7 (patch)
tree 91e08801e1d288ea57e8d22ab895856b7a6c8058
parent d7e604f226a74e6a0b57382cedf687d651c3ca5e (diff)
download depolymerization-0200926d8458b6e8f8f8275e569df11c4c476fb7.tar.gz
depolymerization-0200926d8458b6e8f8f8275e569df11c4c476fb7.tar.bz2
depolymerization-0200926d8458b6e8f8f8275e569df11c4c476fb7.zip
Benchmark the happy path and improve common domain analysis
-rw-r--r-- uri-pack/benches/pack.rs 32
-rw-r--r-- uri-pack/src/main.rs 41
2 files changed, 60 insertions, 13 deletions
diff --git a/uri-pack/benches/pack.rs b/uri-pack/benches/pack.rs
index e90b818..b7fb975 100644
--- a/uri-pack/benches/pack.rs
+++ b/uri-pack/benches/pack.rs
@@ -10,24 +10,52 @@ fn rand_compat(size: usize) -> String {
.unwrap()
}
+const COMMON: [u8; 31] = [
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
+ b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'.', b'/', b'-', b'_', b'%',
+];
+
+fn rand_simple(size: usize) -> String {
+ String::from_utf8(
+ std::iter::repeat_with(|| COMMON[fastrand::usize(..COMMON.len())])
+ .take(size)
+ .collect(),
+ )
+ .unwrap()
+}
+
fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("Uri");
for size in [50, 500, 4048].iter() {
group.throughput(Throughput::Bytes(*size as u64));
- group.bench_with_input(BenchmarkId::new("pack", size), size, |b, &size| {
+ group.bench_with_input(BenchmarkId::new("pack rand", size), size, |b, &size| {
b.iter_batched(
|| rand_compat(size),
|uri| pack_uri(&uri).unwrap(),
criterion::BatchSize::SmallInput,
)
});
- group.bench_with_input(BenchmarkId::new("unpack", size), size, |b, &size| {
+ group.bench_with_input(BenchmarkId::new("unpack rand", size), size, |b, &size| {
b.iter_batched(
|| pack_uri(&rand_compat(size)).unwrap(),
|(packed, len)| unpack_uri(&packed, len),
criterion::BatchSize::SmallInput,
)
});
+ group.bench_with_input(BenchmarkId::new("pack simple", size), size, |b, &size| {
+ b.iter_batched(
+ || rand_simple(size),
+ |uri| pack_uri(&uri).unwrap(),
+ criterion::BatchSize::SmallInput,
+ )
+ });
+ group.bench_with_input(BenchmarkId::new("unpack simple", size), size, |b, &size| {
+ b.iter_batched(
+ || pack_uri(&rand_simple(size)).unwrap(),
+ |(packed, len)| unpack_uri(&packed, len),
+ criterion::BatchSize::SmallInput,
+ )
+ });
}
group.finish();
}
diff --git a/uri-pack/src/main.rs b/uri-pack/src/main.rs
index 85ab3f3..e660fe4 100644
--- a/uri-pack/src/main.rs
+++ b/uri-pack/src/main.rs
@@ -3,33 +3,52 @@ use uri_pack::pack_uri;
fn main() {
let mut majestic = csv::Reader::from_reader(include_str!("majestic_million.csv").as_bytes());
let mut ascii_counter = [0u64; 255];
- let mut count = 0;
- let mut before = 0;
- let mut after = 0;
+ let mut before_len = 0;
+ let mut after_len = 0;
+ let mut bigger = 0;
+ let mut same = 0;
+ let mut smaller = 0;
for record in majestic.records() {
let domain = &record.unwrap()[2];
for ascii in domain.as_bytes() {
ascii_counter[*ascii as usize] += 1;
}
- count += 1;
- before += domain.as_bytes().len();
- after += pack_uri(domain).unwrap().0.len();
+ let before = domain.as_bytes().len();
+ let after = pack_uri(domain).unwrap().0.len();
+ before_len += before;
+ after_len += after;
+ if before == after {
+ same += 1;
+ } else if before > after {
+ smaller += 1;
+ } else {
+ bigger += 1;
+ }
}
let sum: u64 = ascii_counter.iter().sum();
+ let max_len = ascii_counter.iter().max().unwrap_or(&0).to_string().len();
for (ascii, count) in ascii_counter
.into_iter()
.enumerate()
.filter(|(_, count)| *count > 0)
{
println!(
- "{} {:.2}% {:>4$} {:=<5$}",
+ "{} {:>4$} {:.2}% {:=<5$}",
ascii as u8 as char,
- count as f32 / sum as f32 * 100.,
count,
+ count as f32 / sum as f32 * 100.,
"",
- sum.to_string().len(),
- (count * 100 / sum) as usize,
+ max_len,
+ (count * 200 / sum) as usize,
)
}
- println!("\nBefore: {} After: {}", before / count, after / count);
+ let count = bigger + smaller + same;
+ println!(
+ "\nBefore ~{}B After ~{}B\nBigger {:.2}% Same {:.2}% Smaller {:.2}%",
+ before_len / count,
+ after_len / count,
+ (bigger as f32 / count as f32 * 100.),
+ (same as f32 / count as f32 * 100.),
+ (smaller as f32 / count as f32 * 100.)
+ );
}