Continuing the discussion from Cheap random number generator with `std`:
I tried that. I'm using `criterion`. However, creating a cycling iterator over my data seemed to have even more overhead than picking elements randomly. Either way, some overhead remains. I don't understand `criterion` well enough to see how I can avoid it (and still get measurements of a single access). This is what I do:
use mmtkvdb::{self as kv, traits::*};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::{rngs::SmallRng, seq::SliceRandom, Rng, SeedableRng};
use tempfile::tempdir;
/// Criterion benchmark routine with two groups:
///
/// * `single_access`: point reads (`txn.get`) and writes inside nested
///   transactions (`txn.put`) against a database that was filled with
///   1,000,000 `put` calls beforehand.
/// * `longrun`: creating a fresh environment and database and filling it
///   with 1,000,000 random key/value pairs.
///
/// Keys are 12 random bytes and values are 24 random bytes. Every fallible
/// call is unwrapped, so any error aborts the benchmark with a panic.
fn bench(c: &mut Criterion) {
// A single RNG is shared by the setup code and by every benchmark closure below.
let rng = &mut SmallRng::from_entropy();
{
let mut bgrp = c.benchmark_group("single_access");
// Only small samples (10,000 each) are kept in memory:
// `used_keys` are written to the database below, `unused_keys` are never
// written by this function, and `unused_values` supply the values for the
// write benchmarks.
let mut used_keys: Vec<[u8; 12]> = (0..10000).map(|_| rng.gen()).collect();
let unused_keys: Vec<[u8; 12]> = (0..10000).map(|_| rng.gen()).collect();
let unused_values: Vec<[u8; 24]> = (0..10000).map(|_| rng.gen()).collect();
// The environment lives in a temporary directory and holds one database named "benchmark".
let location = tempdir().unwrap();
let db_opts = kv::DbBuilder::new().name("benchmark");
let env_builder = kv::EnvBuilder::new().dir(location.path()).max_dbs(64);
// NOTE(review): neither `unsafe` block states why its call is sound;
// check the safety contract of `open_rw` / `create_db` in the mmtkvdb docs.
let mut env = unsafe { env_builder.open_rw() }.unwrap();
let db = unsafe { env.create_db(&db_opts) }.unwrap();
// Fill the database inside a single read-write transaction.
let mut txn = env.txn_rw().unwrap();
// First store every sampled key with a random value that is not retained...
for key in used_keys.iter() {
let value = rng.gen::<[u8; 24]>();
txn.put(&db, &key[..], &value).unwrap();
}
// ...then top up with throw-away random pairs so that 1,000,000 puts are issued in total.
for _ in 0..1_000_000 - used_keys.len() {
let key = rng.gen::<[u8; 12]>();
let value = rng.gen::<[u8; 24]>();
txn.put(&db, &key, &value).unwrap();
}
txn.commit().unwrap();
// Shuffle the sampled keys in place.
// NOTE(review): `choose` below already selects at random, so this presumably
// only matters if the keys are later accessed in sequence — confirm.
used_keys.shuffle(rng);
// The read benchmarks share one read-only transaction.
let txn = env.txn_ro().unwrap();
// Look up a randomly chosen key that was stored above. The `choose` call sits
// inside the `b.iter` closure, so key selection is part of every timed iteration.
bgrp.bench_function("read_used", |b| {
b.iter(|| {
black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
})
});
// Same as above, but with keys this function never wrote to the database.
bgrp.bench_function("read_unused", |b| {
b.iter(|| {
black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
})
});
// Release the read-only transaction before opening the read-write one
// that the write benchmarks derive their nested transactions from.
drop(txn);
let mut txn = env.txn_rw().unwrap();
// Each iteration opens a nested transaction, writes one pair under a stored key,
// and lets the nested transaction go out of scope without calling `commit`.
// NOTE(review): presumably the drop rolls the write back, as the benchmark
// name suggests — confirm against the mmtkvdb docs.
bgrp.bench_function("nested_txn_write_used_rollback", |b| {
b.iter(|| {
let mut txn = txn.nested().unwrap();
let key = used_keys.choose(rng).unwrap();
let value = unused_values.choose(rng).unwrap();
black_box(txn.put(&db, &key[..], &value[..]).unwrap());
})
});
// Same as above, but with keys this function never wrote to the database.
bgrp.bench_function("nested_txn_write_unused_rollback", |b| {
b.iter(|| {
let mut txn = txn.nested().unwrap();
let key = unused_keys.choose(rng).unwrap();
let value = unused_values.choose(rng).unwrap();
black_box(txn.put(&db, &key[..], &value[..]).unwrap());
})
});
// NOTE(review): the temporary directory is closed while `txn`, `db` and `env`
// are still in scope (they are only dropped at the end of this block).
location.close().unwrap();
bgrp.finish();
}
{
let mut bgrp = c.benchmark_group("longrun");
// Each iteration is expensive, so use 10 samples and a 20-second measurement time.
bgrp.sample_size(10);
bgrp.measurement_time(std::time::Duration::from_secs(20));
// The whole closure is timed: temporary directory creation, opening the
// environment, creating the database, 1,000,000 random puts in one
// transaction, the commit, and closing the temporary directory.
bgrp.bench_function("create_and_fill_db", |b| {
b.iter(|| {
let location = tempdir().unwrap();
let db_opts = kv::DbBuilder::new().name("benchmark");
let env_builder = kv::EnvBuilder::new().dir(location.path()).max_dbs(64);
// NOTE(review): same undocumented `unsafe` calls as in the first group.
let mut env = unsafe { env_builder.open_rw() }.unwrap();
let db = unsafe { env.create_db(&db_opts) }.unwrap();
let mut txn = env.txn_rw().unwrap();
for _ in 0..1_000_000 {
let key = rng.gen::<[u8; 12]>();
let value = rng.gen::<[u8; 24]>();
txn.put(&db, &key, &value).unwrap();
}
txn.commit().unwrap();
location.close().unwrap();
})
});
bgrp.finish();
}
}
I avoid keeping all created keys/values in memory and keep only a small number of them (10,000). Yet if I replace
bgrp.bench_function("read_used", |b| {
b.iter(|| {
- black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
+ black_box(txn.get(&db, &used_keys[0][..]).unwrap());
})
});
bgrp.bench_function("read_unused", |b| {
b.iter(|| {
- black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
+ black_box(txn.get(&db, &unused_keys[0][..]).unwrap());
})
});
then I get results that are about twice as fast (and I don't think that's all because of caching, but I'm not sure).
If I do
+ let mut used_keys_iter = used_keys.iter().cycle();
+ let mut unused_keys_iter = unused_keys.iter().cycle();
bgrp.bench_function("read_used", |b| {
b.iter(|| {
- black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
+ black_box(txn.get(&db, &used_keys_iter.next().unwrap()[..]).unwrap());
})
});
bgrp.bench_function("read_unused", |b| {
b.iter(|| {
- black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
+ black_box(txn.get(&db, &unused_keys_iter.next().unwrap()[..]).unwrap());
})
});
then I get about 10% slower results. So picking a random entry is even faster than using `std::iter::Cycle`. (Interesting, isn't it?)
If I add a `for` loop which iterates over all entries in `used_keys` and `unused_keys`, then the overhead will be minimal. However, apart from introducing a factor of 10,000 into the results, this makes the calculation of the confidence interval flawed.
What's the right way to proceed when I want to measure the time of `txn.get` with `criterion`?