Benchmarking with criterion using random data

Continuing the discussion from Cheap random number generator with `std`:

I tried that. I'm using criterion. However, creating a cycling iterator over my data seemed to have even more overhead than picking elements randomly. Yet there still seems to be overhead. I don't understand criterion well enough to see how I can avoid it (and still get measurements of a single access). This is what I do:

use mmtkvdb::{self as kv, traits::*};

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::{rngs::SmallRng, seq::SliceRandom, Rng, SeedableRng};
use tempfile::tempdir;

/// Criterion benchmark defining two groups, `single_access` and `longrun`.
///
/// `single_access` performs 1,000,000 puts of random 12-byte keys with random
/// 24-byte values into a database in a temporary directory (the first 10,000
/// keys are retained in `used_keys`), then benchmarks single reads and single
/// writes inside nested transactions. `longrun` benchmarks creating a fresh
/// database and filling it with 1,000,000 random puts per iteration.
fn bench(c: &mut Criterion) {
    // One RNG shared by every benchmark below; `from_entropy` gives a fresh
    // seed, so the generated keys differ from run to run.
    let rng = &mut SmallRng::from_entropy();
    {
        let mut bgrp = c.benchmark_group("single_access");
        // Keys that are written to the database below.
        let mut used_keys: Vec<[u8; 12]> = (0..10000).map(|_| rng.gen()).collect();
        // Keys that are never explicitly written; being random 12-byte values
        // they are presumably absent from the database.
        let unused_keys: Vec<[u8; 12]> = (0..10000).map(|_| rng.gen()).collect();
        // Values used only by the write benchmarks.
        let unused_values: Vec<[u8; 24]> = (0..10000).map(|_| rng.gen()).collect();
        let location = tempdir().unwrap();
        let db_opts = kv::DbBuilder::new().name("benchmark");
        let env_builder = kv::EnvBuilder::new().dir(location.path()).max_dbs(64);
        let mut env = unsafe { env_builder.open_rw() }.unwrap();
        let db = unsafe { env.create_db(&db_opts) }.unwrap();
        let mut txn = env.txn_rw().unwrap();
        // Store every retained key with a random value.
        for key in used_keys.iter() {
            let value = rng.gen::<[u8; 24]>();
            txn.put(&db, &key[..], &value).unwrap();
        }
        // Top up to 1,000,000 puts in total with throwaway random keys that
        // are not kept in memory.
        for _ in 0..1_000_000 - used_keys.len() {
            let key = rng.gen::<[u8; 12]>();
            let value = rng.gen::<[u8; 24]>();
            txn.put(&db, &key, &value).unwrap();
        }
        txn.commit().unwrap();
        used_keys.shuffle(rng);
        // Read-only transaction shared by both read benchmarks.
        let txn = env.txn_ro().unwrap();
        bgrp.bench_function("read_used", |b| {
            b.iter(|| {
                // NOTE: `choose(rng)` runs inside the timed closure, so the cost
                // of picking a random key is part of the measurement.
                black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
            })
        });
        bgrp.bench_function("read_unused", |b| {
            b.iter(|| {
                // Same as above, but with keys that were never written.
                black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
            })
        });
        // End the read-only transaction before starting the read-write one.
        drop(txn);
        let mut txn = env.txn_rw().unwrap();
        bgrp.bench_function("nested_txn_write_used_rollback", |b| {
            b.iter(|| {
                // A nested transaction is opened per iteration and dropped at
                // the end of the closure without `commit`; going by the
                // benchmark name this presumably rolls the write back.
                // Opening and dropping it is inside the timed closure.
                let mut txn = txn.nested().unwrap();
                let key = used_keys.choose(rng).unwrap();
                let value = unused_values.choose(rng).unwrap();
                black_box(txn.put(&db, &key[..], &value[..]).unwrap());
            })
        });
        bgrp.bench_function("nested_txn_write_unused_rollback", |b| {
            b.iter(|| {
                // Same as above, but writing keys that were never stored.
                let mut txn = txn.nested().unwrap();
                let key = unused_keys.choose(rng).unwrap();
                let value = unused_values.choose(rng).unwrap();
                black_box(txn.put(&db, &key[..], &value[..]).unwrap());
            })
        });
        // NOTE(review): the temporary directory is closed while `env` and the
        // outer read-write `txn` are still in scope — confirm this is intended.
        location.close().unwrap();
        bgrp.finish();
    }
    {
        let mut bgrp = c.benchmark_group("longrun");
        // Each iteration is expensive, so use only 10 samples and a 20-second
        // measurement time.
        bgrp.sample_size(10);
        bgrp.measurement_time(std::time::Duration::from_secs(20));
        bgrp.bench_function("create_and_fill_db", |b| {
            // Everything below — creating the directory, opening the
            // environment, 1,000,000 puts, the commit and the cleanup — is
            // inside the timed closure.
            b.iter(|| {
                let location = tempdir().unwrap();
                let db_opts = kv::DbBuilder::new().name("benchmark");
                let env_builder = kv::EnvBuilder::new().dir(location.path()).max_dbs(64);
                let mut env = unsafe { env_builder.open_rw() }.unwrap();
                let db = unsafe { env.create_db(&db_opts) }.unwrap();
                let mut txn = env.txn_rw().unwrap();
                for _ in 0..1_000_000 {
                    let key = rng.gen::<[u8; 12]>();
                    let value = rng.gen::<[u8; 24]>();
                    txn.put(&db, &key, &value).unwrap();
                }
                txn.commit().unwrap();
                location.close().unwrap();
            })
        });
        bgrp.finish();
    }
}

I avoid keeping all created keys/values in memory but keep a small number of them (10,000). Yet if I replace

         bgrp.bench_function("read_used", |b| {
             b.iter(|| {
-                black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
+                black_box(txn.get(&db, &used_keys[0][..]).unwrap());
             })
         });
         bgrp.bench_function("read_unused", |b| {
             b.iter(|| {
-                black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
+                black_box(txn.get(&db, &unused_keys[0][..]).unwrap());
             })
         });

then I get results that are about twice as fast (and I don't think it's all because of caching, but I'm not sure).

If I do

+        let mut used_keys_iter = used_keys.iter().cycle();
+        let mut unused_keys_iter = unused_keys.iter().cycle();
         bgrp.bench_function("read_used", |b| {
             b.iter(|| {
-                black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
+                black_box(txn.get(&db, &used_keys_iter.next().unwrap()[..]).unwrap());
             })
         });
         bgrp.bench_function("read_unused", |b| {
             b.iter(|| {
-                black_box(txn.get(&db, &unused_keys.choose(rng).unwrap()[..]).unwrap());
+                black_box(txn.get(&db, &unused_keys_iter.next().unwrap()[..]).unwrap());
             })
         });

then I get about 10% slower results. So picking a random entry is even faster than using std::iter::Cycle. (Interesting, isn't it?)

If I add a for loop which iterates over all entries in used_keys and unused_keys, then the overhead will be minimal. However, then, apart from having a factor of 10,000 in the results, the calculation of the confidence interval will be flawed.

What's the right way to proceed when I want to measure the time of txn.get with criterion?

I figured I can measure the overhead to see if it's relevant:

+        bgrp.bench_function("read_overhead", |b| {
+            b.iter(|| {
+                black_box(&used_keys.choose(rng).unwrap()[..]);
+            })
+        });
         bgrp.bench_function("read_used", |b| {
             b.iter(|| {
                 black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
             })
         });

Resulting in:

single_access/read_overhead time:   [8.6451 ns 8.6832 ns 8.7225 ns]
single_access/read_used     time:   [383.56 ns 385.66 ns 388.03 ns]

So it's about 2% overhead, I guess.

But I also found a section in criterion's documentation on that problem: What do I do if my function's runtime is smaller than the measurement overhead? I will read further into that.


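The solution is to use `iter_batched` (with `BatchSize` added to the `criterion` imports), so that the random key is picked in the setup closure, which is not timed, and only the `txn.get` call is measured: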

-            b.iter(|| {
-                black_box(txn.get(&db, &used_keys.choose(rng).unwrap()[..]).unwrap());
-            })
+            b.iter_batched(
+                || {
+                    &used_keys.choose(rng).unwrap()[..]
+                },
+                |key| {
+                    black_box(txn.get(&db, key).unwrap());
+                },
+                BatchSize::NumIterations(1000),
+            )

So criterion already accounts for this problem; I just didn't know the right function to use.

2 Likes

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.