Zero-sized types (ZSTs) are widely known as an exciting feature of Rust, and I like them very much.
However, I found that in practice the following two functions perform completely differently when `MaybeZST` is a zero-sized type. My understanding was that, for a ZST, the compiler should make them equivalent. Is this a misunderstanding on my part? If so, what should I realistically expect from ZSTs?
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use rand::prelude::*;
/// A single stored element.
///
/// `_no_zst` is carried along but never read by the accessors below; `maybe_zst`
/// is the field they return and may be instantiated with a zero-sized type.
struct Data<NoZst, MaybeZST> {
    _no_zst: NoZst,
    maybe_zst: MaybeZST,
}

/// A vector of [`Data`] elements with two ways of reading the `maybe_zst` field.
pub struct Container<NoZst, MaybeZST> {
    values: Vec<Data<NoZst, MaybeZST>>,
}

impl<NoZst, MaybeZST: Copy> Container<NoZst, MaybeZST> {
    /// Returns a copy of the `maybe_zst` field of element `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds, regardless of the size of `MaybeZST`.
    pub fn get_maybe_zst_slow(&self, i: usize) -> MaybeZST {
        let element = &self.values[i];
        element.maybe_zst
    }
}

impl<NoZst, MaybeZST: Copy + Default> Container<NoZst, MaybeZST> {
    /// Returns the `maybe_zst` field of element `i`, skipping the lookup entirely
    /// when `MaybeZST` is zero-sized.
    ///
    /// For a zero-sized `MaybeZST` the default value is returned and `i` is never
    /// used, so no bounds check happens in that case.
    ///
    /// # Panics
    ///
    /// Panics if `MaybeZST` is not zero-sized and `i` is out of bounds.
    pub fn get_maybe_zst_fast(&self, i: usize) -> MaybeZST {
        match std::mem::size_of::<MaybeZST>() {
            0 => MaybeZST::default(),
            _ => self.values[i].maybe_zst,
        }
    }
}
/// Builds a `Container<u64, T>` with `size` elements.
///
/// Each element gets a freshly generated random `u64` in `_no_zst` and
/// `T::default()` in `maybe_zst`.
fn random_container<T: Copy + Default>(size: usize) -> Container<u64, T> {
    let mut rng = rand::thread_rng();
    let mut values = Vec::with_capacity(size);
    for _ in 0..size {
        values.push(Data {
            _no_zst: rng.gen(),
            maybe_zst: T::default(),
        });
    }
    Container { values }
}
/// Criterion benchmark comparing `get_maybe_zst_slow` with `get_maybe_zst_fast`
/// on a `Container<u64, ()>`, i.e. with a zero-sized `MaybeZST`.
///
/// For each loop count `i` it registers a "slow" and a "fast" benchmark in the
/// "zst_visit" group; each one calls its getter `i` times per iteration.
fn bench_zst_visit(c: &mut Criterion) {
let mut group = c.benchmark_group("zst_visit");
// Element count of the container; indices below are `j % N`, so always in bounds.
const N: usize = 100_000_000;
let container = random_container::<()>(N);
for i in [10_000usize, 100_000usize] {
group.bench_with_input(BenchmarkId::new("slow", i), &container, |b, container| {
b.iter(|| {
for j in 0..i {
// NOTE(review): the returned value is discarded and not passed through
// `black_box`; what remains to be timed is presumably whatever the
// optimizer cannot prove side-effect free (e.g. the indexing bounds
// check) — confirm by inspecting the generated code.
container.get_maybe_zst_slow(j % N);
}
})
});
group.bench_with_input(BenchmarkId::new("fast", i), &container, |b, container| {
b.iter(|| {
for j in 0..i {
// NOTE(review): same discarded result as above; with `()` the getter
// returns before indexing, so this loop presumably has no observable
// effect left for the optimizer to keep — confirm.
container.get_maybe_zst_fast(j % N);
}
})
});
}
group.finish();
}
// Register `bench_zst_visit` under the `benches` group and hand that group to
// Criterion's generated benchmark entry point.
criterion_group!(benches, bench_zst_visit);
criterion_main!(benches);
The results are as follows:
zst_visit/slow/10000 time: [22.437 µs 22.531 µs 22.652 µs]
zst_visit/fast/10000 time: [0.0000 ps 0.0000 ps 0.0000 ps]
zst_visit/slow/100000 time: [223.97 µs 224.02 µs 224.08 µs]
zst_visit/fast/100000 time: [0.0000 ps 0.0000 ps 0.0000 ps]