I was expecting `atoi`
to use some optimized SIMD tricks, but no. It turns out to be just a pile of generic code, with basically no specialization. In the end, my benchmark shows it to be mostly equivalent in performance to the simple function that @BurntSushi wrote above. There can be a difference of 10-20%, but depending on the size of the integers a different implementation comes out on top, and I don't see any clear pattern (e.g. the result doesn't simply depend monotonically on the input size). Without any more specific benchmarks, I'd say the simple code is good enough.
Benchmark:
/// Reasons why `parse_u64` can reject its input.
///
/// The derives make the error printable (so `unwrap`/`expect` work on the
/// parser's `Result`) and comparable (so results can be checked with
/// `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseU64Error {
    /// A byte that is not an ASCII decimal digit was found; `got` is that byte.
    InvalidDigit { got: u8 },
    /// The number does not fit in a `u64`; `bytes` is a copy of the whole input.
    NumberTooBig { bytes: Vec<u8> },
}
/// Parses an unsigned decimal integer from raw ASCII bytes.
///
/// Bytes are consumed left to right, accumulating `acc * 10 + digit` with
/// overflow-checked arithmetic. No sign, whitespace, or separators are
/// accepted. An empty slice yields `Ok(0)` because there is nothing to reject.
///
/// # Errors
///
/// * `ParseU64Error::InvalidDigit` — the first byte outside `b'0'..=b'9'`.
/// * `ParseU64Error::NumberTooBig` — the accumulated value would exceed
///   `u64::MAX`; the error carries a copy of the entire input.
pub fn parse_u64(bytes: &[u8]) -> Result<u64, ParseU64Error> {
    bytes.iter().try_fold(0u64, |acc, &byte| {
        // Validate the byte first so a bad digit is reported before overflow.
        if !byte.is_ascii_digit() {
            return Err(ParseU64Error::InvalidDigit { got: byte });
        }
        let value = u64::from(byte - b'0');

        match acc.checked_mul(10).and_then(|scaled| scaled.checked_add(value)) {
            Some(next) => Ok(next),
            None => Err(ParseU64Error::NumberTooBig {
                bytes: bytes.to_vec(),
            }),
        }
    })
}
use atoi::FromRadix10SignedChecked;
use std::fmt::Display;
use criterion::{criterion_main, BatchSize, Bencher, Criterion};
use rand::{distributions::uniform::SampleUniform, Rng};
/// Builds a Criterion routine that times `f` on freshly generated decimal text.
///
/// For each measured call, the setup closure draws a random value from
/// `low..=high` and renders it with `to_string`; `f` is then called on the
/// bytes of that string.
///
/// The input is handed to the routine by reference (`iter_batched_ref`)
/// instead of by value, so the routine no longer owns the `String` and does
/// not free it as part of the call being timed.
/// NOTE(review): this relies on Criterion dropping batched inputs outside the
/// measured region — confirm against the `Bencher::iter_batched_ref` docs for
/// the Criterion version in use.
fn bench<T, R>(low: T, high: T, f: fn(&[u8]) -> R) -> impl FnMut(&mut Bencher)
where
    T: SampleUniform + Display + Ord + Copy,
{
    move |b| {
        b.iter_batched_ref(
            // Setup: produce one random number as its decimal string.
            || rand::thread_rng().gen_range::<T, _>(low..=high).to_string(),
            // Routine: only the parse itself; the string is merely borrowed.
            |s: &mut String| f(s.as_bytes()),
            BatchSize::SmallInput,
        )
    }
}
/// Registers one benchmark group named `group_name` that compares three
/// parsers on random numbers drawn from `low..=high`:
///
/// * `"atoi"` — `atoi::atoi` instantiated for `u64`, whatever `T` is;
/// * `"atoi/<T>"` — `atoi::atoi` instantiated for the sampled type `T`;
/// * `"parse_num"` — the hand-written `parse_u64`.
fn bench_group<T>(c: &mut Criterion, group_name: &str, low: T, high: T)
where
    T: SampleUniform + Display + Ord + Copy + FromRadix10SignedChecked,
{
    let typed_id = format!("atoi/{}", std::any::type_name::<T>());

    let mut group = c.benchmark_group(group_name);
    group.bench_function("atoi", bench(low, high, ::atoi::atoi::<u64>));
    group.bench_function(typed_id, bench(low, high, atoi::atoi::<T>));
    group.bench_function("parse_num", bench(low, high, parse_u64));
    // Equivalent to letting the group fall out of scope; spelled out for clarity.
    group.finish();
}
/// Runs every benchmark group: single digits first, then the full value range
/// of each unsigned width from `u8` up to `u64`.
pub fn benches() {
    let defaults = Criterion::default();
    let mut c = defaults.configure_from_args();

    bench_group::<u8>(&mut c, "bench_digit", 0, 9);
    bench_group::<u8>(&mut c, "bench_u8", u8::MIN, u8::MAX);
    bench_group::<u16>(&mut c, "bench_u16", u16::MIN, u16::MAX);
    bench_group::<u32>(&mut c, "bench_u32", u32::MIN, u32::MAX);
    bench_group::<u64>(&mut c, "bench_u64", u64::MIN, u64::MAX);
}

// Generates the benchmark binary's `main`, which invokes `benches`.
criterion_main!(benches);