The two functions shown below are nearly identical, except for the order of `let mut state = vec![1., 0.];` and `test::black_box(123);`. But one takes 126 µs while the other takes just 22 µs. Why?
This also happens if, instead of `black_box`, I call a function that is fast and side-effect-free but is not inlined because it lives in another crate.
This effect goes away if I replace

    let x = state[0];
    let v = state[1];
    let diff = vec![v, -x];

with `let diff = vec![state[1], -state[0]];`.
/// Runs 10000 update steps on a two-element state vector and returns it.
///
/// The state starts as `[1., 0.]`. Each step reads `x = state[0]` and
/// `v = state[1]`, builds `diff = [v, -x]`, and adds `diff * 0.01` to the
/// state element-wise (a forward-Euler step of size 0.01 for x' = v, v' = -x).
///
/// The only difference from `fast` is that the state vector is allocated
/// *before* the `test::black_box(123)` call here. The surrounding text reports
/// a large timing gap between the two variants; the name suggests this is the
/// slower one, but the text does not say which is which.
///
/// NOTE(review): `test::black_box` presumably comes from the unstable `test`
/// crate; the matching `extern crate test` / feature gate is not visible in
/// this excerpt -- confirm.
pub fn slow() -> Vec<f64> {
let mut state = vec![1., 0.];
// Opaque call placed AFTER the allocation. Its position relative to `state`
// is the whole point of this reproducer, so do not reorder these two lines.
test::black_box(123);
for _ in 0..10000 {
// Copy both components into locals before building `diff`; the surrounding
// text says that inlining these reads into the `vec!` changes the timing.
let x = state[0];
let v = state[1];
// A fresh `Vec` is built on every iteration.
let diff = vec![v, -x];
// The loop variables shadow the outer names: here `diff` is an `f64` taken by
// value and `state` is a `&mut f64` pointing into the outer vector.
for (diff, state) in diff.into_iter().zip(state.iter_mut()) {
*state = *state + diff * 0.01;
}
}
state
}
/// Runs 10000 update steps on a two-element state vector and returns it.
///
/// The state starts as `[1., 0.]`. Each step reads `x = state[0]` and
/// `v = state[1]`, builds `diff = [v, -x]`, and adds `diff * 0.01` to the
/// state element-wise (a forward-Euler step of size 0.01 for x' = v, v' = -x).
///
/// The only difference from `slow` is that `test::black_box(123)` is called
/// *before* the state vector is allocated here. The surrounding text reports a
/// large timing gap between the two variants; the name suggests this is the
/// faster one, but the text does not say which is which.
///
/// NOTE(review): `test::black_box` presumably comes from the unstable `test`
/// crate; the matching `extern crate test` / feature gate is not visible in
/// this excerpt -- confirm.
pub fn fast() -> Vec<f64> {
// Opaque call placed BEFORE the allocation. Its position relative to `state`
// is the whole point of this reproducer, so do not reorder these two lines.
test::black_box(123);
let mut state = vec![1., 0.];
for _ in 0..10000 {
// Copy both components into locals before building `diff`; the surrounding
// text says that inlining these reads into the `vec!` changes the timing.
let x = state[0];
let v = state[1];
// A fresh `Vec` is built on every iteration.
let diff = vec![v, -x];
// The loop variables shadow the outer names: here `diff` is an `f64` taken by
// value and `state` is a `&mut f64` pointing into the outer vector.
for (diff, state) in diff.into_iter().zip(state.iter_mut()) {
*state = *state + diff * 0.01;
}
}
state
}