I ran the following basic test to get a first impression of the performance difference between ordinary floating-point operations and the fast-math intrinsics (on nightly Rust):
#![feature(core_intrinsics)]
extern crate rand;
use std::vec::Vec;
use std::intrinsics::*;
use std::time::{Instant, Duration};
/// Micro-benchmark comparing the `f*_fast` intrinsics with the ordinary float
/// operators.
///
/// Both timed sections walk the same pre-generated `(x, y, z)` triples and
/// update a single accumulator with `p = (x * p - y) * p + z`, so every
/// iteration depends on the previous one. The two wall-clock durations are
/// printed at the end.
///
/// Timings are only meaningful on an optimized build (`--release` / `-O`).
fn main() {
    let loop_count: usize = 10_000_000;

    // Build the input outside the timed sections. The final size is known,
    // so reserve it once instead of letting the vector grow by reallocation.
    let mut tuples = Vec::<(f64, f64, f64)>::with_capacity(loop_count);
    let mut x = 0.5f64;
    for _ in 0..loop_count {
        x = (0.78f64 * x + 0.22f64).fract();
        let y = (0.12f64 * x + 0.13f64).fract();
        let z = (0.54f64 * x + 0.07f64).fract();
        tuples.push((x, y, z));
    }

    let duration_fast: Duration = {
        let mut p = 1.0f64;
        let now = Instant::now();
        for &(a, b, c) in &tuples {
            // chain polynomials
            //
            // SAFETY: the `*_fast` intrinsics are documented as requiring
            // finite inputs and results.
            // NOTE(review): nothing here keeps `p` finite, and with these
            // coefficients the recurrence looks like it grows without bound
            // after a few dozen iterations. Confirm, or bound the recurrence,
            // before trusting this section.
            unsafe {
                p = fadd_fast(fmul_fast(fsub_fast(fmul_fast(a, p), b), p), c);
            }
        }
        // Keep the result observable so the optimizer cannot discard the loop
        // whose run time is being measured.
        std::hint::black_box(p);
        now.elapsed()
    };

    let duration_slow: Duration = {
        let mut p = 1.0f64;
        let now = Instant::now();
        for &(a, b, c) in &tuples {
            // chain polynomials
            p = (a * p - b) * p + c;
        }
        // Same reason as above: the accumulator must be consumed.
        std::hint::black_box(p);
        now.elapsed()
    };

    println!("fast: {}.{:09} s", duration_fast.as_secs(), duration_fast.subsec_nanos());
    println!("slow: {}.{:09} s", duration_slow.as_secs(), duration_slow.subsec_nanos());
}
It turned out that the version I expected to be faster was actually slower:
fast: 0.407347297 s
slow: 0.249692092 s
I originally ran this on my laptop under 64-bit Windows 10 and wondered whether the feature was unavailable or unoptimized on Windows, but I get similar results on the Rust Playground. Is an additional flag necessary to activate fast math? Or is there something fundamentally wrong with my code?