fn rank(data: Vec<f32>) -> Vec<f32> {
    let mut res = vec![f32::NAN; data.len()];
    let mut sorted_res: Vec<f32> = Vec::new();
    for i in 0..data.len() {
        let n: f32 = data[i];
        // Count how many of the previous elements are <= n, then insert n
        // so that sorted_res stays sorted.
        let iloc = sorted_res.partition_point(|&x| x <= n);
        sorted_res.insert(iloc, n);
        // Percentile rank of data[i] among data[0..i].
        res[i] = 100.0_f32 * ((iloc as i32 - 1) as f32) / i as f32;
    }
    res
}
:dep rand
use rand::{distributions::Uniform, Rng};
let mut rng = rand::thread_rng();
let range = Uniform::new(0, 20);
let range2 = Uniform::new(0.0_f32, 1.0_f32);
let data: Vec<f32> = (0..2_000_000).map(|_| rng.sample(&range2)).collect();
let old: Vec<f32> = rank(data); // this is the line to time.
The last line takes about 270 s, while the similar Python code (using numpy) takes only about 250 s. Why does this happen? What am I missing? I'm running it in Jupyter with Evcxr.
According to evcxr/COMMON.md at main · google/evcxr · GitHub there's a way to set the optimisation level with :opt. I'm unfamiliar with all this as well, but I guess you could try to plop :opt 3 somewhere and see if it helps.
:opt 3
:dep rand
use std::time::Instant;
use rand::{distributions::Uniform, Rng};

fn rank(data: Vec<f32>) -> Vec<f32> {
    let mut res = vec![f32::NAN; data.len()];
    let mut sorted_res: Vec<f32> = Vec::new();
    for i in 0..data.len() {
        let n: f32 = data[i];
        let iloc = sorted_res.partition_point(|&x| x <= n);
        sorted_res.insert(iloc, n);
        res[i] = 100.0_f32 * ((iloc as i32 - 1) as f32) / i as f32;
    }
    res
}

fn main() {
    let mut rng = rand::thread_rng();
    let range2 = Uniform::new(0.0_f32, 1.0_f32);
    let data: Vec<f32> = (0..2_000_000).map(|_| rng.sample(&range2)).collect();
    let t0 = Instant::now();
    let old: Vec<f32> = rank(data);
    let t1 = Instant::now();
    let elapsed = (t1 - t0).as_millis();
    println!("{elapsed}");
    dbg!(old.len(), elapsed);
}
main()
// output
281013
()
[src/lib.rs:24] old.len() = 2000000
[src/lib.rs:24] elapsed = 281013
You are basically writing an O(n²) algorithm by using insert(). I can't imagine that the same Python code would do this significantly faster. What's the "equivalent" Python code that you are measuring?
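To expand on that: Vec::insert has to shift every element after the insertion point, so each call is O(n) and the whole loop is O(n²) — on the order of a trillion element moves for 2,000,000 inputs — which is why the optimisation level barely matters here. As a rough, untested sketch (the rank_fast/add/prefix names are mine, not from the original post), the same running percentile rank can be computed in O(n log n) with a Fenwick (binary indexed) tree over the coordinate-compressed values, assuming the data contains no NaNs:

fn rank_fast(data: &[f32]) -> Vec<f32> {
    // Fenwick-tree helpers, 1-based indexing: tree counts how many values
    // seen so far fall into each bucket.
    fn add(tree: &mut [u32], mut i: usize) {
        while i < tree.len() {
            tree[i] += 1;
            i += i & i.wrapping_neg();
        }
    }
    fn prefix(tree: &[u32], mut i: usize) -> u32 {
        let mut s = 0;
        while i > 0 {
            s += tree[i];
            i -= i & i.wrapping_neg();
        }
        s
    }

    // Coordinate-compress the values so each one maps to a small bucket index.
    let mut sorted: Vec<f32> = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); // assumes no NaNs
    sorted.dedup();

    let mut tree = vec![0u32; sorted.len() + 1];
    let mut res = vec![f32::NAN; data.len()];
    for (i, &n) in data.iter().enumerate() {
        // 1-based bucket of n; prefix(bucket) = how many earlier elements are <= n.
        let bucket = sorted.partition_point(|&x| x <= n);
        let iloc = prefix(&tree, bucket);
        add(&mut tree, bucket);
        res[i] = 100.0_f32 * (iloc as f32 - 1.0) / i as f32;
    }
    res
}

On 2,000,000 inputs this does roughly n·log n work instead of ~n²/4 element moves, so it should finish in seconds rather than minutes, whatever :opt is set to.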