#![feature(portable_simd)]
use std::simd::f64x8;
/// Divides `a` by `b` element-wise and returns the quotients as a new array.
///
/// Every full group of eight elements is divided with a single `f64x8`
/// operation. The fewer-than-eight elements left over at the end are divided
/// one at a time, so no padding lanes or scratch arrays are needed.
///
/// Division follows ordinary `f64` semantics: a zero divisor yields an
/// infinity or NaN in that position rather than a panic.
pub fn div_matrix<const N: usize>(a: &[f64; N], b: &[f64; N]) -> [f64; N] {
    // Number of `f64` lanes in one `f64x8` vector.
    const LANES: usize = 8;

    let mut res = [0.0; N];

    let lhs_chunks = a.chunks_exact(LANES);
    let rhs_chunks = b.chunks_exact(LANES);
    // The remainders borrow from `a` / `b` directly, so they stay usable
    // after the chunk iterators are moved into the loop below.
    let lhs_tail = lhs_chunks.remainder();
    let rhs_tail = rhs_chunks.remainder();
    let mut out_chunks = res.chunks_exact_mut(LANES);

    // Vector path: all three iterators yield the same number of 8-wide
    // chunks, so zipping them never drops an element.
    for ((out, lhs), rhs) in out_chunks.by_ref().zip(lhs_chunks).zip(rhs_chunks) {
        let quotient = f64x8::from_slice(lhs) / f64x8::from_slice(rhs);
        out.copy_from_slice(quotient.as_array());
    }

    // Scalar path: at most LANES - 1 trailing elements.
    for ((out, lhs), rhs) in out_chunks
        .into_remainder()
        .iter_mut()
        .zip(lhs_tail)
        .zip(rhs_tail)
    {
        *out = lhs / rhs;
    }

    res
}
/// Demo entry point: divides two ten-element sample arrays element-wise and
/// prints the quotients with `dbg!`.
fn main() {
    // Ten elements exercise one full 8-wide group plus a 2-element tail.
    let numerators = [1.0, 6.0, 3.0, 3.0, 20.0, 9.0, 1.0, 9.0, 4.0, 3.0];
    let denominators = [10.0, 2.0, 2.0, 6.0, 40.0, 3.0, 6.0, 7.0, 63.0, 95.2];

    // The array length (10) is inferred from the arguments.
    let p = div_matrix(&numerators, &denominators);
    dbg!(p);
}
I have never used SIMD before.
Do you have any suggestions for performance improvements?