How to turn this 3x3 matrix multiplication into a SIMD one?
I have written this very bad matrix multiplication implementation and I wanted to know how one even approaches turning this normal implementation into a SIMD one. thanks!
fn normal_matrix_multiplication(matrix_a: [[u64; 3]; 3], matrix_b: [[u64; 3]; 3]) -> [[u64; 3]; 3] {
let mut result = [[0u64; 3]; 3];
for (k, row_a) in matrix_a.iter().enumerate() {
for j in 0usize..3usize {
let mut sum = 0;
for (i, row_b) in matrix_b.iter().enumerate() {
sum += row_a[i] * row_b[j];
}
result[k][j] = sum;
}
}
result
}
use std::arch::x86_64::{_mm256_mul_pd, _mm256_set_pd};
fn main() {
let resultant = normal_matrix_multiplication(MATRIX_A, MATRIX_B);
let v1 = unsafe { _mm256_set_pd(1.0, 2.0, 3.0, 4.0) };
let v2 = unsafe { _mm256_set_pd(1.0, 2.0, 3.0, 4.0) };
let (a, b, c, d): (f64, f64, f64, f64) = unsafe { std::mem::transmute(_mm256_mul_pd(v1, v2)) };
println!("{a} {b} {c} {d}");
}