I have two implementations of the same Nbody simulator, one using Scalar operations and the other packed_simd
crate. I simply replaced a custom Vec3D
by f64x4
and respectively operations, but both have almost the same running time (SIMD is a bit slower), is this something to be expected? I am pasting the implementations bellow.
SIMD
//! Based on https://github.com/rust-lang/packed_simd/blob/master/examples/nbody/src/simd.rs
use std::path::Path;
use std::{fs, mem};
use packed_simd::*;
#[derive(Clone)]
pub struct Particle {
position: f64x4,
velocity: f64x4,
acceleration: [f64x4; 2],
mass: f64,
}
pub struct Bodies {
particles: Vec<Particle>,
}
impl Bodies {
pub fn new(path_name: &str) -> Bodies {
let file_path = Path::new(path_name);
let particles: Vec<Particle> = fs::read_to_string(file_path)
.expect("File not found!")
.lines()
.filter(|x| !x.is_empty())
.map(|x| parse_row(x))
.collect();
Bodies { particles }
}
pub fn compute_energy(&self, pe: f64) -> f64 {
let res: f64 = self
.particles
.iter()
.map(|p| 0.5 * p.mass * (p.velocity*p.velocity).sum())
.sum();
pe + res
}
pub fn advance_velocities(&mut self, dt: f64) {
for p in &mut self.particles {
p.velocity += 0.5 * dt * (p.acceleration[0] + p.acceleration[1]);
}
}
pub fn advance_positions(&mut self, dt: f64) {
for p in &mut self.particles {
p.position += dt * p.velocity + 0.5 * dt.powi(2) * p.acceleration[0];
}
}
pub fn accelerate(&mut self) -> f64 {
for p in self.particles.iter_mut() {
p.acceleration[1] = mem::take(&mut p.acceleration[0]);
}
let mut pe = 0.0;
let mut particles = self.particles.as_mut_slice();
while let Some((p1, rest)) = particles.split_first_mut() {
particles = rest;
for p2 in particles.iter_mut() {
let vector = p1.position - p2.position;
let norm2 = (vector*vector).sum();
let distance = norm2.sqrt();
let distance_cube = norm2 * distance;
p1.acceleration[0] -= (p2.mass / distance_cube) * vector;
p2.acceleration[0] += (p1.mass / distance_cube) * vector;
pe -= (p1.mass * p2.mass) / distance;
}
}
pe
}
}
pub fn parse_row(line: &str) -> Particle {
let row_vec: Vec<f64> = line
.split(' ')
.filter(|s| s.len() > 2)
.map(|s| s.parse().unwrap())
.collect();
Particle {
position: f64x4::new(row_vec[1], row_vec[2], row_vec[3],0.),
velocity: f64x4::new(row_vec[4], row_vec[5], row_vec[6],0.),
acceleration: [f64x4::new(0.,0.,0.,0.);2],
mass: row_vec[0],
}
}
pub fn run(path: &str) {
let mut pe;
let mut energy = 0.;
let (tend, dt) = (10.0, 0.001); // end time, timestep
let (mut old_energy, energy0) = (-0.25, -0.25);
println!("Running SIMD version");
let mut bodies = Bodies::new(path);
bodies.accelerate();
for step in 1..(tend / dt + 1.0) as i64 {
bodies.advance_positions(dt);
pe = bodies.accelerate();
bodies.advance_velocities(dt);
if step % 100 == 0 {
energy = bodies.compute_energy(pe);
println!(
"t = {:5.2}, E = {:.10}, dE/E = {:+.10}",
dt * step as f64,
energy,
(energy - old_energy) / old_energy
);
old_energy = energy;
}
}
println!("Final dE/E = {}", (energy - energy0) / energy0);
}
Scalar
/*
The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
Contributed by Ilia Schelokov, modified by Darley Barreto with help of Rust discourse
*/
use std::default::Default;
use std::ops::{Add, AddAssign, Mul, Sub, SubAssign};
use std::path::Path;
use std::{fs, mem};
#[derive(Clone, Copy, Default)]
struct Vec3D(f64, f64, f64);
impl Vec3D {
fn sum_squares(&self) -> f64 {
self.0 * self.0 + self.1 * self.1 + self.2 * self.2
}
}
impl Add for Vec3D {
type Output = Vec3D;
fn add(self, rhs: Self) -> Self::Output {
Vec3D(self.0 + rhs.0, self.1 + rhs.1, self.2 + rhs.2)
}
}
impl Sub for &Vec3D {
type Output = Vec3D;
fn sub(self, rhs: Self) -> Self::Output {
Vec3D(self.0 - rhs.0, self.1 - rhs.1, self.2 - rhs.2)
}
}
impl Mul<Vec3D> for f64 {
type Output = Vec3D;
fn mul(self, rhs: Vec3D) -> Self::Output {
Vec3D(self * rhs.0, self * rhs.1, self * rhs.2)
}
}
impl AddAssign for Vec3D {
fn add_assign(&mut self, rhs: Self) {
self.0 += rhs.0;
self.1 += rhs.1;
self.2 += rhs.2;
}
}
impl SubAssign for Vec3D {
fn sub_assign(&mut self, rhs: Self) {
self.0 -= rhs.0;
self.1 -= rhs.1;
self.2 -= rhs.2;
}
}
#[derive(Clone)]
pub struct Particle {
position: Vec3D,
velocity: Vec3D,
acceleration: [Vec3D; 2],
mass: f64,
}
pub struct Bodies {
particles: Vec<Particle>,
}
impl Bodies {
pub fn new(path_name: &str) -> Bodies {
let file_path = Path::new(path_name);
let particles: Vec<Particle> = fs::read_to_string(file_path)
.expect("File not found!")
.lines()
.filter(|x| !x.is_empty())
.map(|x| parse_row(x))
.collect();
Bodies { particles }
}
pub fn compute_energy(&self, pe: f64) -> f64 {
let res: f64 = self
.particles
.iter()
.map(|p| 0.5 * p.mass * p.velocity.sum_squares())
.sum();
pe + res
}
pub fn advance_velocities(&mut self, dt: f64) {
for p in &mut self.particles {
p.velocity += 0.5 * dt * (p.acceleration[0] + p.acceleration[1]);
}
}
pub fn advance_positions(&mut self, dt: f64) {
for p in &mut self.particles {
p.position += dt * p.velocity + 0.5 * dt.powi(2) * p.acceleration[0];
}
}
pub fn accelerate(&mut self) -> f64 {
for p in self.particles.iter_mut() {
p.acceleration[1] = mem::take(&mut p.acceleration[0]);
}
let mut pe = 0.0;
let mut particles = self.particles.as_mut_slice();
while let Some((p1, rest)) = particles.split_first_mut() {
particles = rest;
for p2 in particles.iter_mut() {
let vector = &p1.position - &p2.position;
let distance = vector.sum_squares().sqrt();
let distance_cube = distance.powi(3);
p1.acceleration[0] -= (p2.mass / distance_cube) * vector;
p2.acceleration[0] += (p1.mass / distance_cube) * vector;
pe -= (p1.mass * p2.mass) / distance;
}
}
pe
}
}
pub fn parse_row(line: &str) -> Particle {
let row_vec: Vec<f64> = line
.split(' ')
.filter(|s| s.len() > 2)
.map(|s| s.parse().unwrap())
.collect();
Particle {
position: Vec3D(row_vec[1], row_vec[2], row_vec[3]),
velocity: Vec3D(row_vec[4], row_vec[5], row_vec[6]),
acceleration: Default::default(),
mass: row_vec[0],
}
}
pub fn run(path: &str) {
let mut pe;
let mut energy = 0.;
let (tend, dt) = (10.0, 0.001); // end time, timestep
let (mut old_energy, energy0) = (-0.25, -0.25);
println!("Running Scalar version");
let mut bodies = Bodies::new(path);
bodies.accelerate();
for step in 1..(tend / dt + 1.0) as i64 {
bodies.advance_positions(dt);
pe = bodies.accelerate();
bodies.advance_velocities(dt);
if step % 100 == 0 {
energy = bodies.compute_energy(pe);
println!(
"t = {:5.2}, E = {:.10}, dE/E = {:+.10}",
dt * step as f64,
energy,
(energy - old_energy) / old_energy
);
old_energy = energy;
}
}
println!("Final dE/E = {}", (energy - energy0) / energy0);
}
Data can be found here. One can call either one of them as
fn main() {
(simd | scalar)::run("input2k");
}