Time for performance optimization


#1

I read that you like to read code and try to make it more efficient.

So, this is my code, have fun.

extern crate criterion_plot as plot;
extern crate rayon;
extern crate csv;
extern crate chashmap;
extern crate walkdir;
#[macro_use]
extern crate serde_derive;
extern crate statistical;
extern crate itertools_num as itertools;
extern crate palette;

use std::path::PathBuf;

use chashmap::CHashMap;
use rayon::prelude::*;
use walkdir::WalkDir;
use csv::Reader;
use plot::prelude::*;
use itertools::linspace;
use palette::{Hsl, Rgb, FromColor};

type SerialNumber = String;

#[derive(Deserialize)]
struct Record {
    #[serde(rename = "serial_number")]
    serial: SerialNumber,
    model: String,
    #[serde(rename = "capacity_bytes")]
    capacity: u64,
    failure: u8
}

struct Record2 {
    serial: Option<SerialNumber>,
    model: String,
    capacity: u64
}

struct Drive{
    model: Option<String>, 
    capacity:u64,  //bytes
    life: u64     //days
}

struct Model{
    capacity: u64,
    lifes: Vec<f64>
}


/// Compares some Hard Disk Drives using the data provided by
/// [BackBlaze] (https://www.backblaze.com/b2/hard-drive-test-data.html)
/// in order to extract a survival analysis. Produces a plot for each
/// capacity and an output with all models ordered by capacity and
/// mean life time.

fn main() {
    let root = {
        let mut args = std::env::args();
        let prog_name = args.next().expect("ERROR. Missing executable name.");
        args.next()
            .expect(format!("ERROR. Usage: {} <root of folder containing csv>",
                            prog_name).as_str())
    };

    let drives: CHashMap<SerialNumber, Drive> = CHashMap::new();

    let files = WalkDir::new(root).into_iter()
        .filter_map(|de| {
            de.ok().and_then(|e| {
                if e.file_type().is_file() {
                    Some(e.path().to_path_buf())
                } else {
                    None
                }
            })
        })
        .collect::<Vec<_>>();
    let days = files.len();

    files.into_par_iter()
        .filter_map(|pb| Reader::from_path(pb).ok())
        .flat_map(|reader|
                  reader.into_deserialize::<Record>()
                  .collect::<Vec<_>>())
        .filter_map(|rec| {
            if let Ok(rec) = rec {
                if rec.failure == 0 { return Some(
                    Record2 { serial: Some(rec.serial),
                              capacity: rec.capacity,
                              model: rec.model} )
                }}
            None
        })
        .for_each(|mut r| {
            drives.upsert(r.serial.take().unwrap(),
                          move || Drive{ model: Some(r.model),
                                         capacity: r.capacity,
                                         life: 1 },
                          |d| d.life += 1);
        });

    let models = CHashMap::new();

    drives.into_iter()
        .collect::<Vec<_>>()
        .into_par_iter()
        .for_each(|(_, mut drive)| {
            models.upsert(drive.model.take().unwrap(),
                          || Model { capacity: drive.capacity,
                                     lifes: vec![drive.life as f64] },
                          |m| m.lifes.push(drive.life as f64));
        });
    let base_color:Hsl = Hsl::from_rgb(Rgb::new_u8(0x8a, 0x56, 0xe2));
    let ref xs: Vec<_> = linspace::<f64>(0.0, days as f64, days + 1).collect();

    let figures = CHashMap::new();
    let count_for_cap:CHashMap<u64, (u32, u32)> = CHashMap::new();

    let models:Vec<_> = models.into_iter().collect();
    models.par_iter().for_each(|&(_, ref  data)| {
        count_for_cap.upsert(data.capacity,
                             || (1, 0),
                             |c| c.0+=1);
    });
    
    println!("Reliability per model:");
    let mut ordered: Vec<_> = models.into_par_iter()
        .map(|(model, data)| {
            let color :Rgb = {
                let mut color = base_color;
                let mut guard = count_for_cap.get_mut(&data.capacity).unwrap();
                let n = guard.0;
                let i = guard.1;
                let step = 240f32/n as f32;
                color.hue = (((color.hue.to_degrees() + step * i as f32)) % 300f32).into();
                guard.1 += 1;
                Rgb::from_hsl(color)
            };

            let mean = statistical::mean(data.lifes.as_slice());
            let cap = data.capacity;
            let model2 = model.clone();
            figures.alter(data.capacity,
                          move |optf| {
                              let mut f =
                                  if let Some(f) = optf {
                                      f
                                  } else {
                                      let mut f = Figure::new();
                                      let path = format!("./{}.svg", cap);
                                      let mut pb = PathBuf::new();
                                      pb.push(path);
                                      f.set(Title(format!("Survival analysis. {}bytes drives", cap)))
                                          .set(Output(pb))
                                          .set(Size(1920, 1080))
                                          .set(FontSize(8.0))
                                          .configure(Key, |k| k.set(Boxed::Yes));
                                      f};
                              let mut ys = vec![0; days+1];
                              let total = data.lifes.len();
                              let mut alives = total;
                              for l in data.lifes.into_iter() {
                                  ys[l as usize] +=1;
                              }
                              //at this point ys[i] says how much drives died in the i-th day
                              for y in ys.iter_mut() {
                                  alives -= *y;
                                  *y = alives*1000 / total;
                              }
                              f.plot(
                                  Lines {
                                      x: xs,
                                      y: ys
                                  },
                                  |lp| lp.set(Label(model))
                                      .set(Color::Rgb((color.red*255.) as u8,
                                                      (color.green*255.) as u8,
                                                      (color.blue*255.) as u8))
                              );
                              Some(f)
                          });
            (model2, cap, mean)
        }).collect();

    ordered.par_sort_unstable_by(|&(ref m1, c1, l1), &(ref m2, c2, l2)| {
        if c1 == c2 {
            if l1 == l2 {
                m1.cmp(&m2)
            } else {
                l2.partial_cmp(&l1).unwrap()
            }
        } else {
            c1.cmp(&c2)
        }
    });

    for (model, capacity, life) in ordered.into_iter() {
        println!("{:30}{:15}{:8.3}", model, capacity, life);
    }

    for (_, mut f) in figures.into_iter() {
        f.draw().ok().and_then(|gnuplot| {
            gnuplot.wait_with_output().ok()
                .and_then(|p| String::from_utf8(p.stderr).ok())
        }).expect("ERROR occurred while plotting");
    }
}

This code compile and work.
Using the data of 2013, 2014, 2015 (8.3GiB, 996 CSV files) it takes
real 3m40.479s
user 2m33.840s
sys 0m9.532s
(from the time command, --release version).
So if you think I can do something to improve the performance I will try and I will reply telling you if it did work.
Thank you.


#2

How’d you know? :smiley:

I have no idea what this code does (and don’t really care), but regardless, some general advice usually applies!

  1. First of all, have a good benchmark so you can quickly measure improvements. Is running the code enough? Can you get better data with a script that automatically runs this 10 times and gives you the average runtime?

  2. Run cargo clippy and do what it says.

  3. Then, reduce the amount of allocations.

    For example:

    This allocates a string in format! as well as a buffer for the PathBuf. Can you be sure it’s reusing the buffer? Or: Can you get it to always reuse the same buffer? You can! Turns out it’s just a matter of calling PathBuf::from(path) (see std source).

  4. Reduce the amount of unneeded conveniences that may stand in the way of optimizations/inlining.

    format! is also a quite powerful macro. Do you need it here? You’re just concatenating a string. How about let mut s = String::from("./"); s.push(cap); s.push(".svg");? Not sure if it’s actually faster or if rustc inlines and optimizes away the whole format!ting machinery, but you can find out!

  5. Don’t have locks in hot loops. Even invisible ones!

    Put an let out = std::io::stdout(); let handle = out.lock(); above this and replace the println!s with writeln!(handle, ...) (docs).

  6. Compile with lto = true and panic=abort and try to compile with system memory allocator.

  7. Actually profile your code, using perf or a tool like Instruments.app.

Let me know if that helped :slight_smile:


#3

I’m sorry. It does process the data provided by BackBlaze in order to extract a survival analysis plot and finally choose the best Hard Drives to buy for the next upgrade. I also gave some more details here Criterion plot does not plot [solved]


#4

I played around with this for a little while.

Discovered with a bit of printf-profiling that your bottleneck is the deserialization of the csvs into Vec<Record> (aha, allocation!). Rewrote to remove the intermediate call to collect. That didn’t do the trick. Looked a bit further, spotted that you are allocating two strings each time you deserialize a Record. Rewrote to use serdes zero-copy parsing. That didn’t do much either!

Eventually I discovered that you are triggering some kind of pathological case in the csv crate. It seems to have occured because the CSVs are very wide (~80 columns) - csv appears to scan through the header every time when deserializing each row. Indeed perhaps there was some kind of O(n^2) logic going on inside the csv - serde handshake. (@BurntSushi?)

I managed to get a factor of 4 speedup by passing in the header explicitly to each deserialize call (through the StringRecord interface) but first pruning the header so it was only the necessary length (which turned out to be 5).

So, my advice is to pre-process all your CSVs with AWK to remove all but the first 5 rows :slight_smile:

My hacky code changes are below:

    files
        .into_par_iter()
        .filter_map(|pb| {
            println!("Processing file {:?}", pb);
            Reader::from_path(pb).ok()
        })
        .for_each(|mut reader| {
            let header = reader.headers().unwrap().clone().iter().take(5).collect();  // Ta-dah!
            for record in reader.records() {
                if let Ok(record) = record {
                    let r = record.deserialize::<Record>(Some(&header));
                    if let Ok(r) = r {
                        if r.failure == 0 {
                            drives.upsert(
                                r.serial.into(),
                                move || {
                                    Drive {
                                        model: Some(r.model.into()),
                                        capacity: r.capacity,
                                        life: 1,
                                    }
                                },
                                |d| d.life += 1,
                            );
                        }
                    }
                }
            }
        });


#5

I’m running the code 3 times and computing the average manually

This helped me to have more “idiomatic” code, but didn’t change the performance.

I was not able to compile with lto (error: lto can only be run for executables, cdylibs and static library outputs) and neither with panic=abort (Compiling serde_derive v1.0.11 / error: the linked panic runtime panic_unwind is not compiled with this crate’s panic strategy abort).

compiling with nighly+system allocator did not help

I have no idea how to use it :frowning: but I’ll try to learn something

The effect was negligible, also adding the unwrap() to make the compiler happier.
I will try the other tips soon!
Thank you


#6

There are some interesting things going on with csv<->serde interaction! https://github.com/BurntSushi/rust-csv/issues/87#issuecomment-325199285 @adwhit Nice job diagnosing that. :slight_smile:


#7

Can I ask you where did you find the negative values for the capacity field?


#8

@garro Line 2189 of 2017-04-03.csv:

2017-04-03,ZA13GM06,ST8000DM002,-1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

#9

I think this had a big impact on efficiency.
Although, I think because of Disk/Memory limitations of my computer, the net effect was a minor use of CPU time rather then a great speed-up.
Thank you very much