I'm not sure about the last one. But regarding speed, using a hashmap seems a lot slower, at least the way I've done it:
extern crate fnv;
use fnv::FnvHashMap;
use std::collections::HashMap;
use std::time::{Duration, Instant};
fn main() {
let n = 1_000_000;
let now = Instant::now();
test(normalize1, n);
let elapsed = now.elapsed();
println!("String::replace {}", secs(&elapsed));
let now = Instant::now();
test(normalize2, n);
let elapsed = now.elapsed();
println!("HashMap {}", secs(&elapsed));
let now = Instant::now();
test(normalize3, n);
let elapsed = now.elapsed();
println!("FnvHashMap {}", secs(&elapsed));
}
/// Calls `normalize` on a fixed sample sentence `n` times.
///
/// Returns `true` once every round has produced the expected text.
///
/// # Panics
///
/// Panics on the first round whose output differs from the expected text.
fn test(normalize: fn(&str) -> String, n: u32) -> bool {
    let input = "please file the flight plans";
    let expected = "please file the flight plans";

    let mut remaining = n;
    while remaining > 0 {
        let actual = normalize(input);
        if actual != expected {
            panic!("test_replace failed: {} != {}", expected, actual);
        }
        remaining -= 1;
    }
    true
}
/// Expands the Latin ligature code points U+FB00..=U+FB06 in `s` into their
/// component letters using chained `str::replace` calls.
///
/// The ligatures are written as `\u{...}` escapes so that they survive
/// copy/paste and text normalisation; written as plain letters, every call
/// would replace a string with itself and the function would do nothing.
///
/// Returns a new `String`; text without ligatures is returned unchanged.
fn normalize1(s: &str) -> String {
    s.replace("\u{FB00}", "ff") // ff ligature
        .replace("\u{FB01}", "fi") // fi ligature
        .replace("\u{FB02}", "fl") // fl ligature
        .replace("\u{FB03}", "ffi") // ffi ligature
        .replace("\u{FB04}", "ffl") // ffl ligature
        // Long-s + t ligature: expands to U+017F (long s) followed by 't'.
        .replace("\u{FB05}", "\u{17F}t")
        .replace("\u{FB06}", "st") // st ligature
}
/// Expands the Latin ligature code points U+FB00..=U+FB06 in `s` into their
/// component letters by looking each character up in a `HashMap`.
///
/// The keys are written as `\u{...}` escapes: a `char` literal must hold
/// exactly one code point, so spelling a ligature as several letters does
/// not compile.
///
/// Returns a new `String`; characters not in the table are copied as-is.
fn normalize2(s: &str) -> String {
    // The table is rebuilt on every call.
    // NOTE(review): hoisting it out of the function would remove that cost
    // from the measurement; confirm whether that is wanted for the benchmark.
    let mut table: HashMap<char, &str> = HashMap::with_capacity(7);
    table.insert('\u{FB00}', "ff");
    table.insert('\u{FB01}', "fi");
    table.insert('\u{FB02}', "fl");
    table.insert('\u{FB03}', "ffi");
    table.insert('\u{FB04}', "ffl");
    // Long-s + t ligature: expands to U+017F (long s) followed by 't'.
    table.insert('\u{FB05}', "\u{17F}t");
    table.insert('\u{FB06}', "st");

    // Append straight into one buffer instead of allocating a temporary
    // `String` for every character.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match table.get(&c) {
            Some(rep) => out.push_str(rep),
            None => out.push(c),
        }
    }
    out
}
/// Expands the Latin ligature code points U+FB00..=U+FB06 in `s` into their
/// component letters by looking each character up in an `FnvHashMap`.
///
/// The keys are written as `\u{...}` escapes: a `char` literal must hold
/// exactly one code point, so spelling a ligature as several letters does
/// not compile.
///
/// Returns a new `String`; characters not in the table are copied as-is.
fn normalize3(s: &str) -> String {
    // The table is rebuilt on every call.
    // NOTE(review): hoisting it out of the function would remove that cost
    // from the measurement; confirm whether that is wanted for the benchmark.
    let mut table: FnvHashMap<char, &str> = FnvHashMap::default();
    table.insert('\u{FB00}', "ff");
    table.insert('\u{FB01}', "fi");
    table.insert('\u{FB02}', "fl");
    table.insert('\u{FB03}', "ffi");
    table.insert('\u{FB04}', "ffl");
    // Long-s + t ligature: expands to U+017F (long s) followed by 't'.
    table.insert('\u{FB05}', "\u{17F}t");
    table.insert('\u{FB06}', "st");

    // Append straight into one buffer instead of allocating a temporary
    // `String` for every character.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match table.get(&c) {
            Some(rep) => out.push_str(rep),
            None => out.push(c),
        }
    }
    out
}
/// Formats `duration` as fractional seconds with three decimal places,
/// followed by the unit, e.g. `"1.500 sec"`.
fn secs(duration: &Duration) -> String {
    let whole_seconds = duration.as_secs() as f64;
    let fractional_seconds = duration.subsec_nanos() as f64 * 1e-9;
    format!("{:.03} sec", whole_seconds + fractional_seconds)
}
For me the timings are:
String::replace 0.531 sec
HashMap 2.384 sec
FnvHashMap 2.170 sec
(±0.1 sec across multiple runs)