Why is phf::Map slower than std HashMap?

I recently found the phf crate, which offers compile-time-built, immutable data structures. Since I have effectively static HashMaps in several of my projects, I was excited to try it out. Expecting to see better performance (or at the very least, the same performance) for phf::Map as for std::collections::HashMap, I wrote a simple benchmark.

The idea of the benchmark is: build maps from and to 100 short strings (random English words), and retrieve 200 existing keys and 29 not existing keys.

However I am getting consistently worse performance for phf compared to std. Why is this the case? I expected better or at least the same. Or is my benchmark code stupid?

What I'm getting on my old-ish laptop:

     Running benches/phf.rs (target/release/deps/phf-2135ec4e8358a54c)
std                     time:   [7.7444 µs 7.7999 µs 7.8647 µs]
                        change: [-1.5735% +0.4674% +2.9370%] (p = 0.71 > 0.05)
                        No change in performance detected.
Found 20 outliers among 100 measurements (20.00%)
  5 (5.00%) high mild
  15 (15.00%) high severe

phf                     time:   [8.6136 µs 8.6991 µs 8.7947 µs]
                        change: [-0.9140% +0.1582% +1.1499%] (p = 0.77 > 0.05)
                        No change in performance detected.
Found 16 outliers among 100 measurements (16.00%)
  2 (2.00%) high mild
  14 (14.00%) high severe

Full code:

use std::collections::HashMap;
use criterion::{black_box, Criterion, criterion_group, criterion_main};
use phf::phf_map;

// 200 strs which exist in map, 29 which don't
static KEYS: [&str; 229] = [
    "screened",
    "equivalent",
    "tilt's",
    "minting",
    "orienting",
    "aviator",
    "blonder",
    "ambition",
    "fits",
    "linoleum's",
    "hell",
    "harnessing",
    "asphyxiations",
    "psychologies",
    "pageantry's",
    "padlock's",
    "riots",
    "enrolment",
    "mart's",
    "aquatic's",
    "madhouses",
    "jingle",
    "dependent's",
    "raging",
    "corkscrew's",
    "month",
    "site's",
    "neglect's",
    "child's",
    "moderate's",
    "merchandizes",
    "foam's",
    "mobility",
    "slob's",
    "wharfs",
    "hostel",
    "clear's",
    "cushioned",
    "strived",
    "art",
    "studs",
    "solitary",
    "nits",
    "someplace",
    "paving",
    "forcible",
    "engineer's",
    "plurals",
    "violated",
    "deadens",
    "upbeat's",
    "larks",
    "incompatibility's",
    "aching",
    "hurl's",
    "shiver's",
    "therapist",
    "streamer",
    "spying",
    "shortly",
    "multi",
    "edit's",
    "publicise",
    "banish",
    "veneering",
    "hitherto",
    "porticos",
    "plodded",
    "pictorials",
    "seasonable",
    "barometer",
    "timings",
    "scampers",
    "irony's",
    "room",
    "reach",
    "profaned",
    "vestige's",
    "evangelist",
    "warpath's",
    "tawnier",
    "ruffling",
    "bread's",
    "vigour",
    "tripping",
    "obnoxious",
    "slimiest",
    "epoch's",
    "dismiss",
    "devotion",
    "overpower",
    "regaled",
    "free",
    "effort's",
    "brocades",
    "stimulus's",
    "contact",
    "subjugated",
    "motivation",
    "smelled",
    "screened",
    "equivalent",
    "tilt's",
    "minting",
    "orienting",
    "aviator",
    "blonder",
    "ambition",
    "fits",
    "linoleum's",
    "hell",
    "harnessing",
    "asphyxiations",
    "psychologies",
    "pageantry's",
    "padlock's",
    "riots",
    "enrolment",
    "mart's",
    "aquatic's",
    "madhouses",
    "jingle",
    "dependent's",
    "raging",
    "corkscrew's",
    "month",
    "site's",
    "neglect's",
    "child's",
    "moderate's",
    "merchandizes",
    "foam's",
    "mobility",
    "slob's",
    "wharfs",
    "hostel",
    "clear's",
    "cushioned",
    "strived",
    "art",
    "studs",
    "solitary",
    "nits",
    "someplace",
    "paving",
    "forcible",
    "engineer's",
    "plurals",
    "violated",
    "deadens",
    "upbeat's",
    "larks",
    "incompatibility's",
    "aching",
    "hurl's",
    "shiver's",
    "therapist",
    "streamer",
    "spying",
    "shortly",
    "multi",
    "edit's",
    "publicise",
    "banish",
    "veneering",
    "hitherto",
    "porticos",
    "plodded",
    "pictorials",
    "seasonable",
    "barometer",
    "timings",
    "scampers",
    "irony's",
    "room",
    "reach",
    "profaned",
    "vestige's",
    "evangelist",
    "warpath's",
    "tawnier",
    "ruffling",
    "bread's",
    "vigour",
    "tripping",
    "obnoxious",
    "slimiest",
    "epoch's",
    "dismiss",
    "devotion",
    "overpower",
    "regaled",
    "free",
    "effort's",
    "brocades",
    "stimulus's",
    "contact",
    "subjugated",
    "motivation",
    "smelled",
    "taimings",
    "sacampers",
    "iarony's",
    "raoom",
    "raeach",
    "parofaned",
    "vaestige's",
    "eavangelist",
    "waarpath's",
    "taawnier",
    "rauffling",
    "baread's",
    "vaigour",
    "taripping",
    "oabnoxious",
    "salimiest",
    "eapoch's",
    "daismiss",
    "daevotion",
    "oaverpower",
    "raegaled",
    "faree",
    "eaffort's",
    "barocades",
    "satimulus's",
    "caontact",
    "saubjugated",
    "maotivation",
    "samelled"
];

// 100 key phf map
static PHF_MAP: phf::Map<&'static str, &'static str> = phf_map! {
    "screened" => "smelled",
    "equivalent" => "motivation",
    "tilt's" => "subjugated",
    "minting" => "contact",
    "orienting" => "stimulus's",
    "aviator" => "brocades",
    "blonder" => "effort's",
    "ambition" => "free",
    "fits" => "regaled",
    "linoleum's" => "overpower",
    "hell" => "devotion",
    "harnessing" => "dismiss",
    "asphyxiations" => "epoch's",
    "psychologies" => "slimiest",
    "pageantry's" => "obnoxious",
    "padlock's" => "tripping",
    "riots" => "vigour",
    "enrolment" => "bread's",
    "mart's" => "ruffling",
    "aquatic's" => "tawnier",
    "madhouses" => "warpath's",
    "jingle" => "evangelist",
    "dependent's" => "vestige's",
    "raging" => "profaned",
    "corkscrew's" => "reach",
    "month" => "room",
    "site's" => "irony's",
    "neglect's" => "scampers",
    "child's" => "timings",
    "moderate's" => "barometer",
    "merchandizes" => "seasonable",
    "foam's" => "pictorials",
    "mobility" => "plodded",
    "slob's" => "porticos",
    "wharfs" => "hitherto",
    "hostel" => "veneering",
    "clear's" => "banish",
    "cushioned" => "publicise",
    "strived" => "edit's",
    "art" => "multi",
    "studs" => "shortly",
    "solitary" => "spying",
    "nits" => "streamer",
    "someplace" => "therapist",
    "paving" => "shiver's",
    "forcible" => "hurl's",
    "engineer's" => "aching",
    "plurals" => "incompatibility's",
    "violated" => "larks",
    "deadens" => "upbeat's",
    "upbeat's" => "deadens",
    "larks" => "violated",
    "incompatibility's" => "plurals",
    "aching" => "engineer's",
    "hurl's" => "forcible",
    "shiver's" => "paving",
    "therapist" => "someplace",
    "streamer" => "nits",
    "spying" => "solitary",
    "shortly" => "studs",
    "multi" => "art",
    "edit's" => "strived",
    "publicise" => "cushioned",
    "banish" => "clear's",
    "veneering" => "hostel",
    "hitherto" => "wharfs",
    "porticos" => "slob's",
    "plodded" => "mobility",
    "pictorials" => "foam's",
    "seasonable" => "merchandizes",
    "barometer" => "moderate's",
    "timings" => "child's",
    "scampers" => "neglect's",
    "irony's" => "site's",
    "room" => "month",
    "reach" => "corkscrew's",
    "profaned" => "raging",
    "vestige's" => "dependent's",
    "evangelist" => "jingle",
    "warpath's" => "madhouses",
    "tawnier" => "aquatic's",
    "ruffling" => "mart's",
    "bread's" => "enrolment",
    "vigour" => "riots",
    "tripping" => "padlock's",
    "obnoxious" => "pageantry's",
    "slimiest" => "psychologies",
    "epoch's" => "asphyxiations",
    "dismiss" => "harnessing",
    "devotion" => "hell",
    "overpower" => "linoleum's",
    "regaled" => "fits",
    "free" => "ambition",
    "effort's" => "blonder",
    "brocades" => "aviator",
    "stimulus's" => "orienting",
    "contact" => "minting",
    "subjugated" => "tilt's",
    "motivation" => "equivalent",
    "smelled" => "screened"
};

fn criterion_benchmark(c: &mut Criterion) {
    // 100 same keys in hashmap as in phf
    let hm: HashMap<&str, &str> = PHF_MAP.entries().map(|(k, v)| (*k, *v)).collect();

    // buffer to write results into
    const EMPTY_STR: &str = "";
    let mut buffer = [Some(&EMPTY_STR); 229];

    c.bench_function("std", |b| b.iter(|| black_box(std(&hm, &mut buffer))));
    c.bench_function("phf", |b| b.iter(|| black_box(phf(&PHF_MAP, &mut buffer))));

}

fn phf(m: &'static phf::Map<&'static str, &'static str>, b: &mut [Option<&&str>]) {
    KEYS.iter().map(|k| m.get(k)).enumerate().for_each(|(i, k)| { b[i] = k; });
}

fn std<'a>(m: &'a HashMap<&'static str, &'static str>, b: &mut [Option<&'a &str>]) {
    KEYS.iter().map(|k| m.get(k)).enumerate().for_each(|(i, k)| { b[i] = k; });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

100 strings is a relatively small size. For comparison / to get a better picture, you should probably also try larger sizes.

1 Like

It looks like the phf crate uses the same SipHasher algorithm as libstd, but outputs a 96bit hash instead of a 64bit hash for libstd:

And then during lookup uses a more complex algorithm to find the location of the key with the given hash which involves a table lookup and a modulo. Both operations are slower than the masking that libstd does.

for pfh vs

for libstd.

That said the tradeoff between having fast initial guesses for the hash table entry to look at and avoiding potentially needing many successive probes at high fill ratios can be entirely different at higher hashmap sizes. You should try benchmarking it with the actual data you will be using. You may also want to take a look at odht which is a hashmap serializable like phf, but without using perfect hashing. Rustc uses it for serializing various kinds of information in the crate metadata.

4 Likes