I am trying to calculate the Shannon Entropy (uniqueness) of a given device fingerprint.
In most papers, this is the given formula:
$$H(X) = -\sum_{i=1}^{n} P(x_i) \log_2 P(x_i)$$
My question is:
Would this be a correct implementation?
use std::collections::HashMap;
/// A single device fingerprint: a list of `(key, value)` string pairs.
type Fingerprint<'a> = Vec<(&'a str, &'a str)>;
/// A collection of fingerprints; individual fingerprints are addressed by index.
type DataSet<'a> = Vec<Fingerprint<'a>>;
/// Calculate the entropy of a given value (where value == i).
fn shannon_entropy(data: DataSet, value: usize) -> f64 {
let kv_occurances = data
.iter()
.flatten()
.fold(HashMap::new(), |mut acc, &(key, val)| {
*acc.entry((key, val)).or_insert(0) += 1;
acc
});
let key_occurances = data
.iter()
.flatten()
.fold(HashMap::new(), |mut acc, &(key, _)| {
*acc.entry(key).or_insert(0) += 1;
acc
});
let mut entropy = 0.0;
for kv in &data[value] {
let p = *occurances.get(kv).unwrap_or(&0) as f64 / *counts.get(kv.0).unwrap() as f64;
entropy -= p * p.log2();
}
entropy
}
/// Demo driver: prints a greeting, then the entropy score of the fingerprint
/// at index 2 of a small hard-coded data set.
fn main() {
    println!("Hello, world!");

    let fingerprints = vec![
        vec![("x", "1"), ("y", "1")],
        vec![("x", "1"), ("y", "1")],
        vec![("x", "1"), ("y", "2")],
        vec![("x", "2"), ("y", "3")],
    ];

    let entropy = shannon_entropy(fingerprints, 2);
    println!("{}", entropy);
}