Shannon Entropy

I am trying to calculate the Shannon Entropy (uniqueness) of a given device fingerprint.

In most papers, this is the given formula: $H(X) = -\sum_{i} P(x_i) \log_2 P(x_i)$

My question is:
Would this be a correct implementation?

use std::collections::HashMap;

/// A single fingerprint: a list of string pairs borrowed for `'a`.
/// `shannon_entropy` treats each pair as `(key, value)`.
type Fingerprint<'a> = Vec<(&'a str, &'a str)>;
/// A collection of fingerprints; `shannon_entropy` selects one of them by index.
type DataSet<'a> = Vec<Fingerprint<'a>>;

/// Calculate the entropy of a given value (where value == i).
fn shannon_entropy(data: DataSet, value: usize) -> f64 {
    let kv_occurances = data
        .iter()
        .flatten()
        .fold(HashMap::new(), |mut acc, &(key, val)| {
            *acc.entry((key, val)).or_insert(0) += 1;
            acc
        });

    let key_occurances = data
        .iter()
        .flatten()
        .fold(HashMap::new(), |mut acc, &(key, _)| {
            *acc.entry(key).or_insert(0) += 1;
            acc
        });

    let mut entropy = 0.0;

    for kv in &data[value] {
        let p = *occurances.get(kv).unwrap_or(&0) as f64 / *counts.get(kv.0).unwrap() as f64;
        entropy -= p * p.log2();
    }

    entropy
}

/// Demo entry point: prints a greeting, then the entropy computed for the
/// fingerprint at index 2 of a small hard-coded data set.
fn main() {
    println!("Hello, world!");

    // Four sample fingerprints, each with an "x" and a "y" attribute.
    let fingerprints = vec![
        vec![("x", "1"), ("y", "1")],
        vec![("x", "1"), ("y", "1")],
        vec![("x", "1"), ("y", "2")],
        vec![("x", "2"), ("y", "3")],
    ];

    let target_index = 2;
    let entropy = shannon_entropy(fingerprints, target_index);
    println!("{}", entropy);
}

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.