I have a generic function `from_tokens`, with two functions (`from_tokens1` and `from_tokens2`) that call it with specific closures.
Now I have two questions regarding the code below:
- How can the closure in `from_tokens1` be implemented?
- How can the pattern match in `from_tokens2` be avoided?
use itertools::Itertools;
use std::collections::HashMap;
/// A token that owns its text.
///
/// Both variants wrap a `String`; what distinguishes a `Tpe1` token from a
/// `Tpe2` token is not visible in this file (NOTE(review): presumably two
/// token categories — confirm).
enum Token {
/// First token kind, carrying its owned text.
Tpe1(String),
/// Second token kind, carrying its owned text.
Tpe2(String),
}
impl Token {
    /// Borrows the text stored inside the token, regardless of its variant.
    pub fn as_str(&self) -> &str {
        match self {
            // Every variant wraps a `String`, so a single or-pattern arm covers them all.
            Token::Tpe1(text) | Token::Tpe2(text) => text,
        }
    }
}
/// A document carrying two independent token lists.
struct Doc<'a> {
/// Tokens owned by the document.
tokens1: Vec<Token>,
/// Optional list of string slices borrowed for `'a`; `None` means the
/// document has no such list.
tokens2: Option<Vec<&'a str>>,
}
/// Document-frequency table: for each token, the number of documents that
/// contain it at least once.
#[derive(Debug)]
pub struct DfTable<'a> {
/// Number of documents the table was built from.
num_documents: u32,
/// Maps a borrowed token to the count of documents containing it.
inner: HashMap<&'a str, u32>,
}
impl DfTable<'_> {
fn from_tokens<'a>(docs: &[&'a Doc], get_tokens: fn(&'a Doc) -> &'a [&'a str]) -> DfTable<'a> {
let mut df: HashMap<&str, u32> = HashMap::new();
for doc in docs {
for token in get_tokens(doc).iter().unique() {
df.entry(&token)
.and_modify(|x| *x += 1)
.or_insert(1);
}
}
DfTable {
num_documents: docs.len() as u32,
inner: df,
}
}
fn from_tokens1<'a>(docs: &[&'a Doc]) -> DfTable<'a> {
Self::from_tokens(docs, |doc| {
// TODO something like this
// &doc.tokens1.iter().map(|x| x.as_str()).collect_vec()
todo!()
})
}
fn from_tokens2<'a>(docs: &[&'a Doc]) -> DfTable<'a> {
Self::from_tokens(docs, |doc| {
// TODO how to avoid pattern match?
match &doc.tokens2 {
Some(xs) => &xs,
None => &[],
}
})
}
}
/// Builds two sample documents, computes a document-frequency table for each
/// of their token lists and pretty-prints both tables.
fn main() {
    let first = Doc {
        tokens1: vec![
            Token::Tpe1(String::from("foo")),
            Token::Tpe2(String::from("bar")),
        ],
        tokens2: Some(vec!["baz"]),
    };
    let second = Doc {
        tokens1: vec![
            Token::Tpe1(String::from("foo")),
            Token::Tpe2(String::from("quux")),
        ],
        tokens2: Some(vec!["fnord"]),
    };

    // Both tables are computed before anything is printed.
    let docs = vec![&first, &second];
    let df1 = DfTable::from_tokens1(&docs);
    let df2 = DfTable::from_tokens2(&docs);

    println!("{df1:#?}");
    println!("{df2:#?}");
}