Hi!
I've just released a crate that lets you run aggregating functions (think `fold`, `find`, `for_each`) on serialized sequences (which may be located inside a more complex structure) without first having to deserialize all the elements and store them in a `Vec`.
Examples
Given the following JSON:
[
{"id": 0, "name": "bob", "subscribed_to": ["rust", "knitting", "cooking"]},
{"id": 1, "name": "toby 🐶", "subscribed_to": ["sticks", "tennis-balls"]},
{"id": 2, "name": "alice", "subscribed_to": ["rust", "hiking", "paris"]},
{"id": 3, "name": "mark", "subscribed_to": ["rust", "rugby", "doctor-who"]},
{"id": 4, "name": "vera", "subscribed_to": ["rust", "mma", "philosophy"]}
]
we can process it without allocating a five-element vector of items, as follows:
use serde_deser_iter::top_level::DeserializerExt;
/// One element of the JSON sequence, as seen by the aggregating closure.
///
/// Only the field that is actually consumed is declared here; the sample
/// objects also carry `"id"` and `"name"` keys, which could be added as
/// extra fields if they were needed.
#[derive(serde::Deserialize)]
struct DataEntry {
    /// Channels listed under the entry's `"subscribed_to"` key.
    subscribed_to: Vec<String>,
}
/// Reads the JSON sequence stored at `example_json_path` and prints every
/// distinct channel found in the entries' `subscribed_to` lists.
///
/// Each `DataEntry` is passed to the `for_each` closure and merged into a
/// `HashSet`; this function never builds a collection of entries itself.
///
/// # Errors
///
/// Propagates any error from opening the file or from `for_each`.
fn main() -> anyhow::Result<()> {
    let json_file = File::open(example_json_path)?;
    let reader: BufReader<File> = BufReader::new(json_file);
    let mut deserializer = serde_json::Deserializer::from_reader(reader);

    // Union of all channel names seen so far.
    let mut channels = HashSet::new();
    deserializer.for_each(|entry: DataEntry| channels.extend(entry.subscribed_to))?;

    println!("All existing channels:");
    for channel in channels {
        println!(" - {channel}")
    }

    Ok(())
}
Or a more complex case where the sequence is not at the top-level:
{
"api_version": "x.y.z",
"result" : [
{"id": 0, "name": "bob", "subscribed_to": ["rust", "knitting", "cooking"]},
{"id": 1, "name": "toby 🐶", "subscribed_to": ["good-boy-lifestyle", "sticks", "tennis-balls"]},
{"id": 2, "name": "alice", "subscribed_to": ["rust", "hiking", "paris"]},
{"id": 3, "name": "mark", "subscribed_to": ["rust", "rugby", "doctor-who"]},
{"id": 4, "name": "vera", "subscribed_to": ["rust", "mma", "philosophy"]}
]
}
we can use the `deep` module:
/// One element of the `"result"` sequence.
///
/// Only the `subscribed_to` key of each JSON object is declared.
#[derive(serde::Deserialize)]
struct DataEntry {
    /// Channels listed under the entry's `"subscribed_to"` key.
    subscribed_to: Vec<String>,
}
/// Marker type carrying the fold logic: it has no fields and exists only to
/// implement `FoldAggregator`.
struct Imp;

/// Folds a sequence of `DataEntry` values into the set of all channel names.
impl serde_deser_iter::deep::FoldAggregator for Imp {
    type Item = DataEntry;
    type Acc = HashSet<String>;

    /// Starts the fold from an empty set.
    fn init() -> Self::Acc {
        Default::default()
    }

    /// Merges one entry's subscriptions into the set and hands the set back.
    fn f(mut channels: Self::Acc, item: Self::Item) -> Self::Acc {
        channels.extend(item.subscribed_to);
        channels
    }
}
/// Top-level shape of the JSON document in the nested example.
///
/// Only the `"result"` key is declared; the sample document also carries an
/// `"api_version"` key that has no field here.
#[derive(serde::Deserialize)]
struct Data {
    /// The `"result"` sequence, typed as a `StreamSeqDeser` wrapping a
    /// `Fold` driven by `Imp`.
    result: serde_deser_iter::deep::StreamSeqDeser<serde_deser_iter::deep::Fold<Imp>>,
}
fn main() -> anyhow::Result<()> {
let buffered_file: BufReader<File> = BufReader::new(File::open(example_json_path)?);
let data: Data = serde_json::from_reader(buffered_file)?;
let all_channels = data.result.into_inner();
println!("All existing channels:");
for channel in all_channels {
println!(" - {channel}")
}
Ok(())
}