I want to create an iterator over the data in a chunked, 1-D HDF5 dataset (using the hdf5 crate).
Here is a prototype that compiles:
use std::path::Path;

use ndarray::s;

pub fn iter_dataset<T: hdf5::H5Type>(
    filename: impl AsRef<Path>,
    dataset: &str,
) -> hdf5::Result<Box<dyn Iterator<Item = T>>> {
    let file = hdf5::File::open(filename)?;
    let dataset = file.dataset(dataset)?;
    let dataset_shape = dataset.shape();
    assert_eq!(dataset_shape.len(), 1); // Assuming a 1-D dataset
    let chunk_dimensions = dataset.chunk().unwrap(); // Assuming the dataset is chunked, for now
    assert_eq!(chunk_dimensions.len(), 1); // Assuming 1-D chunks
    let chunk_size = chunk_dimensions[0];
    let dataset_size = dataset_shape[0];
    // `so_far` is the index of the first element that has not yet been read from the file.
    let first_end = chunk_size.min(dataset_size);
    let mut current_chunk = dataset.read_slice_1d::<T, _>(s![0..first_end]).into_iter().flatten(); // A failed read silently becomes an empty chunk
    let mut so_far = first_end;
    Ok(Box::new(std::iter::from_fn(move || {
        current_chunk.next().or_else(|| {
            if so_far >= dataset_size { return None }
            // Clamp the final slice so it never extends past the end of the dataset.
            let end = (so_far + chunk_size).min(dataset_size);
            current_chunk = dataset.read_slice_1d::<T, _>(s![so_far..end]).into_iter().flatten();
            so_far = end;
            current_chunk.next()
        })
    })))
}
I suspect, however, that I'm reinventing a bunch of wheels and doing any number of other stupid and/or naive things.
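
For reference, I expect to call it roughly like this (the file name, dataset name, and f64 element type below are just placeholders):

fn main() -> hdf5::Result<()> {
    // Placeholder file and dataset names; any H5Type element type would do.
    let values: Box<dyn Iterator<Item = f64>> = iter_dataset("data.h5", "measurements")?;
    for x in values {
        println!("{x}"); // Elements stream in chunk by chunk rather than loading the whole dataset.
    }
    Ok(())
}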