One option is to create an `AsyncRead` wrapper that delegates each read to the underlying `AsyncRead` type and then feeds any newly read data to the hasher.
use pin_project_lite::pin_project;
use sha1::{Digest, Sha1};
use std::{error::Error, io::Read, task::Poll};
use tokio::{
fs::{File, OpenOptions},
io::AsyncRead,
};
// Using pin_project_lite to avoid the unsafe pin manipulation: the macro
// generates the `project()` method used to reach the fields through a pinned
// reference.
pin_project! {
/// An [`AsyncRead`] adapter that pairs a reader with a SHA-1 hasher, so that
/// data read through the adapter can be hashed as it streams by.
pub struct HashRead<T> {
// The wrapped reader. `#[pin]` makes `project()` hand this field back pinned,
// which is what allows `poll_read` to be called on it.
#[pin]
read: T,
// CAUTION: Sha1 is considered broken, don't use it where you need strong
// cryptographic guarantees from a hash function
//
// Running hash state; it is updated with bytes as they are read and is
// finalized by `hash()`.
hasher: Sha1,
}
}
impl<T> HashRead<T> {
    /// Wraps `read` together with a freshly initialised SHA-1 hasher.
    ///
    /// When `T` implements `AsyncRead`, bytes read through the wrapper are
    /// also fed to that hasher.
    pub fn new(read: T) -> Self {
        let hasher = Sha1::new();
        Self { read, hasher }
    }

    /// Consumes the wrapper and returns the SHA-1 digest of every byte that
    /// has been fed to the hasher so far.
    pub fn hash(self) -> Vec<u8> {
        let digest = self.hasher.finalize();
        digest.to_vec()
    }
}
impl<T> AsyncRead for HashRead<T>
where
    T: AsyncRead,
{
    /// Delegates the read to the wrapped reader and returns its poll result
    /// unchanged.
    ///
    /// When the inner read completes successfully, any bytes it appended to
    /// `buf` are also fed to the hasher. `Pending` and error results leave the
    /// hasher untouched.
    fn poll_read(
        self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
        buf: &mut tokio::io::ReadBuf<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        let projected = self.project();
        // Remember where the filled region ended so that only the bytes
        // appended by this call are hashed.
        let start = buf.filled().len();

        let outcome = projected.read.poll_read(cx, buf);
        if matches!(outcome, Poll::Ready(Ok(()))) {
            // `get` yields `None` if the filled region shrank below `start`;
            // the filter skips the "nothing new" case.
            let appended = buf
                .filled()
                .get(start..)
                .filter(|bytes| !bytes.is_empty());
            if let Some(bytes) = appended {
                projected.hasher.update(bytes);
            }
        }
        outcome
    }
}
/// Copies the file named by the first command-line argument to `foo`, hashing
/// it with SHA-1 while it streams through [`HashRead`], prints the digest, and
/// then asserts that it equals a one-shot digest of the same file.
///
/// # Errors
///
/// Returns an error if the path argument is missing, or if opening, reading,
/// or writing any of the files fails.
///
/// # Panics
///
/// Panics if the streamed digest differs from the one-shot digest.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // `args_os` also accepts paths that are not valid UTF-8; a missing
    // argument is reported as an error instead of a panic.
    let file_path = std::env::args_os()
        .nth(1)
        .ok_or("missing argument: path of the file to hash")?;

    let mut src = HashRead::new(File::open(&file_path).await?);
    let mut dest = OpenOptions::new()
        .create(true)
        .write(true)
        // Without truncation, a longer pre-existing `foo` would keep its
        // stale trailing bytes after the copy.
        .truncate(true)
        .open("foo")
        .await?;
    tokio::io::copy(&mut src, &mut dest).await?;
    let hash = src.hash();
    println!("{hash:?}");

    // Check that the hash we calculated incrementally matches the hash of the
    // whole file read in one operation. The synchronous read runs on the
    // blocking pool so it does not stall the async runtime.
    let contents = tokio::task::spawn_blocking(move || -> std::io::Result<Vec<u8>> {
        let mut buffer = Vec::new();
        std::fs::File::open(file_path)?.read_to_end(&mut buffer)?;
        Ok(buffer)
    })
    .await??;
    assert_eq!(hash, Sha1::digest(&contents).to_vec());
    Ok(())
}