One of my primary motivations in learning Rust was to be able to speed up a Perl script I wrote 15 years ago that searches through a list of directories for a file with a given name. I finally managed to accomplish this, though I had to replace the walkdir crate with a hand-rolled breadth-first traversal in order to get the program to be faster than Perl. Are there any other obvious performance improvements (or other kinds of improvements) that can be made to the code below? (Yes, I know I shouldn't hardcode in the list of directories to search; improving on that is the next step.)
use clap::Parser;
use dirs_next::home_dir;
use shellexpand::tilde;
use std::collections::{HashSet, VecDeque};
use std::ffi::{OsStr, OsString};
use std::fs::{metadata, read_dir, DirEntry};
use std::path::{Path, PathBuf};
const DEFAULT_EXCLUDE_DIRS: [&str; 13] = [
".cache",
".git",
".local",
".mypy_cache",
".nox",
".svn",
".tox",
"Downloads",
"__pycache__",
"_build",
"build",
"target",
"venv",
];
#[derive(Clone, Debug, Eq, PartialEq)]
struct FileFinder {
descend_dirs: Vec<PathBuf>,
skim_dirs: Vec<PathBuf>,
exclude_dirs: HashSet<OsString>,
}
impl FileFinder {
fn new() -> Self {
FileFinder {
descend_dirs: Vec::new(),
skim_dirs: Vec::new(),
exclude_dirs: HashSet::new(),
}
}
fn descend_dirs<I, P>(&mut self, dirs: I) -> &mut FileFinder
where
I: IntoIterator<Item = P>,
P: AsRef<Path>,
{
self.descend_dirs.clear();
self.descend_dirs
.extend(dirs.into_iter().map(|p| p.as_ref().into()));
self
}
fn skim_dirs<I, P>(&mut self, dirs: I) -> &mut FileFinder
where
I: IntoIterator<Item = P>,
P: AsRef<Path>,
{
self.skim_dirs.clear();
self.skim_dirs
.extend(dirs.into_iter().map(|p| p.as_ref().into()));
self
}
fn exclude_dirs<I, S>(&mut self, dirs: I) -> &mut FileFinder
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
self.exclude_dirs.clear();
self.exclude_dirs
.extend(dirs.into_iter().map(|p| p.as_ref().into()));
self
}
fn descend_into_dir(&self, dirpath: &Path) -> impl Iterator<Item = PathBuf> + '_ {
DirDescender::new(&dirpath, &self.exclude_dirs)
}
fn iterdirs(&self) -> impl Iterator<Item = PathBuf> + '_ {
let skim_dirs = self.skim_dirs.clone();
self.descend_dirs
.iter()
.flat_map(|d| self.descend_into_dir(d))
.chain(skim_dirs.into_iter())
}
fn find<P: AsRef<Path>>(&self, filename: P) -> impl Iterator<Item = PathBuf> + '_ {
let fname = PathBuf::from(filename.as_ref());
self.iterdirs()
.map(move |p| p.join(fname.clone()))
.filter(|p| p.exists())
}
}
impl Default for FileFinder {
fn default() -> FileFinder {
let mut ff = Self::new();
if let Some(home) = home_dir() {
ff.descend_dirs([home]);
}
ff.exclude_dirs(DEFAULT_EXCLUDE_DIRS.iter().map(OsStr::new));
ff
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
struct DirDescender<'a> {
queue: VecDeque<PathBuf>,
exclude_dirs: &'a HashSet<OsString>,
}
impl<'a> DirDescender<'a> {
fn new<P: AsRef<Path>>(dirpath: &P, exclude_dirs: &'a HashSet<OsString>) -> Self {
DirDescender {
queue: VecDeque::from([dirpath.as_ref().into()]),
exclude_dirs,
}
}
fn subdirs(&self, dirpath: &PathBuf) -> Vec<PathBuf> {
match read_dir(dirpath) {
Ok(it) => {
let mut sd: Vec<PathBuf> = it
.filter_map(|e| e.ok())
.filter(|e| self.filter(e))
.map(|e| e.path())
.collect();
sd.sort_unstable();
sd
}
Err(_) => Vec::new(),
}
}
fn filter(&self, entry: &DirEntry) -> bool {
let is_dir = match entry.file_type() {
Ok(ft) if ft.is_dir() => true,
Ok(ft) if ft.is_symlink() => metadata(&entry.path()).map_or(false, |m| m.is_dir()),
_ => false,
};
is_dir && !self.exclude_dirs.contains(&entry.file_name())
}
}
impl<'a> Iterator for DirDescender<'a> {
type Item = PathBuf;
fn next(&mut self) -> Option<PathBuf> {
let d = self.queue.pop_front()?;
self.queue.extend(self.subdirs(&d));
Some(d)
}
}
#[derive(Clone, Debug, Eq, Parser, PartialEq)]
#[clap(version)]
struct Arguments {
#[clap(short, long)]
all: bool,
filenames: Vec<String>,
}
fn main() {
let mut finder = FileFinder::default();
finder
.descend_dirs([
tilde("~/work").as_ref(),
tilde("~/Documents").as_ref(),
"/Library/WebServer/Documents",
tilde("~/share").as_ref(),
])
.skim_dirs([tilde("~/bin").as_ref(), tilde("~").as_ref()]);
let args = Arguments::parse();
for fname in args.filenames.iter() {
let mut it = finder.find(fname);
if let Some(p) = it.next() {
println!("{}", p.display());
if args.all {
for p in it {
println!("{}", p.display());
}
}
} else {
eprintln!("{}: not found", fname)
}
}
}