Program for searching a list of directories for a given filename

One of my primary motivations in learning Rust was to be able to speed up a Perl script I wrote 15 years ago that searches through a list of directories for a file with a given name. I finally managed to accomplish this, though I had to replace the walkdir crate with a hand-rolled breadth-first traversal in order to get the program to be faster than Perl. Are there any other obvious performance improvements (or other kinds of improvements) that can be made to the code below? (Yes, I know I shouldn't hardcode in the list of directories to search; improving on that is the next step.)

use clap::Parser;
use dirs_next::home_dir;
use shellexpand::tilde;
use std::collections::{HashSet, VecDeque};
use std::ffi::{OsStr, OsString};
use std::fs::{metadata, read_dir, DirEntry};
use std::path::{Path, PathBuf};

const DEFAULT_EXCLUDE_DIRS: [&str; 13] = [
    ".cache",
    ".git",
    ".local",
    ".mypy_cache",
    ".nox",
    ".svn",
    ".tox",
    "Downloads",
    "__pycache__",
    "_build",
    "build",
    "target",
    "venv",
];

#[derive(Clone, Debug, Eq, PartialEq)]
struct FileFinder {
    descend_dirs: Vec<PathBuf>,
    skim_dirs: Vec<PathBuf>,
    exclude_dirs: HashSet<OsString>,
}

impl FileFinder {
    fn new() -> Self {
        FileFinder {
            descend_dirs: Vec::new(),
            skim_dirs: Vec::new(),
            exclude_dirs: HashSet::new(),
        }
    }

    fn descend_dirs<I, P>(&mut self, dirs: I) -> &mut FileFinder
    where
        I: IntoIterator<Item = P>,
        P: AsRef<Path>,
    {
        self.descend_dirs.clear();
        self.descend_dirs
            .extend(dirs.into_iter().map(|p| p.as_ref().into()));
        self
    }

    fn skim_dirs<I, P>(&mut self, dirs: I) -> &mut FileFinder
    where
        I: IntoIterator<Item = P>,
        P: AsRef<Path>,
    {
        self.skim_dirs.clear();
        self.skim_dirs
            .extend(dirs.into_iter().map(|p| p.as_ref().into()));
        self
    }

    fn exclude_dirs<I, S>(&mut self, dirs: I) -> &mut FileFinder
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        self.exclude_dirs.clear();
        self.exclude_dirs
            .extend(dirs.into_iter().map(|p| p.as_ref().into()));
        self
    }

    fn descend_into_dir(&self, dirpath: &Path) -> impl Iterator<Item = PathBuf> + '_ {
        DirDescender::new(&dirpath, &self.exclude_dirs)
    }

    fn iterdirs(&self) -> impl Iterator<Item = PathBuf> + '_ {
        let skim_dirs = self.skim_dirs.clone();
        self.descend_dirs
            .iter()
            .flat_map(|d| self.descend_into_dir(d))
            .chain(skim_dirs.into_iter())
    }

    fn find<P: AsRef<Path>>(&self, filename: P) -> impl Iterator<Item = PathBuf> + '_ {
        let fname = PathBuf::from(filename.as_ref());
        self.iterdirs()
            .map(move |p| p.join(fname.clone()))
            .filter(|p| p.exists())
    }
}

impl Default for FileFinder {
    fn default() -> FileFinder {
        let mut ff = Self::new();
        if let Some(home) = home_dir() {
            ff.descend_dirs([home]);
        }
        ff.exclude_dirs(DEFAULT_EXCLUDE_DIRS.iter().map(OsStr::new));
        ff
    }
}

#[derive(Clone, Debug, Eq, PartialEq)]
struct DirDescender<'a> {
    queue: VecDeque<PathBuf>,
    exclude_dirs: &'a HashSet<OsString>,
}

impl<'a> DirDescender<'a> {
    fn new<P: AsRef<Path>>(dirpath: &P, exclude_dirs: &'a HashSet<OsString>) -> Self {
        DirDescender {
            queue: VecDeque::from([dirpath.as_ref().into()]),
            exclude_dirs,
        }
    }

    fn subdirs(&self, dirpath: &PathBuf) -> Vec<PathBuf> {
        match read_dir(dirpath) {
            Ok(it) => {
                let mut sd: Vec<PathBuf> = it
                    .filter_map(|e| e.ok())
                    .filter(|e| self.filter(e))
                    .map(|e| e.path())
                    .collect();
                sd.sort_unstable();
                sd
            }
            Err(_) => Vec::new(),
        }
    }

    fn filter(&self, entry: &DirEntry) -> bool {
        let is_dir = match entry.file_type() {
            Ok(ft) if ft.is_dir() => true,
            Ok(ft) if ft.is_symlink() => metadata(&entry.path()).map_or(false, |m| m.is_dir()),
            _ => false,
        };
        is_dir && !self.exclude_dirs.contains(&entry.file_name())
    }
}

impl<'a> Iterator for DirDescender<'a> {
    type Item = PathBuf;

    fn next(&mut self) -> Option<PathBuf> {
        let d = self.queue.pop_front()?;
        self.queue.extend(self.subdirs(&d));
        Some(d)
    }
}

#[derive(Clone, Debug, Eq, Parser, PartialEq)]
#[clap(version)]
struct Arguments {
    #[clap(short, long)]
    all: bool,

    filenames: Vec<String>,
}

fn main() {
    let mut finder = FileFinder::default();
    finder
        .descend_dirs([
            tilde("~/work").as_ref(),
            tilde("~/Documents").as_ref(),
            "/Library/WebServer/Documents",
            tilde("~/share").as_ref(),
        ])
        .skim_dirs([tilde("~/bin").as_ref(), tilde("~").as_ref()]);
    let args = Arguments::parse();
    for fname in args.filenames.iter() {
        let mut it = finder.find(fname);
        if let Some(p) = it.next() {
            println!("{}", p.display());
            if args.all {
                for p in it {
                    println!("{}", p.display());
                }
            }
        } else {
            eprintln!("{}: not found", fname)
        }
    }
}
1 Like

Nice work!
You can:

  • Create a function called fn reset_dirs<E, T, I, P>(dirs_list: &mut E, dirs: I) where I: IntoIterator<Item = P>, P: AsRef<Path>, E: Extend<T> to factor out the common parts of descend_dirs, skim_dirs and exclude_dirs.
  • In the subdirs function, you can replace the filter followed by map with filter_map(|e| self.filter(e).then(|| e.path())).

Both suggestions are somewhat cosmetic. If I were you, I'd leave the code the way it is.

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.