Calculating the max of a vector of Results

I'm fairly new to rust, and as an exercise I decided to build a small tool to look through a directory and tell me when the most recent write to that folder was (since, on Windows, a directory's LastWriteTime isn't the same as when something was updated within that directory).

Here was the initial function I wrote:

fn get_folder_last_write(p: PathBuf, depth: usize) -> (String, SystemTime) {
    let path_string = String::from(p.to_string_lossy());
    let last_write = WalkDir::new(p).max_depth(depth)
        .into_iter()
        .map(|x| {
            let x = x.unwrap();
            x
                .metadata().unwrap()
                .modified().unwrap()
        }).max();

    (path_string, last_write.unwrap())
}

This worked, but I wanted to try to clean it up and make it a bit more idiomatic, so I switched the return type to Result and came up with this:

fn get_folder_last_write(p: PathBuf, depth: usize) -> 
    Result<(String, SystemTime), io::Error> {

    let path_string = String::from(p.to_string_lossy());

    let last_write: Result<SystemTime, io::Error> = WalkDir::new(p).max_depth(depth)
        .into_iter()
        .map(|x| {
            x?
                .metadata()?
                .modified()
        }).into_iter().try_fold(SystemTime::UNIX_EPOCH, |prev, next| {
            if next? > prev {Ok(next?)} else {Ok(prev)} 
        });

    Ok((path_string, last_write?))
}

However, I keep having issues the borrow checker that I don't understand - it seems like the if statement is consuming the Result object?. And this code feels like it's far more complex then it should be.

Could someone explain what is the correct way to approach this (and also, just for learning, why the borrow checker doesn't like this code)?

Thank you for any help!

Could you please post the full error, the cargo build output in the terminal?

Sure thing. Here's the output of the build:

PS C:\GitProjects\rustExperiments\list_foldres\sort_folders> cargo build
   Compiling sort_folders v0.1.0 (C:\GitProjects\rustExperiments\list_foldres\sort_folders)
error[E0382]: use of moved value: `next`
   --> src\main.rs:85:33
    |
84  |         }).into_iter().try_fold(SystemTime::UNIX_EPOCH, |prev, next| {
    |                                                                ---- move occurs because `next` has type `Result<SystemTime, std::io::Error>`, which does not implement the `Copy` trait
85  |             if next? > prev {Ok(next?)} else {Ok(prev)} 
    |                -----            ^^^^ value used here after move
    |                |
    |                `next` moved due to this method call
    |
note: `branch` takes ownership of the receiver `self`, which moves `next`
   --> C:\Users\username\.msrustup\packages\rust.tools-x86_64-pc-windows-msvc.1.74.1-ms-20231209.1\tools\lib/rustlib/src/rust\library\core\src\ops\try_trait.rs:217:15
    |
217 |     fn branch(self) -> ControlFlow<Self::Residual, Self::Output>;
    |               ^^^^

For more information about this error, try `rustc --explain E0382`.
error: could not compile `sort_folders` (bin "sort_folders") due to previous error

And for reference, here is the entire main.rs:

use std::fs;
use std::path::PathBuf;
use std::time::SystemTime;
use std::io;

use clap::Parser;
use walkdir::WalkDir;
use chrono::prelude::*;

/// A simple tool to list folder modification times.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    /// Path to the directory to search
    path: String,

    /// Maximum recursion depth
    #[arg(short, long, default_value_t = 3)]
    depth: usize,
}

fn main() {

    let args = Args::parse();

    println!("Checking directory: {}", args.path);

    let target_dir = fs::read_dir(args.path).unwrap();
    let mut results:Vec<(String, SystemTime)> = Vec::new();

    for entry in target_dir {
        let entry = entry.unwrap();
        if entry.path().is_dir() {
            results.push(
                get_folder_last_write(entry.path(), args.depth).unwrap()
            );
        }
        else {
            results.push(
                (String::from(entry.path().to_str().unwrap()), 
                entry.metadata().unwrap().modified().unwrap())
            );
        }
    }

    results.sort_by_key(|x| x.1);

    let results = results.into_iter()
        .map(|x| { (x.0, DateTime::from(x.1)) })
        .collect::<Vec<(String, DateTime<Local>)>>();

    for entry in results {
        println!("{}: {}", entry.0, entry.1.format("%m/%d/%Y %H:%M"));
    }
}

/*
fn get_folder_last_write(p: PathBuf, depth: usize) -> (String, SystemTime) {
    let path_string = String::from(p.to_string_lossy());
    let last_write = WalkDir::new(p).max_depth(depth)
        .into_iter()
        .map(|x| {
            let x = x.unwrap();
            x
                .metadata().unwrap()
                .modified().unwrap()
        }).max();

    (path_string, last_write.unwrap())
}
*/

fn get_folder_last_write(p: PathBuf, depth: usize) -> 
    Result<(String, SystemTime), io::Error> {

    let path_string = String::from(p.to_string_lossy());

    let last_write: Result<SystemTime, io::Error> = WalkDir::new(p).max_depth(depth)
        .into_iter()
        .map(|x| {
            x?
                .metadata()?
                .modified()
        }).into_iter().try_fold(SystemTime::UNIX_EPOCH, |prev, next| {
            if next? > prev {Ok(next?)} else {Ok(prev)} 
        });

    Ok((path_string, last_write?))
}

1 Like

The ? operator consumes the Result. You can use .as_ref() or .is_some_and() instead to avoid this.

Your specific borrow-checker error is a problem with temporaries. Replace the inner block with the following and it should compile.

let next = next?;
if next > prev {Ok(next)} else {Ok(prev)} 

I would write the entire function something like this instead:

fn get_folder_last_write<P: AsRef<Path>>(p: P, depth: usize) -> Result<(P, SystemTime), io::Error> {
    let last_write: Result<SystemTime, io::Error> = WalkDir::new(&p)
        .max_depth(depth)
        .into_iter()
        .map(|x| x?.metadata()?.modified())
        .try_fold(SystemTime::UNIX_EPOCH, |prev, next| {
            Ok(std::cmp::max(prev, next?))
        });
    Ok((p, last_write?))
}

I've removed the String conversion (seems like an unrelated operation that doesn't belong in the function) and replaced the manual if else with a built-in function. I also got rid of the redundant second .into_iter (calling into_iter on an iterator doesn't do anything), and made the path type a generic (AsRef<Path>) to be more idiomatic.

2 Likes

This works! Adjusting the inner block fixed the borrow checker problem, and I'd agree your version definitely looks cleaner. Thank you.

Regarding the borrow checker error, just to make sure I understand the problem, the issue is that I'm consuming the result in the if statement (so I can't access it later to get the SystemTime value), but if I call

let next = next?;

I'm extracting the SystemTime or Error. And when comparing the two SystemTime values, it doesn't matter that the if statement consumed the inputs, because SystemTime implements copy?

Going forward, should I just avoid using ? in if statements like that?

Finally, again to check my understanding, the AsRef is equivalent to saying "any reference that could be converted to a Path, so strings, pathbufs, etc"?

Thank you again - this has definitely helped.

@kpreid - do you have an example of using .as_ref() as you suggested, so I could see how that would work.

The > operator actually borrows using & so it does not consume the inputs. What did consume the input was ?. Basically you needed to get a T out of a Result<T>, compare it, then potentially return it, and you can't do that by using ? twice. As long as you are aware of that there is no reason to avoid ? - it is there to make working with Results more concise.

1 Like

Sorry, I wrote that without enough consideration. There isn't a way to use .as_ref() that isn't pointless in this particular case, because it turns the Error into a reference too which won't work out.

The pattern where you return the path without modifying it in a relevant way is weird. Just take a &Path.

fn get_folder_last_write(p: &Path, depth: usize) -> SystemTime { /* ... */ }

It's not just any reference, it's any type that implements the trait -- PathBuf and String do so, for example. There's a list starting here. And if T: AsRef<U>, then &T: AsRef<U> as well.

Some rambling nitpicks about AsRef

It's true this is more general and let's you pass back an (owned) PathBuf should they pass one in...

fn get_folder_last_write<P: AsRef<Path>>(p: P, depth: usize) -> (P, SystemTime) { /* ... */ }

...but it also accepts a &Path so the caller can choose not to give up ownership if they wish, and thus there's still no need to pass it back...

fn get_folder_last_write<P: AsRef<Path>>(p: P, depth: usize) -> SystemTime { /* ... */ }

...and I know this is what is used in std and has thus become idiomatic, but it was actually chosen for aesthetic reasons over logical reasons (!); since you don't need ownership you could use the more logical...

fn get_folder_last_write<P>(p: &P, depth: usize) -> SystemTime { /* ... */ }
where //             vvvvvv    ^
    P: AsRef<Path> + ?Sized,

...which also reduces the chances of the caller deciding to call PathBuf::clone if they accidentally give up ownership.

The only benefit to all the AsRef stuff compared to just taking &Path is ergonomics at the call site.

let last = get_folder_last_write(some_string.as_ref());
let last = get_filter_last_write(&some_string);

TL;DR: Even if you decide to go with AsRef<Path> instead of just &Path, there's no reason to return the value.

3 Likes

Thank you - that was really helpful! If I'm reading this correctly, AsRef makes things easier for the caller because multiple types can be used (and the caller can choose if they want to give up ownership or not).

The one thing I didn't understand was your comment about the caller calling PathBuff::clone and using ?Sized. Switching to &P alone should force an immutable reference, so why add ?Sized. Wouldn't this be enough to prevent clone operations:

 fn get_folder_last_write<P: AsRef<Path>>(p: &P, depth: usize) -> 
    Result<SystemTime, io::Error> {

It's mostly me being pedantic about a pet peeve, but let's walk through it. (It's a lot to read about something that doesn't much matter, so also feel free to skip it.)

(You may know this part.) Type parameters like P have an implicit Sized bound, which makes sense when you're taking them by value -- you can't move things which aren't Sized. str isn't Sized for example, which is why you're always using &str. Path is like str. ?Sized just means "remove the implicit Sized bound".

References like &Path are always Sized. If your generic is always behind a reference (or other pointer, like in a Box or Arc), it can make sense to relax the bound and accept more types.


When we look at the idiomatic version:

fn gflw<P: AsRef<Path>>(by_value: P)

It has to be Sized since we're taking by value. It's more ergonomic to call this with a PathBuf or String if the caller is okay giving up ownership, but IMO that's not terribly common. If they get an error that they moved their String, they can add a reference or clone the value. The help suggests cloning, but taking a reference is the better solution. So this is a minor hazard.

The goal is just to get your hands on a &Path. What happens if the caller passes in a &Path? &Path implements AsRef<Path> because Path implements AsRef<Path>. So it goes something like this:

let path = by_value.as_ref();
// <&Path as AsRef<Path>>::as_ref takes a &&Path and calls (*self).as_ref:
// <Path as AsRef<Path>>::as_ref takes a &Path and returns self

Which is more indirect than necessary.

Those two points annoy me. Do they ultimately matter? No, not really, or very rarely. The indirection is probably always optimized out on --release and would be cheap if not anyway. (But see below for a note about AsRef more generally.) The cloning probably trips up newcomers mostly... or people who just let machines like their IDE apply the hint without thinking about it. Even then it's probably not horribly costly.


Now let's look at taking by reference, but without ?Sized.

fn gflw<P: AsRef<Path>>(by_ref: &P)

You can't passed owned things anymore, but you can't pass a &Path (or &str) anymore either -- because then P = Path and Path is not Sized. So you'd have to pass a &&Path (or &&str) which would not be ergonomic.

//   v
gflw(&"foo")

In the body when passing &&Path (so P = &Path) you'd have

let path = by_ref.as_ref();
// <&Path as AsRef<Path>>::as_ref takes a &&Path and calls (*self).as_ref:
// <Path as AsRef<Path>>::as_ref takes a &Path and returns self

same as above.


Now we add ?Sized.

fn gflw<P: ?Sized + AsRef<Path>>(by_ref: &P)

We can pass &Path again,[1] regaining the ergonomics for that case. And when passing &Path, we have P = Path, and the body goes like so:

let path = by_ref.as_ref();
// <Path as AsRef<Path>>::as_ref takes a &Path and returns self

One less layer of indirection.


Finally, a more general note about this AsRef<_> pattern. Even when you just need a &Path (or &str etc), it's idiomatic (in the Sized form) and also makes some things more ergonomic at the call site (passing in a &str instead of calling .as_ref(), say). But it also means you get multiple copies of your function, one for each type that's passed in ("monomorphization"). Ideally they would all compile down like so:

// which style we use is irrelevant to this section
fn gflw<P: AsRef<Path>>(path: P) {
    fn the_real_gflw(path: &Path) {
        // the rest of the function which uses `path`
    }
    the_real_gflw(path.as_ref())
}

However, in practice, you can't rely on this optimization happening. So in std for example, they manually write the inner function out.

Then the chances of the one-liner generic function being inlined is high, and it ideally acts like the caller just called as_ref() themselves.

Alternatively, if you're the only caller or you otherwise don't care about maximizing ergonomics at the call site, you can just take &Path.


  1. and &str ↩ī¸Ž

2 Likes

This was incredibly detailed and very, very helpful. Thank you so much for the write up.

It feels like, for my needs (where I am the only user of this function), &Path is probably the simplest option.

Though, as I consider all this, one follow up question: If the signature was fn gflw<P: AsRef<Path>>(path: P) and I called the function with gflw(dirEntry.path()) - there is no copying of the buffer, as gflw will just get the variable from higher in the stack, correct?

Basically there's no performance penalty for calling glfw(dirEntry.path()) vs glfw(&dirEntry.path())?

This method? If so, correct, for any of these

fn f(p: &Path)
fn g<P: AsRef<Path>>(p: P)
fn h<P: AsRef<Path> + ?Sized>(p: &P)

passing in a &Path (like the return from dir_entry.path()) will work and won't clone.

f(dir_entry.path());
g(dir_entry.path());
h(dir_entry.path());

This one, actually. I'm only using WalkDir for the function with recursion. For the initial "list all directories in the top level folder" I'm using the standard fs::read_dir

So for that, DirEntry::path() returns a PathBuf, which is why I was concerned about copying/performance.

Or, to put it another way, I wanted to check that glfw(dirEntry.path()) and glfw(&dirEntry.path()) are effectively identical in terms of performance.

Ah. If you don't need to use the PathBuf after the call, it doesn't matter if you pass by value when that's possible.

These all work without cloning (and the difference in indirection isn't worth caring about).

    f(&*de.path()); // Pass &Path
    f(&de.path());  // Same thing (auto deref)

    h(&*de.path()); // Pass &Path
    h(&de.path());  // Pass &PathBuf

    g(&*de.path()); // Pass &Path
    g(&de.path());  // Pass &PathBuf
    g(de.path());   // Pass PathBuf

If you do need to use it later, save it to a variable so you don't have to call .path() multiple times (creating multiple PathBuf).

let path = de.path(); // path: PathBuf
// any of the forms above except `g(path)`
println!("{}", path.display()); // use it later
2 Likes

Thank you everyone. This has answered my questions, and I've learned a great deal!

I've cleaned up the code, and it appears to be working nicely. Here's the final version:

use std::fs;
use std::path::Path;
use std::time::SystemTime;
use std::io;

use clap::Parser;
use walkdir::WalkDir;
use chrono::prelude::*;

/// A simple tool to list folder modification times.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    /// Path to the directory to search
    path: String,

    /// Maximum recursion depth
    #[arg(short, long, default_value_t = 3)]
    depth: usize,
}

fn main() {

    let args = Args::parse();

    println!("Checking directory: {}", args.path);

    let mut results: Vec<(String, DateTime<Local>)> = get_last_write(args.path.as_ref(), args.depth)
        .unwrap()
        .into_iter()
        .map(|x| { (x.0, DateTime::from(x.1)) })
        .collect();

    results.sort_by_key(|x| x.1);

    for entry in results {
        println!("{}: {}", entry.0, entry.1.format("%m/%d/%Y %H:%M"));
    }
}

/// Get last write time of all objects in a directory.
fn get_last_write(p: &Path, depth: usize) -> Result<Vec<(String, SystemTime)>, io::Error> {
    fs::read_dir(p)?
        .into_iter()
        .map(|entry| {
            let path = entry?.path();

            if path.is_dir() { Ok((String::from(path.to_string_lossy()), get_folder_last_write(&path, depth)?)) }
            else { Ok((String::from(path.to_string_lossy()), path.metadata()?.modified()?)) }
        })
        .collect()
}

/// Get the most recent write time for any child within the directory.
fn get_folder_last_write(p: &Path, depth: usize) -> 
    Result<SystemTime, io::Error> {

    WalkDir::new(p)
        .max_depth(depth)
        .into_iter()
        .map(|x| x?.metadata()?.modified())
        .try_fold(SystemTime::UNIX_EPOCH, |prev, next| {
            Ok(std::cmp::max(prev, next?))
        })
}

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.