Can't understand the problem with this lifetime error

This works well:

//! A simple grep clone written in Rust

#![warn(missing_debug_implementations, rust_2018_idioms, missing_docs)]

use std::{env, error::Error, fs};

/// Settings for a single search, borrowed from the caller's argument list
#[derive(Debug)]
pub struct Config<'a> {
    /// Text to look for in each line
    pub query: &'a str,
    /// Location of the file whose lines are searched
    pub filepath: &'a str,
    /// When `true`, matching disregards letter case
    pub ignore_case: bool,
}

impl Config<'_> {
    /// Builds a `Config` out of the raw command line arguments.
    ///
    /// `args[1]` becomes the query and `args[2]` the file path; `args[0]` and
    /// anything after `args[2]` are ignored. `ignore_case` is set whenever
    /// `env::var("IGNORE_CASE")` succeeds, whatever the variable's value.
    ///
    /// # Errors
    ///
    /// Returns `"not enough arguments"` when fewer than three arguments are given.
    pub fn new(args: &[String]) -> Result<Config<'_>, &'static str> {
        match args {
            // At least three entries: skip the first, borrow the next two.
            [_, query, filepath, ..] => Ok(Config {
                query,
                filepath,
                ignore_case: env::var("IGNORE_CASE").is_ok(),
            }),
            _ => Err("not enough arguments"),
        }
    }
}

/// Executes one search as described by `config`.
///
/// Reads the whole file at `config.filepath` into memory, runs either the
/// case-insensitive or the case-sensitive search depending on
/// `config.ignore_case`, and prints each matching line to standard output.
///
/// # Errors
///
/// Propagates the error from `fs::read_to_string` when the file cannot be read.
pub fn run(config: &Config<'_>) -> Result<(), Box<dyn Error>> {
    let text = fs::read_to_string(config.filepath)?;
    let needle = config.query;

    let matches = if config.ignore_case {
        search_case_insensitive(needle, &text)
    } else {
        search(needle, &text)
    };

    for hit in &matches {
        println!("{hit}");
    }

    Ok(())
}

/// Collects every line of `contents` that contains `query`.
///
/// Matching is case-sensitive. The lines are returned in their original order
/// as slices borrowed from `contents`.
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let mut hits = Vec::new();
    for line in contents.lines() {
        if line.contains(query) {
            hits.push(line);
        }
    }
    hits
}

/// Collects every line of `contents` that contains `query`, ignoring case.
///
/// The query is lowercased once up front and compared against a lowercased
/// copy of each line. The returned slices are the original, unmodified lines
/// borrowed from `contents`, in their original order.
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let needle = query.to_lowercase();
    let mut hits = Vec::new();
    for line in contents.lines() {
        if line.to_lowercase().contains(&needle) {
            hits.push(line);
        }
    }
    hits
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `search` returns only the line that contains the query as written.
    #[test]
    fn case_sensitive() {
        let haystack = "Rust:\nsafe, fast, productive.\nPick three.";
        let found = search("duct", haystack);

        assert_eq!(vec!["safe, fast, productive."], found);
    }

    /// `search_case_insensitive` matches the query regardless of letter case.
    #[test]
    fn case_insensitive() {
        let haystack = "Rust:\nsafe, fast, productive.\nPick three.\nTrust me.";
        let found = search_case_insensitive("rUsT", haystack);

        assert_eq!(vec!["Rust:", "Trust me."], found);
    }
}

But this doesn't:

//! A simple grep clone written in Rust

#![warn(missing_debug_implementations, rust_2018_idioms, missing_docs)]

use std::{env, error::Error, fs};

/// Configuration for the program; both strings are borrowed for `'a`
#[derive(Debug)]
pub struct Config<'a> {
    /// The string to search for in each line of the file
    pub query: &'a str,
    /// The path of the file to search
    pub filepath: &'a str,
    /// Whether matching ignores letter case
    pub ignore_case: bool,
}

impl Config<'_> {
    /// Creates a new Config from `args[1]` (query) and `args[2]` (file path); errors on fewer than 3 args
    pub fn new(args: &[String]) -> Result<Self, &'static str> {
        if args.len() < 3 {
            return Err("not enough arguments");
        }

        let query = &args[1];
        let filepath = &args[2];
        let ignore_case = env::var("IGNORE_CASE").is_ok(); // any readable value enables it

        Ok(Self {
            query,
            filepath,
            ignore_case,
        })
    }
}

/// Executes one search as described by `config`.
///
/// Reads the whole file at `config.filepath` into memory, runs either the
/// case-insensitive or the case-sensitive search depending on
/// `config.ignore_case`, and prints each matching line to standard output.
///
/// # Errors
///
/// Propagates the error from `fs::read_to_string` when the file cannot be read.
pub fn run(config: &Config<'_>) -> Result<(), Box<dyn Error>> {
    let text = fs::read_to_string(config.filepath)?;
    let needle = config.query;

    let matches = if config.ignore_case {
        search_case_insensitive(needle, &text)
    } else {
        search(needle, &text)
    };

    for hit in &matches {
        println!("{hit}");
    }

    Ok(())
}

/// Collects every line of `contents` that contains `query`.
///
/// Matching is case-sensitive. The lines are returned in their original order
/// as slices borrowed from `contents`.
pub fn search<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let mut hits = Vec::new();
    for line in contents.lines() {
        if line.contains(query) {
            hits.push(line);
        }
    }
    hits
}

/// Collects every line of `contents` that contains `query`, ignoring case.
///
/// The query is lowercased once up front and compared against a lowercased
/// copy of each line. The returned slices are the original, unmodified lines
/// borrowed from `contents`, in their original order.
pub fn search_case_insensitive<'a>(query: &str, contents: &'a str) -> Vec<&'a str> {
    let needle = query.to_lowercase();
    let mut hits = Vec::new();
    for line in contents.lines() {
        if line.to_lowercase().contains(&needle) {
            hits.push(line);
        }
    }
    hits
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `search` returns only the line that contains the query as written.
    #[test]
    fn case_sensitive() {
        let haystack = "Rust:\nsafe, fast, productive.\nPick three.";
        let found = search("duct", haystack);

        assert_eq!(vec!["safe, fast, productive."], found);
    }

    /// `search_case_insensitive` matches the query regardless of letter case.
    #[test]
    fn case_insensitive() {
        let haystack = "Rust:\nsafe, fast, productive.\nPick three.\nTrust me.";
        let found = search_case_insensitive("rUsT", haystack);

        assert_eq!(vec!["Rust:", "Trust me."], found);
    }
}

The error:

error: lifetime may not live long enough
  --> src\lib.rs:30:13
   |
20 |     pub fn new(args: &[String]) -> Result<Self, &'static str> {
   |                      -             -------------------------- return type is Result<Config<'2>, &str>
   |                      |
   |                      let's call the lifetime of this reference `'1`
...
30 |             query,
   |             ^^^^^ this usage requires that `'1` must outlive `'2`

error: could not compile `minigrep` (lib) due to 1 previous error

The difference is renaming Config to Self.

It will be easier to understand the difference if we first replace the elided lifetimes with explicit lifetimes. Your first version is equivalent to:

impl<'imp> Config<'imp> {
    pub fn new<'new>(args: &'new [String]) -> Result<Config<'new>, &'static str> {

Your second version is equivalent to:

impl<'imp> Config<'imp> {
    pub fn new<'new>(args: &'new [String]) -> Result<Self, &'static str> {

Now, in order to desugar Self, you must replace it with the type from the impl block. Not the same string of text, but the same type. That's where your change went wrong — '_ means different lifetimes depending on context. But with explicitly named lifetimes everywhere, a textual substitution will give the right answer. Specifically, your signature with Self is equivalent to:

impl<'imp> Config<'imp> {
    pub fn new<'new>(args: &'new [String]) -> Result<Config<'imp>, &'static str> {

Now the problem should be clearer: it's attempting to return Config with a lifetime unrelated to the borrowed input. The correct solution here is to link the borrowed input to the lifetime in the impl, like this:

impl<'a> Config<'a> {
    pub fn new(args: &'a [String]) -> Result<Self, &'static str> {

Now there are no unconstrained or unrelated lifetime variables — there is only one, used as the lifetime parameter of the Config type, and used to require the appropriate lifetime for the input references.

7 Likes

This is a design error. A type like Config should store the strings, instead of not owning any strings and only being a temporary scope-bound view into some random variables it's borrowing from.

In 99% of cases use of references in structs is a misunderstanding that confuses loans for pointers/storage by reference.

2 Likes

Thank you, it's clear now

I had an idea that using references is always better for performance. Now you say the opposite!

If this is true, when to use references in structs?

Only when the struct containing the references is temporary and only exists inside the scope of the owned variables it references. And since it can be a difficult thing to get right in Rust, only when necessary.

1 Like

Temporary loans are only one of Rust's many reference types, alongside others like Box and Arc.

It also matters when they're used — they can never exist on their own, and always need the value to have a storage location somewhere. By using references in inappropriate places you often can't avoid storing the value; you just prematurely borrow it from an inconvenient storage location.

References are generally the right semantic for passing arguments to functions (except maybe constructors). In structs they work as view types, and are used for things like lock guards.

But they have the primary role of forbidding holding on to a value beyond its scope. That isn't "faster". Looking at it from a performance perspective is like replacing arithmetic division with addition, because addition is faster.

3 Likes

So references are for function/method arguments and temporary/view structs.

1 Like

Yes, those, and also return values that point to existing data.

(And static/constant data that is produced at compile time, which the entire program is “temporary” relative to — that's half of what 'static is about. But 'static is basically always a special case whenever it appears; the way it's used is not much like any other lifetime.)

2 Likes

No, he's not saying the opposite. There's absolutely no judgement about performance whatsoever in @kornel's reply.

While I don't agree with the blanket statement that "use of references in structs is a misunderstanding that confuses loans for pointers/storage by reference",[1] you have to note that references aren't a magic make-performance-good trick. You have to understand what you are doing and not just repeat mantras like "references are faster".

The reason that the use of references can be faster in some situations (for example, when passing arguments to a function) is that you avoid potentially expensive cloning, heap allocation, system calls, etc.

If you already had to create a value (and perform the associated heap allocation, such as in the case of String), then you can't retroactively undo the associated cost just by taking a reference to the value. That makes no sense. I.e., there is no performance difference between:

let string = String::from("long big dynamic string, can't be a literal");
let value = StructStoringString { inner: string };
value.computation_that_only_needs_to_read_the_string();

and

let string = String::from("long big dynamic string, can't be a literal");
let value = StructStoringStrRef { inner: string.as_str() };
value.computation_that_only_needs_to_read_the_string();

because you have to perform the allocation to create a String in both cases (and there's no way to avoid that if you are hoping to handle dynamic input).


  1. A legitimate use case for "reference in a UDT" is an RAII guard that must mutate some other value on Drop, for example, a database transaction wrapping the connection. ↩︎

2 Likes

So you mean that I should use an owned type when the value has to be created anyway, and then use references when passing this owned value to other functions?

If a value needs to be created dynamically, then you really have no other choice than an owned type. It's not a "should", it's the only possible solution.[1] References can't make values live longer.


  1. technically, there exist things such as Box::leak() that give you a reference to a 'static by consuming an owned value, heap-allocating it, and then never destroying it. But I seriously never encountered a legitimate use-case for those, and they are very much the exception rather than the rule. ↩︎

2 Likes