TrashPool to retain temporaries past the end of a scope

This feels a lot like the StringPool I wrote to help work with ASTs which borrow from the source text they came from (e.g. so you don't make unnecessary copies when storing identifiers).

The problem I had is that you dynamically load source code from disk and then parse it into memory. My load_from_disk() function needed a way to load text and then "extend" the lifetime of the loaded text so we can return the parsed data without running into dangling references (e.g. the returned value references a local variable) or self-referential structs.

Here's the StringPool:

use std::{cell::RefCell, collections::HashSet};

/// A simple string pool.
///
/// The typical use case for a [`StringPool`] is when dealing with values that
/// borrow from their original text instead of making their own copies.
///
/// By placing the source text into the string pool and deferring its cleanup
/// until the [`StringPool`] is destroyed, you can avoid annoying lifetime
/// issues or self-referential structs.
///
/// Strings are stored as `Box<str>` inside a `RefCell<HashSet<..>>`, which is
/// what lets [`StringPool::intern`] take `&self` rather than `&mut self`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct StringPool(RefCell<HashSet<Box<str>>>);

impl StringPool {
    /// Creates a pool which doesn't contain any strings yet.
    pub fn empty() -> Self { StringPool::default() }

    /// Adds the text to the string pool, returning a reference which will live
    /// as long as the [`StringPool`] itself.
    ///
    /// If an equal string has already been interned, the reference points at
    /// the existing copy and nothing new is allocated.
    pub fn intern<'pool>(&'pool self, text: &str) -> &'pool str {
        // The RefCell guard is released when this function returns, so no
        // borrow of the cell ever escapes to the caller.
        let mut pool = self.0.borrow_mut();

        // Work with a raw pointer from here on: a `&str` taken from `pool`
        // would be tied to the lifetime of the RefCell guard, not to 'pool.
        let interned: *const str = match pool.get(text) {
            Some(existing) => &**existing as *const str,
            None => {
                pool.insert(Box::from(text));
                // Look the string up again so the pointer is derived from
                // the Box that actually lives inside the set.
                let inserted = pool
                    .get(text)
                    .expect("the string was inserted just above");
                &**inserted as *const str
            },
        };

        // SAFETY: by construction, it is safe to expand the string's
        // lifetime to that of the StringPool.
        //
        // While the Box may move around when our hash set gets resized, the
        // bytes making up the string will stay in the same place somewhere
        // on the heap.
        //
        // Additionally, once a string is added to the pool it can never be
        // removed (there is no API which removes or mutates an entry).
        //
        // This means any &'pool pointers returned from this function will
        // be valid until the StringPool is dropped.
        //
        // NOTE(review): this relies on moving a Box<str> not invalidating
        // references previously derived from it - worth confirming by
        // running the tests under Miri.
        unsafe { &*interned }
    }
}

And how it gets used:

use crate::{StringPool, TestCase};
use anyhow::{Context, Error};
use glob::Pattern;
use std::path::{Path, PathBuf};

/// Loads every test case found directly inside `test_root`.
///
/// Each directory entry whose path matches the glob `*.input.ftl` is handed
/// to `load_test_case`; the source text of each fixture is interned in
/// `string_pool` so the returned test cases can borrow from it for `'s`.
///
/// # Errors
///
/// Fails if the glob pattern can't be compiled, the directory (or one of its
/// entries) can't be read, or any individual test case fails to load. The
/// first failure aborts the whole load.
pub fn load_from_disk<'s>(
    test_root: &Path,
    string_pool: &'s StringPool,
) -> Result<Vec<TestCase<'s>>, Error> {
    log::debug!("Loading test fixtures from \"{}\"", test_root.display());

    let mut loaded = Vec::new();
    let fixture_glob = Pattern::new("*.input.ftl")?;

    for dir_entry in test_root.read_dir()? {
        let candidate = dir_entry?.path();

        // Anything that isn't an input fixture is ignored.
        if !fixture_glob.matches_path(&candidate) {
            continue;
        }

        loaded.push(load_test_case(candidate, test_root, string_pool)?);
    }

    Ok(loaded)
}

/// Loads a single test case from `input_file` plus its expected output.
///
/// The input text is interned in `string_pool` before being parsed, which is
/// what allows the returned [`TestCase`] to carry the `'s` lifetime. The
/// expected output is read from `<test_root>/<name>.output.ftl`, where
/// `<name>` is the input's file name up to its first dot (see `file_name`).
///
/// # Errors
///
/// Fails if the file name can't be determined, either file can't be read, or
/// the input can't be parsed.
fn load_test_case<'s>(
    input_file: PathBuf,
    test_root: &Path,
    string_pool: &'s StringPool,
) -> Result<TestCase<'s>, Error> {
    let name = file_name(&input_file).ok_or_else(|| {
        Error::msg(format!(
            "Unable to get the filename for \"{}\"",
            input_file.display()
        ))
    })?;

    let input = std::fs::read_to_string(&input_file).with_context(|| {
        format!("Unable to read \"{}\"", input_file.display())
    })?;
    // Hand the text over to the pool so it outlives this function.
    let input = string_pool.intern(&input);

    // A malformed fixture is reported as an error rather than a panic. The
    // partially-parsed resource that comes with the failure is discarded.
    let fixture =
        fluent_syntax::parser::parse(input).map_err(|(_partial, errors)| {
            Error::msg(format!(
                "Unable to parse \"{}\": {:?}",
                input_file.display(),
                errors
            ))
        })?;
    let output_file_name = test_root.join(&name).with_extension("output.ftl");

    // Report the path we actually tried to read (the output file, not the
    // input file).
    let expected_output = std::fs::read_to_string(&output_file_name)
        .with_context(|| {
            format!("Unable to read \"{}\"", output_file_name.display())
        })?;

    Ok(TestCase {
        input_file,
        name,
        fixture,
        expected_output,
    })
}

/// Extracts the part of a path's file name that comes before the first dot.
///
/// Returns `None` when the path has no file stem or the stem isn't valid
/// UTF-8.
fn file_name(path: &Path) -> Option<String> {
    let stem = path.file_stem()?.to_str()?;

    // `split` always yields at least one piece, and that first piece is
    // everything before the first dot (or the whole stem if there is none).
    let leading = stem.split('.').next().unwrap_or(stem);

    Some(String::from(leading))
}
3 Likes