This feels a lot like the `StringPool` I wrote to help work with ASTs that borrow from the source text they were parsed from (e.g. so you don't make unnecessary copies when storing identifiers).
The problem I had is that you dynamically load source code from disk and then parse it in memory. My `load_from_disk()` function needed a way to load text and then "extend" the lifetime of the loaded text, so that we can return the parsed data without running into dangling references (e.g. the returned value references a local variable) or self-referential structs.
Here's the `StringPool`:
use std::{cell::RefCell, collections::HashSet};
/// A simple string pool.
///
/// The typical use case for a [`StringPool`] is when dealing with values that
/// borrow from their original text instead of making their own copies.
///
/// By placing the source text into the string pool and deferring its cleanup
/// until the [`StringPool`] is destroyed, you can avoid annoying lifetime
/// issues or self-referential structs.
///
/// Interior mutability is provided by a [`RefCell`], so strings can be added
/// through a shared reference.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct StringPool(RefCell<HashSet<Box<str>>>);

impl StringPool {
    /// Creates a pool that does not contain any strings yet.
    pub fn empty() -> Self {
        StringPool::default()
    }

    /// Adds the text to the string pool, returning a reference which will live
    /// as long as the [`StringPool`] itself.
    ///
    /// If an equal string has already been interned, the existing copy is
    /// returned and no new allocation is made.
    pub fn intern<'pool>(&'pool self, text: &str) -> &'pool str {
        let mut pool = self.0.borrow_mut();

        // Work with raw pointers from here on so the result is not tied to
        // the short-lived `RefMut` guard.
        let existing = pool.get(text).map(|boxed| &**boxed as *const str);

        let interned: *const str = match existing {
            Some(ptr) => ptr,
            None => {
                pool.insert(Box::from(text));
                // Look the string up again rather than taking a pointer
                // before the insert, so the pointer is derived from the
                // `Box` that now lives inside the set.
                let inserted = pool
                    .get(text)
                    .expect("the string was inserted on the previous line");
                &**inserted as *const str
            }
        };

        // SAFETY: by construction, it is safe to expand the string's
        // lifetime to that of the StringPool.
        //
        // While the Box may move around when our hash set gets resized, the
        // bytes making up the string will stay in the same place somewhere
        // on the heap.
        //
        // Additionally, once a string is added to the pool it can never be
        // removed (this type exposes no API for removing entries).
        //
        // This means any &'pool pointers returned from this function will
        // be valid until the StringPool is dropped.
        //
        // NOTE(review): worth running the tests under Miri to confirm the
        // aliasing model agrees with this reasoning.
        unsafe { &*interned }
    }
}
And how it gets used:
use crate::{StringPool, TestCase};
use anyhow::{Context, Error};
use glob::Pattern;
use std::path::{Path, PathBuf};
/// Loads every test case found directly inside `test_root`.
///
/// Each directory entry whose path matches the `*.input.ftl` glob is handed
/// to `load_test_case`; everything else is ignored. The source text of each
/// test case is stored in `string_pool`, which is why the returned values
/// borrow from the pool for `'s`.
///
/// # Errors
///
/// Fails if the glob pattern cannot be built, if the directory (or one of
/// its entries) cannot be read, or if loading any single test case fails.
/// The first error aborts the whole load.
pub fn load_from_disk<'s>(
    test_root: &Path,
    string_pool: &'s StringPool,
) -> Result<Vec<TestCase<'s>>, Error> {
    log::debug!("Loading test fixtures from \"{}\"", test_root.display());

    let candidate_pattern = Pattern::new("*.input.ftl")?;
    let mut loaded = Vec::new();

    for dir_entry in test_root.read_dir()? {
        let candidate = dir_entry?.path();

        // Skip anything that isn't an input fixture.
        if !candidate_pattern.matches_path(&candidate) {
            continue;
        }

        loaded.push(load_test_case(candidate, test_root, string_pool)?);
    }

    Ok(loaded)
}
/// Loads a single test case from `input_file`.
///
/// The test's name is the input file's name up to its first dot. The input
/// text is interned in `string_pool` so the parsed fixture can borrow from it
/// for `'s`, and the expected output is read from
/// `<test_root>/<name>.output.ftl`.
///
/// # Errors
///
/// Fails if a name cannot be derived from `input_file`, if either the input
/// or the expected-output file cannot be read, or if the input cannot be
/// parsed.
fn load_test_case<'s>(
    input_file: PathBuf,
    test_root: &Path,
    string_pool: &'s StringPool,
) -> Result<TestCase<'s>, Error> {
    let name = file_name(&input_file).ok_or_else(|| {
        Error::msg(format!(
            "Unable to get the filename for \"{}\"",
            input_file.display()
        ))
    })?;

    let input = std::fs::read_to_string(&input_file).with_context(|| {
        format!("Unable to read \"{}\"", input_file.display())
    })?;
    // Hand the text to the pool so the parsed fixture may outlive this
    // function's local `String`.
    let input = string_pool.intern(&input);

    // Report parse failures to the caller instead of panicking; the
    // partially-parsed resource in the error tuple is not needed here.
    let fixture =
        fluent_syntax::parser::parse(input).map_err(|(_, errors)| {
            Error::msg(format!(
                "Unable to parse \"{}\": {:?}",
                input_file.display(),
                errors
            ))
        })?;

    let output_file_name = test_root.join(&name).with_extension("output.ftl");
    let expected_output = std::fs::read_to_string(&output_file_name)
        .with_context(|| {
            // Name the file that actually failed to load.
            format!("Unable to read \"{}\"", output_file_name.display())
        })?;

    Ok(TestCase {
        input_file,
        name,
        fixture,
        expected_output,
    })
}
/// Returns the leading part of the path's file name, i.e. everything before
/// the first dot.
///
/// Yields `None` when the path has no file stem or the stem is not valid
/// UTF-8.
fn file_name(path: &Path) -> Option<String> {
    let stem = path.file_stem().and_then(|raw| raw.to_str())?;

    // `split` always yields at least one piece, which is the text up to the
    // first dot (or the whole stem when there is no dot).
    stem.split('.').next().map(str::to_owned)
}