I am writing a rust program and have this code
use std::cmp::Ordering;
use std::collections::VecDeque;
use fancy_regex::Regex;
use anyhow::{anyhow, Error, Context, Result};
#[derive(PartialEq, Eq, Debug)]
struct ParameterEntry {
id: u32,
name: String,
entries: Vec<CategoryEntry>,
}
#[derive(PartialEq, Eq, Debug)]
struct CategoryEntry {
id: u32,
data_type: String,
name: String,
optional_value: Option<String>,
}
fn parse_parameters_slice(parameter_slice: &str) -> Vec<ParameterEntry> {
let re_category = Regex::new(r"\[id = (\d+)\]\s+Category\s+(\w+)\s*\{((?:[^}]|\}(?!;))*\}\s*;)").unwrap();
let re_entry = Regex::new(r"(?m)\[id = (\d+)\]\s*(\w+)\s*(\w+)\s*(?:\{([^;]*)\})?\s*;").unwrap();
let mut parameter_entries = Vec::new();
for cap in re_category.captures_iter(parameter_slice) {
match cap {
Ok(cap) => {
let mut category_entries = Vec::new();
let outer_id: u32 = cap[1].parse().unwrap_or_else(|_| panic!("Failed to convert {} to a u32", &cap[1]));
let outer_name: &str = &cap[2];
let inner_text: &str = &cap[3];
//dbg!(inner_text);
if outer_id == 1234 {
dbg!(&cap);
}
//println!("outer id: {}, outer name: {}", outer_id, outer_name);
for inner_cap in re_entry.captures_iter(inner_text) {
match inner_cap {
Ok(inner_cap) => {
let inner_id: u32 = inner_cap[1].parse().unwrap_or_else(|_| panic!("Failed to convert {} to a u32", &inner_cap[1]));
let inner_type: &str = &inner_cap[2];
let inner_name: &str = &inner_cap[3];
let optional_value = inner_cap.get(4).map(|m| m.as_str().to_string()); // Capture the optional value inside the curly brackets.
if outer_id == 1234 {
dbg!(&inner_cap);
}
//println!("inner id: {}, inner type: {}, inner controller: {}, optional_value: {:?}", inner_id, inner_type, inner_name, &optional_value);
category_entries.push(CategoryEntry { id: inner_id, data_type: inner_type.to_string(), name: inner_name.to_string(), optional_value });
}
Err(e) => eprintln!("Inner error: {:?}", e),
}
}
parameter_entries.push(ParameterEntry { id: outer_id, name: outer_name.to_string(), entries: category_entries });
}
Err(e) => eprintln!("Outer Error: {:?}", e),
}
}
parameter_entries
}
fn main() -> anyhow::Result<()> {
let input_string = r#"Parameters
{
[id = 1234]
Category Test
{
[id = 1] byte TestCaseByte;
[id = 2] double TestCaseDouble {min = 0, max = 32767};
[id = 3] float TestCaseFloat;
[id = 4] short TestCaseShort {min = 0, max = 32767};
[id = 5] string TestCaseString {minsize = 0, maxsize = 25};
};
};"#;
let parameter_entries = parse_parameters_slice(input_string);
//dbg!(parameter_entries);
Ok(())
}
The re_category works and completely captures the parameter block, even if the presence of multiple category blocks. The problem is that the re_entry regex captures up until the 1st optional '{}'. I have confirmed this by removing the optional '{}' block from the [id=2] line and re_entry captures up until [id=4]. If I remove the one from 4, it captures up until 5. If none have the optional block, all are captured successfully. The rules are there can be zero or more whitespace characters (tabs, spaces, etc...),
- followed by '[' followed by zero or more whitespace characters,
- followed by "id", followed by zero or more whitespace characters,
- followed by "=", followed by zero or more whitespace characters,
- followed by a number, followed by zero or more whitespace characters,
- followed by ']'
- followed by one or more whitespace characters,
- followed by a word (typical variable_type naming rules, must start with a letter, but after words can contain alphanumeric characters, including '_', but no spaces) followed by zero or more whitespace characters,
- followed by one or more whitespace characters,
- followed by by a word (typical variable_name naming rules, must start with a letter, but after words can contain alphanumeric characters, including '_', but no spaces) followed by zero or more whitespace characters,
- followed by zero or more whitespace characters,
- followed by an optional '{}' block that will contain other keywords that follow a similar pattern to above for example { min = 0, max = 32767, default = 1}
- followed by zero or more whitespace characters,
- The inner id lines will always be terminated by a ;
- The outer terminating "};" will always be on its own line as is shown in the example
So how can I modify the re_entry regex to capture the inner ids.