I have the following format; each input should parse into a value of the `Item` data type:
test 1: "<A \"Test\">"
test 2: r#"<A "Test">"#
Result: Item { item_type: TEXT, ascii_data: Some("Test") }
test 3: <A>
Result: Item { item_type: TEXT, ascii_data: None }
For test 1 and test 2 the following code parses correctly, but for test 3 it fails.
The format also contains nested types, for example:
<L
<A "Test1">
<L
<A>
<A "Test2">
>
<A "Test3">
>
Result:
Item {
item_type: LIST,
sub_items: [
Item {
item_type: ASCII,
ascii_data: "Test1",
},
Item {
item_type: LIST,
sub_items: [
Item {
item_type: ASCII,
ascii_data: None,
}
Item {
item_type: ASCII,
ascii_data: "Test2",
}
],
},
Item {
item_type: ASCII,
ascii_data: "Test3",
},
],
}
Rust Playground link
use nom::{
    branch::alt,
    bytes::complete::{tag, take_until},
    character::complete::multispace0,
    combinator::{map, opt},
    multi::many0,
    IResult,
};
/// Kind of node stored in an `Item`.
#[derive(Clone, Debug, PartialEq)]
enum ItemType {
/// A `<L ...>` list node; this is the type `parse_list_item` assigns.
LIST,
/// A `<A ...>` text node; this is the type `parse_ascii_item` assigns.
TEXT,
/// Placeholder type used by `Item::default()`.
NONE,
}
/// A parsed node: a text item, a list of child items, or the default placeholder.
#[derive(Clone, Debug, PartialEq)]
struct Item {
/// Which kind of node this is.
item_type: ItemType,
/// Child items, filled in by `parse_list_item`; `None` by default.
sub_items: Option<Vec<Item>>,
/// Text payload, filled in by `parse_ascii_item`; `None` by default.
ascii_data: Option<String>,
}
impl Default for Item {
    /// Builds the placeholder item: type `NONE`, no children and no text.
    ///
    /// The parsers rely on this through `..Default::default()` to fill in
    /// whichever fields they do not set themselves.
    fn default() -> Self {
        let (sub_items, ascii_data) = (None, None);
        Self {
            item_type: ItemType::NONE,
            sub_items,
            ascii_data,
        }
    }
}
// Parse string data, it may empty then return none,
fn parse_ascii_data(input: &str) -> IResult<&str, String> {
let (input, _) = tag("\"")(input)?;
let (input, ascii_data) = take_until("\"")(input)?;
let (input, _) = tag("\"")(input)?;
Ok((input, ascii_data.to_string()))
}
// Parse <A> or <A "string">, if no string then return empty string then return none
fn parse_ascii_item(input: &str) -> IResult<&str, Item> {
let (input, _) = tag("<A")(input)?;
let (input, _) = multispace0(input)?;
let (input, ascii_data) = alt((parse_ascii_data, map(tag("\"\""), |_| "".to_string())))(input)?;
let (input, _) = tag(">")(input)?;
Ok((
input,
Item {
item_type: ItemType::TEXT,
ascii_data: Some(ascii_data),
..Default::default()
},
))
}
// Parse <L> or <L <A "string">>, if no string then return empty string then return none
fn parse_list_item(input: &str) -> IResult<&str, Item> {
let (input, _) = tag("<L")(input)?;
let (input, _) = multispace0(input)?;
let (input, sub_items) = alt((parse_ascii_item, map(tag("<>"), |_| Item::default())))(input)?;
let (input, _) = tag(">")(input)?;
Ok((
input,
Item {
item_type: ItemType::LIST,
sub_items: Some(vec![sub_items]),
..Default::default()
},
))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `TEXT` item with the given optional payload.
    fn text(data: Option<&str>) -> Item {
        Item {
            item_type: ItemType::TEXT,
            ascii_data: data.map(String::from),
            ..Default::default()
        }
    }

    /// Builds the expected `LIST` item with the given children.
    fn list(sub_items: Vec<Item>) -> Item {
        Item {
            item_type: ItemType::LIST,
            sub_items: Some(sub_items),
            ..Default::default()
        }
    }

    #[test]
    fn test_parse_ascii_item() {
        // Escaped and raw string literals describe the same input.
        assert_eq!(
            parse_ascii_item("<A \"Test\">"),
            Ok(("", text(Some("Test"))))
        );
        assert_eq!(
            parse_ascii_item(r#"<A "Test">"#),
            Ok(("", text(Some("Test"))))
        );
        // No string at all is `None`; an explicit empty string is `Some("")`.
        assert_eq!(parse_ascii_item("<A>"), Ok(("", text(None))));
        assert_eq!(parse_ascii_item(r#"<A "">"#), Ok(("", text(Some("")))));
        // A string that is opened but never closed must be rejected.
        assert!(parse_ascii_item(r#"<A "unterminated>"#).is_err());
    }

    #[test]
    fn test_parse_list_item() {
        assert_eq!(
            parse_list_item("<L <A \"Test\">>"),
            Ok(("", list(vec![text(Some("Test"))])))
        );
        // An empty list has no children.
        assert_eq!(parse_list_item("<L>"), Ok(("", list(vec![]))));
    }

    #[test]
    fn test_parse_nested_list_item() {
        let input = "<L \n <A \"Test1\">\n <L\n <A \"Test2\">\n >\n>";
        let expected_item = list(vec![
            text(Some("Test1")),
            list(vec![text(Some("Test2"))]),
        ]);
        assert_eq!(parse_list_item(input), Ok(("", expected_item)));
    }

    #[test]
    fn test_parse_nested_list_with_empty_text_item() {
        // The nested example from the problem statement.
        let input = "<L\n<A \"Test1\">\n<L\n<A>\n<A \"Test2\">\n>\n<A \"Test3\">\n>";
        let expected_item = list(vec![
            text(Some("Test1")),
            list(vec![text(None), text(Some("Test2"))]),
            text(Some("Test3")),
        ]);
        assert_eq!(parse_list_item(input), Ok(("", expected_item)));
    }
}