/// Returns a parser that takes everything up to, but not including, the first
/// *unmatched* `closing_bracket`.
///
/// This parser is designed to work inside the `nom::sequence::delimited` parser, e.g.:
/// `nom::sequence::delimited(tag("("), take_until_unmatched('(', ')'), tag(")"))(i)`
/// It is very similar to `nom::bytes::complete::take_until(")")`, except that
/// nested, balanced bracket pairs are taken as part of the output.
///
/// Escaped characters, e.g. `\(` and `\)`, are never counted as brackets; the
/// escape char `\` and the character following it are taken verbatim. A lone
/// `\` at the very end of the input is taken verbatim as well.
///
/// If the input ends without an unmatched closing bracket and all opening
/// brackets have been closed, the whole input is taken and the remainder is
/// empty.
///
/// # Errors
///
/// Returns `Err::Error` with `ErrorKind::TakeUntil` when the input ends while
/// at least one opening bracket is still unclosed.
pub fn take_until_unmatched(
    opening_bracket: char,
    closing_bracket: char,
) -> impl Fn(&str) -> IResult<&str, &str> {
    move |i: &str| {
        // Byte offset into `i`; it is only ever advanced by whole characters,
        // so it always sits on a char boundary and never exceeds `i.len()`.
        let mut index = 0;
        // Number of opening brackets seen so far that are not yet closed.
        let mut depth = 0usize;
        while let Some(n) = i[index..].find(&[opening_bracket, closing_bracket, '\\'][..]) {
            index += n;
            let mut it = i[index..].chars();
            match it.next() {
                Some('\\') => {
                    // Skip the escape char `\`.
                    index += '\\'.len_utf8();
                    // Skip also the following char, if there is one. A
                    // trailing `\` must not push `index` past the end of the
                    // input, otherwise the next slice operation would panic.
                    if let Some(c) = it.next() {
                        index += c.len_utf8();
                    }
                }
                Some(c) if c == opening_bracket => {
                    depth += 1;
                    index += opening_bracket.len_utf8();
                }
                Some(c) if c == closing_bracket => {
                    if depth == 0 {
                        // We found the unmatched closing bracket.
                        // We do not consume it.
                        return Ok((&i[index..], &i[..index]));
                    }
                    depth -= 1;
                    index += closing_bracket.len_utf8();
                }
                // Can not happen: `find` only stops at one of the three chars.
                _ => unreachable!(),
            }
        }
        if depth == 0 {
            Ok(("", i))
        } else {
            Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))
        }
    }
}
Thanks for the hint. I am actually using it already in my project: tp-note/sse_server.rs
My use case is that I want to find and analyze, as quickly as possible, the first hyperlink in Markdown or reStructuredText notation. In the end, this turned out to be much more complicated than I expected, especially when it comes to link references, because these can span multiple lines, etc. On top of that, many tokens have different escaping rules, ...
The good thing is that I am learning about parsing with Nom 6. I found it hard to get started, but it is actually a very natural approach to parsing: the more you dive into the details of the specification,
the more detailed the parser combinator gets.
use nom::bytes::complete::tag;
use nom::sequence::delimited;
use parse_hyperlinks::take_until_unbalanced;
// Parse one `<...>` group whose content may itself contain balanced `<`/`>` pairs.
let input = "<<inside>inside>abc";
let mut angle_group = delimited(tag("<"), take_until_unbalanced('<', '>'), tag(">"));
// The outer brackets are stripped; the nested pair stays in the output and
// everything after the matching `>` is returned as the remaining input.
let parsed = angle_group(input);
assert_eq!(parsed, Ok(("abc", "<inside>inside")));