Detect Regex conflict

It does seem to work !

use regex_automata::{RegexBuilder, DFA};
use std::collections::HashSet;

/// Returns `true` if the two regexes overlap
pub fn regex_overlap(pattern1: &str, pattern2: &str) -> bool {
    let mut builder = RegexBuilder::new();
    builder.minimize(true).anchored(true);

    let re1 = builder.build(pattern1).unwrap();
    let re2 = builder.build(pattern2).unwrap();

    let dfa1 = re1.forward();
    let dfa2 = re2.forward();

    let mut visited_states = HashSet::new();
    let mut to_process = vec![(dfa1.start_state(), dfa2.start_state())];
    visited_states.insert((dfa1.start_state(), dfa2.start_state()));
    if dfa1.is_match_state(dfa1.start_state()) && dfa2.is_match_state(dfa2.start_state()) {
        return true;
    }
    while let Some(current_state) = to_process.pop() {
        for input in 0..=u8::MAX {
            let next_state = (
                dfa1.next_state(current_state.0, input),
                dfa2.next_state(current_state.1, input),
            );
            if dfa1.is_match_state(next_state.0) && dfa2.is_match_state(next_state.1) {
                return true;
            }
            if !visited_states.contains(&next_state) {
                visited_states.insert(next_state);
                to_process.push((
                    dfa1.next_state(current_state.0, input),
                    dfa2.next_state(current_state.1, input),
                ));
            }
        }
    }

    false
}

#[test]
fn overlapping_regexes() {
    let pattern1 = r"[a-zA-Z]+";
    let pattern2 = r"[a-z]+";
    assert!(regex_overlap(pattern1, pattern2));
    let pattern1 = r"a*";
    let pattern2 = r"b*";
    assert!(regex_overlap(pattern1, pattern2));
    let pattern1 = r"a*bba+";
    let pattern2 = r"b*aaab+a";
    assert!(regex_overlap(pattern1, pattern2));
    let pattern1 = r" ";
    let pattern2 = r"\s";
    assert!(regex_overlap(pattern1, pattern2));
    let pattern1 = r"[A-Z]+";
    let pattern2 = r"[a-z]+";
    assert!(!regex_overlap(pattern1, pattern2));
    let pattern1 = r"a";
    let pattern2 = r"b";
    assert!(!regex_overlap(pattern1, pattern2));
    let pattern1 = r"a*bba+";
    let pattern2 = r"b*aaabbb+a";
    assert!(!regex_overlap(pattern1, pattern2));
    let pattern1 = r"\s+";
    let pattern2 = r"a+";
    assert!(!regex_overlap(pattern1, pattern2));
}

Unfortunately every possible transition in u8 has to be tested, so it might not always perform well. But that's not that big an issue for me (and well, this test still completes in <0.1s in debug on my machine, so good enough).

1 Like