-
Notifications
You must be signed in to change notification settings - Fork 488
Description
This fails, but it should succeed:
use regex_automata::{meta::Regex, nfa::thompson::pikevm::PikeVM};

fn main() {
    env_logger::init();

    let pattern = r".+\b\n";
    let haystack = "β77\n";

    let baseline = PikeVM::new(pattern).unwrap();
    let mut cache = baseline.create_cache();

    let re = Regex::new(pattern).unwrap();
    let found1 = re.find(haystack);
    let found2 = baseline.find(&mut cache, haystack);
    if let Some(found1) = found1 {
        let found2 = found2.expect("found in target, but not in baseline!");
        assert_eq!(found1, found2);
    }
}

From looking at RUST_LOG=trace cargo run, my guess (without looking at the code yet) is that something in the reverse suffix optimization isn't handling the DFA quit error correctly. That is, when it does a reverse scan after a literal match, it's stopping its search for the starting point early... Probably because of the beta character (a non-ASCII codepoint). The higher level code should see it as a quit error and fall back to another strategy but instead it's seeing it as a correct match. Or perhaps a quit error isn't being returned at all somehow.
Ahhhhhhhhhhhh yeah it's not returning a quit error:
regex/regex-automata/src/meta/limited.rs
Lines 123 to 126 in 855c5c4
    } else if sid.is_quit() {
        if mat.is_some() {
            return Ok(mat);
        }
This also afflicts the "stop at" engine which is used in the reverse inner optimization:
regex/regex-automata/src/meta/stopat.rs
Lines 124 to 127 in 855c5c4
    } else if sid.is_quit() {
        if mat.is_some() {
            return Ok(mat.ok_or(at));
        }
And afflicts the fully compiled DFAs for both of those as well.