Description
What version of regex are you using?
Upgrading the dependency from regex = "1.8"
to
[dependencies.regex]
version = "1.9"
features = ["perf"]
with rayon = "1.7"
and compiled with rustc 1.71.1 (eb26296b5 2023-08-03)
Describe the bug at a high level.
Used parallelization on searches in battle tested way with the combo rayon -> 1.7 and regex -> 1.8, but after upgrading, the same code generates unexpected bug.
The paralellization is made by chunks of patterns, meaning that built regex are not shared across threads neither cloned, with something like:
...
.patterns
.par_chunks(regexset_chunk_size)
.enumerate()
.map(|(pattern_chunk_index, patterns)| {
let pattern_start_index: usize = pattern_chunk_index * regexset_chunk_size;
// BUILD
let (regex_built, partial_invalid_regex_index) =
build::BuiltRegex::build_new(self.regex_config, pattern_start_index, patterns);
let partial_found_regex = perform_regex_find(
®ex_built,
&self.preprocessed_texts,
&self.text_n_char,
0,
);
(partial_found_regex, partial_invalid_regex_index)
})
.reduce(
|| {
let zero_invalid_indexes: BTreeMap<usize, (String, RegexCompilingError)> =
BTreeMap::new();
(MatchesByTextPattern::new(), zero_invalid_indexes)
},
|(mut accum_matches, mut accum_invalid), (new_matches, new_invalid)| {
accum_matches.add(new_matches);
accum_invalid.extend(new_invalid);
(accum_matches, accum_invalid)
},
...
where BuiltRegex
is a struct that has regex::RegexSet
compiled (built in each chunk of patterns <-> thread), and a vec of regex::Regex
.
Log:
**thread '<unnamed>' panicked at 'invalid 'from' id: LazyStateID(134217600)', /builds/g6313/pytextrust/.cargo/registry/src/index.crates.io-6f17d22bba15001f/regex-automata-0.3.8/src/hybrid/dfa.rs:2563:9**
stack backtrace:
0: 0x7f53e4e192e1 - std::backtrace_rs::backtrace::libunwind::trace::h782cc21a5acaf6cb
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
1: 0x7f53e4e192e1 - std::backtrace_rs::backtrace::trace_unsynchronized::hc579eb24ab204515
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x7f53e4e192e1 - std::sys_common::backtrace::_print_fmt::h7223525cfdbacda2
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys_common/backtrace.rs:65:5
3: 0x7f53e4e192e1 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::hbd7d55b7108d2ab8
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys_common/backtrace.rs:44:22
4: 0x7f53e4e3a4df - core::fmt::rt::Argument::fmt::hb4f4a02b9bd9dd49
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/core/src/fmt/rt.rs:138:9
5: 0x7f53e4e3a4df - core::fmt::write::h6d54cd7c9e155ec5
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/core/src/fmt/mod.rs:1094:21
6: 0x7f53e4e17071 - std::io::Write::write_fmt::h6a453a71c692f63b
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/io/mod.rs:1713:15
7: 0x7f53e4e190f5 - std::sys_common::backtrace::_print::h1cbaa8b42678f928
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys_common/backtrace.rs:47:5
8: 0x7f53e4e190f5 - std::sys_common::backtrace::print::h4ddf81241a51b337
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys_common/backtrace.rs:34:9
9: 0x7f53e4e1a597 - std::panicking::default_hook::{{closure}}::hff91f1f484ade5cd
10: 0x7f53e4e1a384 - std::panicking::default_hook::h21f14afd59f7aef9
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/panicking.rs:288:9
11: 0x7f53e4e1aa4c - std::panicking::rust_panic_with_hook::h45f66047b14c555c
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/panicking.rs:705:13
12: 0x7f53e4e1a947 - std::panicking::begin_panic_handler::{{closure}}::h49d1a88ef0908eb4
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/panicking.rs:597:13
13: 0x7f53e4e19716 - std::sys_common::backtrace::__rust_end_short_backtrace::hccebf9e57f8cc425
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys_common/backtrace.rs:151:18
14: 0x7f53e4e1a692 - rust_begin_unwind
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/panicking.rs:593:5
15: 0x7f53e4c59d93 - core::panicking::panic_fmt::h54ec9d0e3180a83d
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/core/src/panicking.rs:67:14
16: 0x7f53e4d36a79 - regex_automata::hybrid::dfa::Lazy::set_transition::h40d9f99cde8aad3b
17: 0x7f53e4c53a1b - regex_automata::hybrid::dfa::Lazy::cache_next_state::hee43c6cf1eebd065
18: 0x7f53e4d4f1b0 - regex_automata::hybrid::search::find_overlapping_fwd::hab272e92ae9fe7db
19: 0x7f53e4d32ef9 - regex_automata::meta::wrappers::HybridEngine::try_which_overlapping_matches::h021040f0c7f16ca3
20: 0x7f53e4d2b2eb - <regex_automata::meta::strategy::ReverseAnchored as regex_automata::meta::strategy::Strategy>::which_overlapping_matches::h16ff42eddbd22ab3
21: 0x7f53e4c6b39d - regex::regexset::string::RegexSet::matches_at::h8c4b9f5760adeede
22: 0x7f53e4c6dca4 - pytextrust::pkg::apply::perform_regex_find::h387779a4b727fd29
23: 0x7f53e4c6904d - core::ops::function::impls::<impl core::ops::function::FnMut<A> for &F>::call_mut::h8a1a242980b7d931
24: 0x7f53e4cb4103 - <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold::hf12286d40c59ba5a
25: 0x7f53e4c9c726 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
26: 0x7f53e4cd0021 - rayon_core::job::StackJob<L,F,R>::run_inline::h29d9ac8b4cd69e32
27: 0x7f53e4cd0c95 - rayon_core::join::join_context::{{closure}}::hb5a83b3e0724fddb
28: 0x7f53e4cd1477 - rayon_core::registry::in_worker::h15b4b94eed70e110
29: 0x7f53e4c9c0f3 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
30: 0x7f53e4cd0a02 - rayon_core::join::join_context::{{closure}}::hb5a83b3e0724fddb
31: 0x7f53e4cd1477 - rayon_core::registry::in_worker::h15b4b94eed70e110
32: 0x7f53e4c9c0f3 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
33: 0x7f53e4cd0a02 - rayon_core::join::join_context::{{closure}}::hb5a83b3e0724fddb
34: 0x7f53e4cd1477 - rayon_core::registry::in_worker::h15b4b94eed70e110
35: 0x7f53e4c9c0f3 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
36: 0x7f53e4cd0a02 - rayon_core::join::join_context::{{closure}}::hb5a83b3e0724fddb
37: 0x7f53e4cd1477 - rayon_core::registry::in_worker::h15b4b94eed70e110
38: 0x7f53e4c9c0f3 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
39: 0x7f53e4cd0a02 - rayon_core::join::join_context::{{closure}}::hb5a83b3e0724fddb
40: 0x7f53e4cd1477 - rayon_core::registry::in_worker::h15b4b94eed70e110
41: 0x7f53e4c9c0f3 - rayon::iter::plumbing::bridge_producer_consumer::helper::h4f120d10ea2efc9b
42: 0x7f53e4cd63b4 - <rayon_core::job::StackJob<L,F,R> as rayon_core::job::Job>::execute::hdb767f879f7dcf76
43: 0x7f53e4c508f3 - rayon_core::registry::WorkerThread::wait_until_cold::h7efda915f27a9dea
44: 0x7f53e4cf3a38 - rayon_core::registry::ThreadBuilder::run::hcbd44bc14931b20d
45: 0x7f53e4cf59ea - std::sys_common::backtrace::__rust_begin_short_backtrace::hf0a4cb93a436a68b
46: 0x7f53e4cf852f - core::ops::function::FnOnce::call_once{{vtable.shim}}::h027bf9b01eb64595
47: 0x7f53e4e1d115 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::he3e5dbdfabe0b668
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/alloc/src/boxed.rs:1985:9
48: 0x7f53e4e1d115 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h246f7c7964633611
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/alloc/src/boxed.rs:1985:9
49: 0x7f53e4e1d115 - std::sys::unix::thread::Thread::new::thread_start::hadf9e3501ff0df23
at /rustc/eb26296b556cef10fb713a38f3d16b9886080f26/library/std/src/sys/unix/thread.rs:108:17
50: 0x7f554b09b044 - start_thread
at ./nptl/pthread_create.c:442:8
51: 0x7f554b11b5fc - __GI___clone3
at ./misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
52: 0x0 - <unknown>
What are the steps to reproduce the behavior?
It is very hard to generate reproducible steps.
What is the expected behavior?
It was usually working fine before change of minor from v1.8 to v1.9 on regex crate with default features.
On the Cargo.lock, i can see changes:
- from memchr 2.5.0 to 2.6.3
- from regex 1.8.1 to 1.9.5 with regex-automata 0.3.8 (being the last one not a dependency on previous version v1.8 as it is not in the cargo lock).
Activity