Skip to content

Commit

Permalink
perf(token): Don't allow unbounded backtrackable parsing
Browse files Browse the repository at this point in the history
In some test data for rinja, they check some parsing corner cases.
Unfortunately for us, also hit a performance corner case.
The entire file was a valid email username but without an `@`.
This mean for every byte, we checked that every byte after it was a
valid username but then backtracked at the end, repeating this until the
whole file was read.

Fixes #1088
  • Loading branch information
epage committed Aug 30, 2024
1 parent 773e4aa commit bf98193
Showing 1 changed file with 18 additions and 7 deletions.
25 changes: 18 additions & 7 deletions crates/typos/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ mod parser {
use winnow::stream::StreamIsPartial;
use winnow::token::{one_of, take_while};

/// Avoid worst-case parse times by limiting how much a `take_while` can take if something
/// later may cause it to fail.
const NON_TERMINATING_CAP: usize = 1024;

pub(crate) fn next_identifier<T>(input: &mut T) -> PResult<<T as Stream>::Slice, ()>
where
T: Compare<char>,
Expand Down Expand Up @@ -446,7 +450,7 @@ mod parser {
trace(
"email",
(
take_while(1.., is_localport_char),
take_while(1..NON_TERMINATING_CAP, is_localport_char),
'@',
take_while(1.., is_domain_char),
)
Expand All @@ -466,15 +470,18 @@ mod parser {
"url",
(
opt((
take_while(1.., is_scheme_char),
take_while(1..NON_TERMINATING_CAP, is_scheme_char),
// HACK: Technically you can skip `//` if you don't have a domain but that would
// get messy to support.
(':', '/', '/'),
)),
(
opt((url_userinfo, '@')),
take_while(1.., is_domain_char),
opt((':', take_while(1.., AsChar::is_dec_digit))),
take_while(1..NON_TERMINATING_CAP, is_domain_char),
opt((
':',
take_while(1..NON_TERMINATING_CAP, AsChar::is_dec_digit),
)),
),
'/',
// HACK: Too lazy to enumerate
Expand All @@ -495,8 +502,8 @@ mod parser {
trace(
"userinfo",
(
take_while(1.., is_localport_char),
opt((':', take_while(0.., is_localport_char))),
take_while(1..NON_TERMINATING_CAP, is_localport_char),
opt((':', take_while(0..NON_TERMINATING_CAP, is_localport_char))),
)
.take(),
)
Expand All @@ -515,7 +522,11 @@ mod parser {
// incorrectly, we opt for just not evaluating it at all.
trace(
"escape",
(take_while(1.., is_escape), take_while(0.., is_xid_continue)).take(),
(
take_while(1..NON_TERMINATING_CAP, is_escape),
take_while(0.., is_xid_continue),
)
.take(),
)
.parse_next(input)
}
Expand Down

0 comments on commit bf98193

Please sign in to comment.