Skip to content

Commit

Permalink
perf(parser): optimize Lexer::hex_digit
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Jul 31, 2024
1 parent bb33bcc commit 71b4548
Showing 1 changed file with 23 additions and 7 deletions.
30 changes: 23 additions & 7 deletions crates/oxc_parser/src/lexer/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,30 @@ impl<'a> Lexer<'a> {
}

// NOTE(review): this listing is a rendered commit diff with the `+`/`-` markers stripped, so the
// old and new bodies are interleaved. Going by the page header (23 additions, 7 deletions), the
// `match` block and the trailing `self.consume_char(); Some(value)` look like the removed version;
// the `if let` block and the `unsafe` tail look like the added version. The `};` is shared context.

/// Read a single hexadecimal digit (`0-9`, `a-f`, `A-F`) at the current position.
///
/// Returns `Some(value)` with `value` in `0..=15` and consumes that byte when the next byte is a
/// hex digit. Returns `None` without consuming anything when there is no next byte, or when the
/// next byte is not a hex digit.
fn hex_digit(&mut self) -> Option<u32> {
// Previous implementation: one `match` arm (and so one range check) per digit class.
let value = match self.peek_byte() {
Some(b @ b'0'..=b'9') => u32::from(b) - '0' as u32,
Some(b @ b'a'..=b'f') => 10 + (u32::from(b) - 'a' as u32),
Some(b @ b'A'..=b'F') => 10 + (u32::from(b) - 'A' as u32),
_ => return None,
// New implementation starts here.
// Reduce instructions and remove 1 branch by comparing against `A-F` and `a-f` simultaneously
// https://godbolt.org/z/9caMMzvP3
let value = if let Some(b) = self.peek_byte() {
if matches!(b, b'0'..=b'9') {
// Stays a `u8` here; widened to `u32` once, at the final `Some(u32::from(value))`.
b - b'0'
} else {
// Match `A-F` or `a-f`. `b | 32` converts uppercase letters to lowercase,
// but leaves lowercase as they are.
// `b | 32` also alters other bytes, but the range check below only passes when the result
// is `a..=f`, which requires the original byte to be `A-F` or `a-f`.
let lower_case = b | 32;
if matches!(lower_case, b'a'..=b'f') {
// `lower_case` is at most `b'f'` (102), so adding 10 before subtracting cannot overflow `u8`.
lower_case + 10 - b'a'
} else {
return None;
}
}
} else {
// No byte available to peek.
return None;
};
// Previous tail: consume via `consume_char` and return the `u32` computed by the `match`.
self.consume_char();
Some(value)
// New tail:
// Because of `b | 32` above, compiler cannot deduce that next byte is definitely ASCII
// so `next_byte_unchecked` is necessary to produce compact assembly, rather than `consume_char`.
// SAFETY: This code is only reachable if there is a byte remaining, and it's ASCII.
// Therefore it's safe to consume that byte, and will leave position on a UTF-8 char boundary.
unsafe { self.source.next_byte_unchecked() };
Some(u32::from(value))
}

fn code_point(&mut self) -> Option<u32> {
Expand Down

0 comments on commit 71b4548

Please sign in to comment.