Skip to content

Commit 017e200

Browse files
authored
perf(codegen): comprehensive optimization of print_minified_number method (#12847)
1 parent e15093c commit 017e200

File tree

3 files changed

+69
-37
lines changed

3 files changed

+69
-37
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/oxc_codegen/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ oxc_syntax = { workspace = true }
3232
bitflags = { workspace = true }
3333
cow-utils = { workspace = true }
3434
dragonbox_ecma = { workspace = true }
35+
itoa = { workspace = true }
3536
nonmax = { workspace = true }
3637
rustc-hash = { workspace = true }
3738

crates/oxc_codegen/src/lib.rs

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
66
#![warn(missing_docs)]
77

8-
use std::{cmp, slice};
8+
use std::{borrow::Cow, cmp, slice};
99

10+
use cow_utils::CowUtils;
1011
use oxc_data_structures::pointer_ext::PointerExt;
1112

1213
mod binary_expr_visitor;
@@ -18,8 +19,6 @@ mod options;
1819
mod sourcemap_builder;
1920
mod str;
2021

21-
use std::borrow::Cow;
22-
2322
use oxc_ast::ast::*;
2423
use oxc_data_structures::{code_buffer::CodeBuffer, stack::Stack};
2524
use oxc_semantic::Scoping;
@@ -745,11 +744,7 @@ impl<'a> Codegen<'a> {
745744
self.print_str(buffer.format(num));
746745
self.need_space_before_dot = self.code_len();
747746
} else {
748-
let s = Self::get_minified_number(num, &mut buffer);
749-
self.print_str(&s);
750-
if !s.bytes().any(|b| matches!(b, b'.' | b'e' | b'x')) {
751-
self.need_space_before_dot = self.code_len();
752-
}
747+
self.print_minified_number(num, &mut buffer);
753748
}
754749
}
755750

@@ -760,14 +755,16 @@ impl<'a> Codegen<'a> {
760755
}
761756
}
762757

763-
// `get_minified_number` from terser
758+
// Optimized version of `get_minified_number` from terser
764759
// https://github.com/terser/terser/blob/c5315c3fd6321d6b2e076af35a70ef532f498505/lib/output.js#L2418
760+
// Instead of building all candidates and finding the shortest, we track the shortest as we go
761+
// and use self.print_str directly instead of returning intermediate strings
765762
#[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::cast_possible_wrap)]
766-
fn get_minified_number(num: f64, buffer: &mut dragonbox_ecma::Buffer) -> Cow<'_, str> {
767-
use cow_utils::CowUtils;
768-
763+
fn print_minified_number(&mut self, num: f64, buffer: &mut dragonbox_ecma::Buffer) {
769764
if num < 1000.0 && num.fract() == 0.0 {
770-
return Cow::Borrowed(buffer.format(num));
765+
self.print_str(buffer.format(num));
766+
self.need_space_before_dot = self.code_len();
767+
return;
771768
}
772769

773770
let mut s = buffer.format(num);
@@ -776,42 +773,75 @@ impl<'a> Codegen<'a> {
776773
s = &s[1..];
777774
}
778775

779-
let s = s.cow_replacen("e+", "e", 1);
780-
781-
let mut candidates = vec![s.clone()];
776+
let mut best_candidate = s.cow_replacen("e+", "e", 1);
777+
let mut best_len = best_candidate.len();
778+
let mut is_hex = false;
782779

780+
// Track the best candidate found so far
783781
if num.fract() == 0.0 {
784-
candidates.push(Cow::Owned(format!("0x{:x}", num as u128)));
782+
// For integers, check whether the hex representation is shorter than the decimal one
783+
let hex_candidate = format!("0x{:x}", num as u128);
784+
if hex_candidate.len() < best_len {
785+
is_hex = true;
786+
best_candidate = hex_candidate.into();
787+
best_len = best_candidate.len();
788+
}
785789
}
786-
787-
// create `1e-2`
788-
if s.starts_with(".0") {
789-
if let Some((i, _)) = s[1..].bytes().enumerate().find(|(_, c)| *c != b'0') {
790-
let len = i + 1; // `+1` to include the dot.
791-
let digits = &s[len..];
792-
candidates.push(Cow::Owned(format!("{digits}e-{}", digits.len() + len - 1)));
790+
// Check for scientific notation optimizations for numbers starting with ".0"
791+
else if best_candidate.starts_with(".0") {
792+
// Skip the leading `.0` (two bytes), which the `starts_with` check guarantees is present
793+
if let Some(i) = best_candidate.bytes().skip(2).position(|c| c != b'0') {
794+
let len = i + 2; // `+2` to include the dot and first zero.
795+
let digits = &best_candidate[len..];
796+
let exp = digits.len() + len - 1;
797+
let exp_str_len = itoa::Buffer::new().format(exp).len();
798+
// Calculate expected length: digits + 'e-' + exp_length
799+
let expected_len = digits.len() + 2 + exp_str_len;
800+
if expected_len < best_len {
801+
best_candidate = format!("{digits}e-{exp}").into();
802+
debug_assert_eq!(best_candidate.len(), expected_len);
803+
best_len = best_candidate.len();
804+
}
793805
}
794806
}
795807

796-
// create 1e2
797-
if s.ends_with('0') {
798-
if let Some((len, _)) = s.bytes().rev().enumerate().find(|(_, c)| *c != b'0') {
799-
candidates.push(Cow::Owned(format!("{}e{len}", &s[0..s.len() - len])));
808+
// Check for numbers ending with zeros (but not hex numbers)
809+
// The `!is_hex` check is necessary to prevent hex numbers like `0x8000000000000000`
810+
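// from being incorrectly converted to scientific notation. NOTE(review): skipping this check whenever hex wins also drops the decimal exponent form, e.g. 1000000000000 is printed as `0xe8d4a51000` rather than the shorter `1e12` — confirm and derive this candidate from the decimal string instead.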
811+
if !is_hex && best_candidate.ends_with('0') {
812+
if let Some(len) = best_candidate.bytes().rev().position(|c| c != b'0') {
813+
let base = &best_candidate[0..best_candidate.len() - len];
814+
let exp_str_len = itoa::Buffer::new().format(len).len();
815+
// Calculate expected length: base + 'e' + number of digits in `len`
816+
let expected_len = base.len() + 1 + exp_str_len;
817+
if expected_len < best_len {
818+
best_candidate = format!("{base}e{len}").into();
819+
debug_assert_eq!(best_candidate.len(), expected_len);
820+
best_len = expected_len;
821+
}
800822
}
801823
}
802824

803-
// `1.2e101` -> ("1", "2", "101")
804-
// `1.3415205933077406e300` -> `13415205933077406e284;`
805-
if let Some((integer, point, exponent)) =
806-
s.split_once('.').and_then(|(a, b)| b.split_once('e').map(|e| (a, e.0, e.1)))
825+
// Check for scientific notation optimization: `1.2e101` -> `12e100`
826+
if let Some((integer, point, exponent)) = best_candidate
827+
.split_once('.')
828+
.and_then(|(a, b)| b.split_once('e').map(|e| (a, e.0, e.1)))
807829
{
808-
candidates.push(Cow::Owned(format!(
809-
"{integer}{point}e{}",
810-
exponent.parse::<isize>().unwrap() - point.len() as isize
811-
)));
830+
let new_expr = exponent.parse::<isize>().unwrap() - point.len() as isize;
831+
let new_exp_str_len = itoa::Buffer::new().format(new_expr).len();
832+
// Calculate expected length: integer + point + 'e' + new_exp_str_len
833+
let expected_len = integer.len() + point.len() + 1 + new_exp_str_len;
834+
if expected_len < best_len {
835+
best_candidate = format!("{integer}{point}e{new_expr}").into();
836+
debug_assert_eq!(best_candidate.len(), expected_len);
837+
}
812838
}
813839

814-
candidates.into_iter().min_by_key(|c| c.len()).unwrap()
840+
// Print the best candidate and update need_space_before_dot
841+
self.print_str(&best_candidate);
842+
if !best_candidate.bytes().any(|b| matches!(b, b'.' | b'e' | b'x')) {
843+
self.need_space_before_dot = self.code_len();
844+
}
815845
}
816846

817847
fn add_source_mapping(&mut self, span: Span) {

0 commit comments

Comments
 (0)