Skip to content
134 changes: 133 additions & 1 deletion crates/oxc_ecmascript/src/constant_evaluation/call_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,49 @@ use cow_utils::CowUtils;

use crate::{
StringCharAt, StringCharAtResult, StringCharCodeAt, StringIndexOf, StringLastIndexOf,
StringSubstring, ToInt32, ToJsString as ToJsStringTrait, side_effects::MayHaveSideEffects,
StringSubstring, ToInt32, ToJsString as ToJsStringTrait,
constant_evaluation::url_encoding::{
decode_uri_chars, encode_uri_chars, is_uri_always_unescaped,
},
side_effects::MayHaveSideEffects,
};

use super::{ConstantEvaluation, ConstantEvaluationCtx, ConstantValue};

/// Dispatches a call on a bare identifier to the matching URI encode/decode folder.
///
/// Only `encodeURI`, `encodeURIComponent`, `decodeURI` and `decodeURIComponent` are
/// recognised, and only when `ctx.is_global_reference(ident)` reports `Some(true)`.
/// Any other name, or an identifier that is not confirmed to be global, yields `None`.
fn try_fold_url_related_function<'a>(
    ident: &IdentifierReference<'a>,
    arguments: &Vec<'a, Argument<'a>>,
    ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
    let name = ident.name.as_str();

    // Reject unrelated names first so the scope lookup only runs for candidates.
    let is_uri_function =
        matches!(name, "encodeURI" | "encodeURIComponent" | "decodeURI" | "decodeURIComponent");
    if !is_uri_function {
        return None;
    }

    // A shadowed or unresolved binding must not be folded.
    if ctx.is_global_reference(ident) != Some(true) {
        return None;
    }

    match name {
        "encodeURI" => try_fold_encode_uri(arguments, ctx),
        "encodeURIComponent" => try_fold_encode_uri_component(arguments, ctx),
        "decodeURI" => try_fold_decode_uri(arguments, ctx),
        "decodeURIComponent" => try_fold_decode_uri_component(arguments, ctx),
        _ => None,
    }
}

pub fn try_fold_known_global_methods<'a>(
callee: &Expression<'a>,
arguments: &Vec<'a, Argument<'a>>,
ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
if let Expression::Identifier(ident) = callee {
if let Some(result) = try_fold_url_related_function(ident, arguments, ctx) {
return Some(result);
}
return None;
}

let (name, object) = match callee {
Expression::StaticMemberExpression(member) if !member.optional => {
(member.property.name.as_str(), &member.object)
Expand Down Expand Up @@ -499,3 +532,102 @@ fn try_fold_math_variadic<'a>(
};
Some(ConstantValue::Number(result))
}

fn try_fold_encode_uri<'a>(
args: &Vec<'a, Argument<'a>>,
ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
if args.is_empty() {
return Some(ConstantValue::String(Cow::Borrowed("undefined")));
}
if args.len() != 1 {
return None;
}
let arg = args.first()?;
let expr = arg.as_expression()?;
let string_value = expr.get_side_free_string_value(ctx)?;

// SAFETY: should_encode only returns false for ascii chars
let encoded = unsafe {
encode_uri_chars(
string_value,
#[inline(always)]
|c| match c {
c if is_uri_always_unescaped(c) => false,
b';' | b'/' | b'?' | b':' | b'@' | b'&' | b'=' | b'+' | b'$' | b',' | b'#' => false,
_ => true,
},
)
};
Some(ConstantValue::String(encoded))
}

fn try_fold_encode_uri_component<'a>(
args: &Vec<'a, Argument<'a>>,
ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
if args.is_empty() {
return Some(ConstantValue::String(Cow::Borrowed("undefined")));
}
if args.len() != 1 {
return None;
}
let arg = args.first()?;
let expr = arg.as_expression()?;
let string_value = expr.get_side_free_string_value(ctx)?;

// SAFETY: should_encode only returns false for ascii chars
let encoded = unsafe {
encode_uri_chars(
string_value,
#[inline(always)]
|c| !is_uri_always_unescaped(c),
)
};
Some(ConstantValue::String(encoded))
}

fn try_fold_decode_uri<'a>(
args: &Vec<'a, Argument<'a>>,
ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
if args.is_empty() {
return Some(ConstantValue::String(Cow::Borrowed("undefined")));
}
if args.len() != 1 {
return None;
}
let arg = args.first()?;
let expr = arg.as_expression()?;
let string_value = expr.get_side_free_string_value(ctx)?;

let decoded = decode_uri_chars(
string_value,
#[inline(always)]
|c| matches!(c, b';' | b',' | b'/' | b'?' | b':' | b'@' | b'&' | b'=' | b'+' | b'$' | b'#'),
)?;
Some(ConstantValue::String(decoded))
}

fn try_fold_decode_uri_component<'a>(
args: &Vec<'a, Argument<'a>>,
ctx: &impl ConstantEvaluationCtx<'a>,
) -> Option<ConstantValue<'a>> {
if args.is_empty() {
return Some(ConstantValue::String(Cow::Borrowed("undefined")));
}
if args.len() != 1 {
return None;
}
let arg = args.first()?;
let expr = arg.as_expression()?;
let string_value = expr.get_side_free_string_value(ctx)?;

// decodeURIComponent decodes all percent-encoded sequences
let decoded = decode_uri_chars(
string_value,
#[inline(always)]
|_| false,
)?;
Some(ConstantValue::String(decoded))
}
1 change: 1 addition & 0 deletions crates/oxc_ecmascript/src/constant_evaluation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod call_expr;
mod equality_comparison;
mod is_int32_or_uint32;
mod is_literal_value;
mod url_encoding;
mod value;
mod value_type;

Expand Down
104 changes: 104 additions & 0 deletions crates/oxc_ecmascript/src/constant_evaluation/url_encoding/dec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/dec.rs#L21
// MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE

use std::borrow::Cow;
use std::panic::panic_any;

/// Implements <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-decode>
///
/// Replaces every `%XX` escape in `data_str` with the byte it denotes, except when
/// `should_not_decode` returns `true` for that byte, in which case the escape is copied
/// through exactly as written (hex digit case included).
///
/// Returns
/// * `Some(data_str)` unchanged when the input contains no `%` at all,
/// * `Some(Cow::Owned(..))` with the decoded text otherwise,
/// * `None` when a `%` is not followed by two hex digits, or when the resulting bytes
///   are not valid UTF-8.
///
/// # Panics
/// Panics with `"OOM"` if the output buffer cannot be reserved.
#[inline]
pub fn decode(
    data_str: Cow<'_, str>,
    should_not_decode: impl Fn(u8) -> bool,
) -> Option<Cow<'_, str>> {
    let data = data_str.as_bytes();
    // Fast path: without a `%` there is nothing to decode, so hand the input back as-is.
    let Some(offset) = data.iter().position(|&c| c == b'%') else {
        return Some(data_str);
    };

    // The output is never longer than the input: an escape shrinks to one byte or is
    // copied verbatim, and everything else is copied verbatim.
    let mut decoded = Vec::new();
    if decoded.try_reserve(data.len()).is_err() {
        panic_any("OOM"); // more efficient codegen than built-in OOM handler
    }
    let mut out = NeverRealloc(&mut decoded);

    // Everything before the first `%` is literal text.
    let (prefix, mut pending) = data.split_at(offset);
    out.extend_from_slice(prefix);

    // Invariant: `pending` is either empty or starts with `%`.
    while let Some((_percent, rest)) = pending.split_first() {
        let Some(&[first, second]) = rest.get(0..2) else {
            // 4.c.i.
            return None;
        };
        let (Some(high), Some(low)) = (from_hex_digit(first), from_hex_digit(second)) else {
            // 4.c.iii.
            return None;
        };
        let byte = (high << 4) | low;
        if should_not_decode(byte) {
            out.extend_from_slice(&[b'%', first, second]);
        } else {
            out.push(byte);
        }

        // Copy the literal run that follows, stopping at the next `%` (if any).
        let tail = &rest[2..];
        let literal_len = tail.iter().position(|&c| c == b'%').unwrap_or(tail.len());
        let (literal, next) = tail.split_at(literal_len);
        out.extend_from_slice(literal);
        pending = next;
    }

    // Decoded bytes may not form valid UTF-8; that is reported as `None`.
    Some(Cow::Owned(String::from_utf8(decoded).ok()?))
}

/// Converts one ASCII hex digit (`0-9`, `A-F`, `a-f`) to its value `0..=15`.
///
/// Returns `None` for any other byte.
#[inline]
fn from_hex_digit(digit: u8) -> Option<u8> {
    let value = char::from(digit).to_digit(16)?;
    u8::try_from(value).ok()
}

/// Wrapper around a pre-sized `Vec` whose writes never grow the allocation.
///
/// A write that would not fit in the existing capacity is silently skipped.
struct NeverRealloc<'a, T>(pub &'a mut Vec<T>);

impl<T> NeverRealloc<'_, T> {
    /// Appends `val` unless the vector is already filled to capacity.
    #[inline]
    pub fn push(&mut self, val: T) {
        // these branches only exist to remove redundant reallocation code
        // (the capacity is always sufficient)
        let vec = &mut *self.0;
        if vec.len() == vec.capacity() {
            return;
        }
        vec.push(val);
    }

    /// Appends all of `val` if it fits in the spare capacity; otherwise appends nothing.
    #[inline]
    pub fn extend_from_slice(&mut self, val: &[T])
    where
        T: Clone,
    {
        let spare = self.0.capacity() - self.0.len();
        if val.len() <= spare {
            self.0.extend_from_slice(val);
        }
    }
}

/// `decode` hands the input back borrowed when there is nothing to decode and
/// allocates only when an escape is present.
#[test]
fn dec_borrows() {
    let decode_everything = |_: u8| false;

    // No `%` in the input: the original borrow is returned.
    let untouched = decode(Cow::Borrowed("hello"), decode_everything);
    assert!(matches!(untouched, Some(Cow::Borrowed("hello"))));

    // Escape at the end of the input.
    let trailing = decode(Cow::Borrowed("hello%20"), decode_everything);
    assert!(matches!(trailing, Some(Cow::Owned(s)) if s == "hello "));

    // Escape at the start of the input.
    let leading = decode(Cow::Borrowed("%20hello"), decode_everything);
    assert!(matches!(leading, Some(Cow::Owned(s)) if s == " hello"));
}
79 changes: 79 additions & 0 deletions crates/oxc_ecmascript/src/constant_evaluation/url_encoding/enc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/enc.rs
// MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE

use std::borrow::Cow;

/// Implements <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-encode>
///
/// Percent-encodes every byte of `data_str` for which `should_encode` returns `true`.
/// When no byte needs encoding the input is returned unchanged; otherwise a newly built
/// string is returned.
///
/// # Safety
/// `should_encode` should only return false for characters that are ascii
#[must_use]
pub unsafe fn encode(data_str: Cow<'_, str>, should_encode: impl Fn(u8) -> bool) -> Cow<'_, str> {
    let bytes = data_str.as_bytes();

    // add maybe extra capacity, but try not to exceed allocator's bucket size
    let mut buffer = String::new();
    let _ = buffer.try_reserve(bytes.len() | 15);

    let nothing_encoded = encode_into(bytes, should_encode, |chunk| buffer.push_str(chunk));
    if nothing_encoded { data_str } else { Cow::Owned(buffer) }
}

/// Streams the percent-encoded form of `data` to `push_str`.
///
/// Runs of bytes that `should_encode` rejects are forwarded verbatim; every other byte
/// is forwarded as `%` followed by two uppercase hex digits.
///
/// Returns `true` when no byte needed encoding, in which case `push_str` was never
/// called, and `false` once anything has been forwarded.
///
/// The caller must ensure `should_encode` returns `false` only for ASCII bytes, because
/// the verbatim runs are passed on as `&str` without validation.
fn encode_into(
    mut data: &[u8],
    should_encode: impl Fn(u8) -> bool,
    mut push_str: impl FnMut(&str),
) -> bool {
    let mut emitted = false;
    loop {
        // Fast path to skip over safe chars at the beginning of the remaining string
        let first_unsafe = data.iter().position(|&c| should_encode(c));

        let (safe, rest) = match first_unsafe {
            Some(idx) => data.split_at(idx),
            // Only safe bytes remain and output already started: flush them below.
            None if emitted => (data, &[][..]),
            // Only safe bytes and nothing written yet: the input needs no changes.
            None => return true,
        };
        emitted = true;
        if !safe.is_empty() {
            // SAFETY: should_encode has checked it's ASCII
            push_str(unsafe { str::from_utf8_unchecked(safe) });
        }

        let Some((&byte, remaining)) = rest.split_first() else {
            return false;
        };
        let enc = [b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
        // SAFETY: `%` is a valid UTF-8 char and to_hex_digit returns a valid UTF-8 char
        push_str(unsafe { str::from_utf8_unchecked(&enc) });
        data = remaining;
    }
}

/// Maps a nibble (`0..=15`) to its uppercase ASCII hex digit.
///
/// Values below ten become `'0'..='9'`; everything else is offset from `'A'`, so callers
/// are expected to pass nibbles only.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    if digit < 10 { b'0' + digit } else { b'A' - 10 + digit }
}

/// `alwaysUnescaped` in `Encode`
/// <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-encode>
const URI_ALWAYS_UNESCAPED: &[u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-.!~*'()";

/// Returns `true` when `c` is one of the bytes listed in `URI_ALWAYS_UNESCAPED`
/// (ASCII letters, digits and `_ - . ! ~ * ' ( )`).
pub fn is_uri_always_unescaped(c: u8) -> bool {
    URI_ALWAYS_UNESCAPED.iter().any(|&allowed| allowed == c)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod dec;
mod enc;

pub use dec::decode as decode_uri_chars;
pub use enc::{encode as encode_uri_chars, is_uri_always_unescaped};
Loading
Loading