-
-
Notifications
You must be signed in to change notification settings - Fork 722
feat(ecmascript): add URI encoding/decoding support to constant evaluation #12934
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Boshen
merged 12 commits into
main
from
copilot/fix-8bf3e9d5-2736-45fa-9ede-2d554baeb25b
Aug 10, 2025
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
d6729f8
Initial plan
Copilot 809f19b
Add support for decodeURI, decodeURIComponent, encodeURI, encodeURICo…
Copilot 651ca34
Address PR review feedback: refactor global function handling and opt…
Copilot 0c1a559
refactor: move global function handling into try_fold_url_related_fun…
Copilot 47b9ef0
Address PR review feedback: improve URI constant optimization and fix…
Copilot da5ba9e
update
sapphi-red 9615b76
update
sapphi-red 79ff4e3
update
sapphi-red f0a63d3
[autofix.ci] apply automated fixes
autofix-ci[bot] df34afc
update
sapphi-red e3029ae
[autofix.ci] apply automated fixes
autofix-ci[bot] 1a8b45c
small tweak
Boshen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
crates/oxc_ecmascript/src/constant_evaluation/url_encoding/dec.rs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| // based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/dec.rs#L21 | ||
| // MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE | ||
|
|
||
| use std::borrow::Cow; | ||
| use std::panic::panic_any; | ||
|
|
||
/// Percent-decodes `data_str`.
///
/// Implements <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-decode>
///
/// Every `%XX` escape is replaced by the byte it denotes, unless `should_not_decode` returns
/// `true` for that byte, in which case the escape is copied to the output exactly as written.
///
/// Returns:
/// * `Some(data_str)` (the input itself, untouched) when the input contains no `%`;
/// * `Some(Cow::Owned(_))` with the decoded text otherwise;
/// * `None` when a `%` is not followed by two hex digits, or when the decoded bytes are not
///   valid UTF-8.
///
/// # Panics
/// Panics with the payload `"OOM"` if the output buffer cannot be allocated.
#[inline]
pub fn decode(
    data_str: Cow<'_, str>,
    should_not_decode: impl Fn(u8) -> bool,
) -> Option<Cow<'_, str>> {
    let data = data_str.as_bytes();
    // Fast path: without any `%` there is nothing to decode, so hand the input back as-is.
    let offset = data.iter().take_while(|&&c| c != b'%').count();
    if offset >= data.len() {
        return Some(data_str);
    }

    // The output is never longer than the input, so one up-front reservation is enough.
    let mut decoded: Vec<u8> = Vec::new();
    if decoded.try_reserve(data.len()).is_err() {
        panic_any("OOM"); // more efficient codegen than built-in OOM handler
    }
    let mut out = NeverRealloc(&mut decoded);

    // Copy everything in front of the first `%` verbatim.
    let (ascii, mut data) = data.split_at(offset);
    out.extend_from_slice(ascii);

    loop {
        let mut parts = data.splitn(2, |&c| c == b'%');
        // first the non-% part (`splitn` always yields at least one, possibly empty, slice)
        out.extend_from_slice(parts.next().unwrap_or_default());

        // then decode one %xx; no further `%` means the input is exhausted
        let Some(rest) = parts.next() else {
            break;
        };
        let Some(&[first, second]) = rest.get(0..2) else {
            // 4.c.i.
            return None;
        };
        let (Some(first_val), Some(second_val)) = (from_hex_digit(first), from_hex_digit(second))
        else {
            // 4.c.iii.
            return None;
        };
        let byte = (first_val << 4) | second_val;
        if should_not_decode(byte) {
            // keep the escape exactly as it was written (including the hex digits' case)
            out.extend_from_slice(&[b'%', first, second]);
        } else {
            out.push(byte);
        }
        data = &rest[2..];
    }
    // The decoded bytes may form an invalid UTF-8 sequence; that is reported as `None`.
    Some(Cow::Owned(String::from_utf8(decoded).ok()?))
}

/// Converts one ASCII hex digit (`0-9`, `A-F`, `a-f`) to its value, or `None` for any other byte.
#[inline]
fn from_hex_digit(digit: u8) -> Option<u8> {
    match digit {
        b'0'..=b'9' => Some(digit - b'0'),
        b'A'..=b'F' => Some(digit - b'A' + 10),
        b'a'..=b'f' => Some(digit - b'a' + 10),
        _ => None,
    }
}

/// Wrapper around a `Vec` whose capacity has already been reserved by the caller.
///
/// Its `push` / `extend_from_slice` only write when the spare capacity suffices and are
/// silent no-ops otherwise.
struct NeverRealloc<'a, T>(pub &'a mut Vec<T>);

impl<T> NeverRealloc<'_, T> {
    /// Appends `val` if there is spare capacity; does nothing otherwise.
    #[inline]
    pub fn push(&mut self, val: T) {
        // these branches only exist to remove redundant reallocation code
        // (the capacity is always sufficient)
        if self.0.len() != self.0.capacity() {
            self.0.push(val);
        }
    }

    /// Appends all of `val` if it fits into the spare capacity; does nothing otherwise.
    #[inline]
    pub fn extend_from_slice(&mut self, val: &[T])
    where
        T: Clone,
    {
        if self.0.capacity() - self.0.len() >= val.len() {
            self.0.extend_from_slice(val);
        }
    }
}

#[test]
fn dec_borrows() {
    assert!(matches!(decode("hello".into(), |_| false), Some(Cow::Borrowed("hello"))));
    assert!(matches!(decode("hello%20".into(), |_| false), Some(Cow::Owned(s)) if s == "hello "));
    assert!(matches!(decode("%20hello".into(), |_| false), Some(Cow::Owned(s)) if s == " hello"));
}
79 changes: 79 additions & 0 deletions
79
crates/oxc_ecmascript/src/constant_evaluation/url_encoding/enc.rs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| // Based on https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/src/enc.rs | ||
| // MIT license: https://github.com/kornelski/rust_urlencoding/blob/a617c89d16f390e3ab4281ea68c514660b111301/LICENSE | ||
|
|
||
| use std::borrow::Cow; | ||
|
|
||
/// Percent-encodes `data_str`.
///
/// Implements <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-encode>
///
/// Every byte for which `should_encode` returns `true`, and every non-ASCII byte, is replaced
/// by `%XX` with uppercase hex digits. When no byte needs escaping the input `Cow` itself is
/// returned.
///
/// # Safety
/// `should_encode` should only return false for characters that are ascii
#[must_use]
pub unsafe fn encode(data_str: Cow<'_, str>, should_encode: impl Fn(u8) -> bool) -> Cow<'_, str> {
    let data = data_str.as_bytes();
    // add maybe extra capacity, but try not to exceed allocator's bucket size
    let mut escaped = String::new();
    // Best effort only: if the reservation fails, `push_str` allocates on demand.
    let _ = escaped.try_reserve(data.len() | 15);
    let unmodified = encode_into(data, should_encode, |s| escaped.push_str(s));
    if unmodified {
        return data_str;
    }
    Cow::Owned(escaped)
}

/// Streams the percent-encoded form of `data` into `push_str`, piece by piece.
///
/// Returns `true` when `data` needs no escaping at all; in that case `push_str` is never
/// called. Returns `false` once the complete encoded text has been pushed.
fn encode_into(
    mut data: &[u8],
    should_encode: impl Fn(u8) -> bool,
    mut push_str: impl FnMut(&str),
) -> bool {
    let mut pushed = false;
    loop {
        // Fast path to skip over safe chars at the beginning of the remaining string.
        // Requiring ASCII here keeps the unchecked conversion below sound no matter what
        // `should_encode` answers for non-ASCII bytes.
        let safe_len = data.iter().take_while(|&&c| c.is_ascii() && !should_encode(c)).count();

        let (safe, rest) = if safe_len >= data.len() {
            if !pushed {
                // nothing was escaped so far and nothing remains to escape
                return true;
            }
            (data, &[][..]) // redundant to optimize out a panic in split_at
        } else {
            data.split_at(safe_len)
        };
        pushed = true;
        if !safe.is_empty() {
            // SAFETY: every byte of `safe` passed the `is_ascii` check above
            push_str(unsafe { std::str::from_utf8_unchecked(safe) });
        }

        // `rest` starts with the byte that ended the safe run; empty means we are done.
        let Some((&byte, tail)) = rest.split_first() else {
            break;
        };
        let enc = [b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
        // SAFETY: `%` is a valid UTF-8 char and to_hex_digit returns a valid UTF-8 char
        push_str(unsafe { std::str::from_utf8_unchecked(&enc) });
        data = tail;
    }
    false
}

/// Maps a nibble (`0..=15`) to its uppercase ASCII hex digit.
///
/// Only ever called with values below 16.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    match digit {
        0..=9 => b'0' + digit,
        10..=255 => b'A' - 10 + digit,
    }
}
|
|
||
/// `alwaysUnescaped` in `Encode`
/// <https://tc39.es/ecma262/2025/multipage/global-object.html#sec-encode>
///
/// Returns `true` when `c` is an ASCII letter, an ASCII digit, or one of the marks
/// `_ - . ! ~ * ' ( )`, and `false` for every other byte.
pub fn is_uri_always_unescaped(c: u8) -> bool {
    c.is_ascii_alphanumeric()
        || matches!(c, b'_' | b'-' | b'.' | b'!' | b'~' | b'*' | b'\'' | b'(' | b')')
}
5 changes: 5 additions & 0 deletions
5
crates/oxc_ecmascript/src/constant_evaluation/url_encoding/mod.rs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| mod dec; | ||
| mod enc; | ||
|
|
||
| pub use dec::decode as decode_uri_chars; | ||
| pub use enc::{encode as encode_uri_chars, is_uri_always_unescaped}; |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.