Skip to content

Commit eda01e5

Browse files
authored
fix(hstr): Skip only \u for unicode (#11216)
**Description:** Attempt to fix #11214. The reason for the unescaped `\\` was the special handling for the unicode `\u` codepoint, which escapes any string literal that starts with `\u` - on Windows, a path separator followed by u (`\\u`) matches this case. This PR attempts to solve it by lookahead, confirming that all 4 following characters are hex digits representing a unicode hex escape; otherwise it is treated as a plain string. To be honest I'm not sure if this is an acceptable approach or not, feel free to close if there's a better way to fix it. **Related issue:** - Closes #11214
1 parent 5e6af60 commit eda01e5

File tree

4 files changed

+143
-2
lines changed

4 files changed

+143
-2
lines changed

.changeset/rotten-frogs-drive.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: patch
3+
hstr: patch
4+
---
5+
6+
fix(atom): skip only unicode \u

crates/hstr/src/wtf8_atom.rs

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,43 @@ impl serde::ser::Serialize for Wtf8Atom {
100100
// By escaping literal '\u' to '\\u', we ensure:
101101
// - Unpaired surrogates serialize as '\uXXXX'
102102
// - Literal '\u' text serializes as '\\uXXXX'
103+
//
104+
// However, we should only escape '\u' if it's followed by at least 4 hex digits,
105+
// which would indicate a Unicode escape sequence. Otherwise, '\u' followed by
106+
// non-hex characters (like '\util') should not be escaped.
103107
if c == '\\' && iter.peek().map(|cp| cp.to_u32()) == Some('u' as u32) {
104-
iter.next(); // skip 'u'
105-
result.push_str("\\\\u");
108+
// Look ahead to see if this is followed by exactly 4 hex digits
109+
let mut lookahead = iter.clone();
110+
lookahead.next(); // skip 'u'
111+
112+
let mut hex_count = 0;
113+
let mut all_hex = true;
114+
for _ in 0..4 {
115+
if let Some(next_cp) = lookahead.next() {
116+
if let Some(next_c) = next_cp.to_char() {
117+
if next_c.is_ascii_hexdigit() {
118+
hex_count += 1;
119+
} else {
120+
all_hex = false;
121+
break;
122+
}
123+
} else {
124+
all_hex = false;
125+
break;
126+
}
127+
} else {
128+
all_hex = false;
129+
break;
130+
}
131+
}
132+
133+
// Only escape if the 4 code points right after '\u' are all hex digits
134+
if hex_count == 4 && all_hex {
135+
iter.next(); // skip 'u'
136+
result.push_str("\\\\u");
137+
} else {
138+
result.push(c);
139+
}
106140
} else {
107141
result.push(c)
108142
}
@@ -553,4 +587,32 @@ mod tests {
553587
let err_atom = result.unwrap_err();
554588
assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
555589
}
590+
591+
#[test]
592+
fn test_backslash_util_issue_11214() {
593+
let atom =
594+
Wtf8Atom::from("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts");
595+
let serialized = serde_json::to_string(&atom).unwrap();
596+
597+
assert!(
598+
!serialized.contains("spec\\\\\\\\util"),
599+
"Found quadruple backslashes in spec segment! Serialized: {serialized}"
600+
);
601+
602+
assert!(
603+
serialized.contains("spec\\\\util"),
604+
"Expected double backslashes in spec segment not found! Serialized: {serialized}",
605+
);
606+
607+
// The expected serialized value should have consistent escaping
608+
let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
609+
assert_eq!(
610+
serialized, expected,
611+
"Serialized value should have consistent backslash escaping"
612+
);
613+
614+
// Test round-trip
615+
let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
616+
assert_eq!(atom, deserialized);
617+
}
556618
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
var coverageData = {
2+
path: "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts",
3+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
{
2+
"type": "Script",
3+
"span": {
4+
"start": 1,
5+
"end": 107
6+
},
7+
"body": [
8+
{
9+
"type": "VariableDeclaration",
10+
"span": {
11+
"start": 1,
12+
"end": 107
13+
},
14+
"ctxt": 0,
15+
"kind": "var",
16+
"declare": false,
17+
"declarations": [
18+
{
19+
"type": "VariableDeclarator",
20+
"span": {
21+
"start": 5,
22+
"end": 107
23+
},
24+
"id": {
25+
"type": "Identifier",
26+
"span": {
27+
"start": 5,
28+
"end": 17
29+
},
30+
"ctxt": 0,
31+
"value": "coverageData",
32+
"optional": false,
33+
"typeAnnotation": null
34+
},
35+
"init": {
36+
"type": "ObjectExpression",
37+
"span": {
38+
"start": 20,
39+
"end": 107
40+
},
41+
"properties": [
42+
{
43+
"type": "KeyValueProperty",
44+
"key": {
45+
"type": "Identifier",
46+
"span": {
47+
"start": 26,
48+
"end": 30
49+
},
50+
"value": "path"
51+
},
52+
"value": {
53+
"type": "StringLiteral",
54+
"span": {
55+
"start": 32,
56+
"end": 101
57+
},
58+
"value": "C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts",
59+
"raw": "\"C:\\\\github\\\\swc-plugin-coverage-instrument\\\\spec\\\\util\\\\verifier.ts\""
60+
}
61+
}
62+
]
63+
},
64+
"definite": false
65+
}
66+
]
67+
}
68+
],
69+
"interpreter": null
70+
}

0 commit comments

Comments
 (0)