Skip to content

Commit 7032b3a

Browse files
raymondksesi200
authored andcommitted
fix: url decoding in ic-certified-assets (#3767)
Co-authored-by: Severin Siffert <severin.siffert@dfinity.org>
1 parent 4323945 commit 7032b3a

File tree

7 files changed

+57
-66
lines changed

7 files changed

+57
-66
lines changed

CHANGELOG.md

+15
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,21 @@ different replica version or different replica options.
4545

4646
It doesn't apply to `--pocketic` because PocketIC does not yet persist any data.
4747

48+
## Dependencies
49+
50+
### Frontend canister
51+
52+
**fix!: URL decoding follows the whatwg standard**
53+
54+
Previously, the frontend canister used custom logic to decode URLs.
55+
The logic was replaced with a dependency that follows https://url.spec.whatwg.org/#percent-decode, which is what JavaScript's `new Request("https://example.com/% $").url` also uses.
56+
This also drops support for decoding `%%` to `%`. `%` does no longer need to be encoded.
57+
58+
URLs that contain invalid encodings now return `400 Bad Request` instead of `500 Internal Server Error`
59+
60+
- Module hash: 2cc4ec4381dee231379270a08403c984986c9fc0c2eaadb64488b704a3104cc0
61+
- https://github.com/dfinity/sdk/pull/3767
62+
4863
# 0.20.2
4964

5065
### fix: `dfx canister delete` fails

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

e2e/tests-dfx/assetscanister.bash

+6-14
Original file line numberDiff line numberDiff line change
@@ -710,20 +710,15 @@ check_permission_failure() {
710710
dfx canister call --query e2e_project_frontend list '(record {})'
711711

712712
# decode as expected
713-
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%e6";headers=vec{};method="GET";body=vec{}})'
713+
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%c3%a6";headers=vec{};method="GET";body=vec{}})'
714714
assert_match "filename is an ae symbol" # candid looks like blob "filename is \c3\a6\0a"
715715

716716
ID=$(dfx canister id e2e_project_frontend)
717717
PORT=$(get_webserver_port)
718718

719-
# fails with Err(InvalidExpressionPath)
720-
assert_command_fail curl --fail -vv http://localhost:"$PORT"/%c3%a6?canisterId="$ID"
721-
722-
# fails with Err(Utf8ConversionError(FromUtf8Error { bytes: [47, 230], error: Utf8Error { valid_up_to: 1, error_len: None } }))
719+
# fails with because %e6 is not valid utf-8 percent encoding
723720
assert_command_fail curl --fail -vv http://localhost:"$PORT"/%e6?canisterId="$ID"
724-
# assert_match "200 OK" "$stderr"
725-
# assert_match "filename is an ae symbol"
726-
assert_contains "500 Internal Server Error"
721+
assert_contains "400 Bad Request"
727722
}
728723

729724
@test "http_request percent-decodes urls" {
@@ -751,7 +746,7 @@ check_permission_failure() {
751746
assert_match "contents of file with plus in filename"
752747
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/has%25percent.txt";headers=vec{};method="GET";body=vec{}})'
753748
assert_match "contents of file with percent in filename"
754-
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%e6";headers=vec{};method="GET";body=vec{}})'
749+
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%c3%a6";headers=vec{};method="GET";body=vec{}})'
755750
assert_match "filename is an ae symbol" # candid looks like blob "filename is \c3\a6\0a"
756751
assert_command dfx canister call --query e2e_project_frontend http_request '(record{url="/%25";headers=vec{};method="GET";body=vec{}})'
757752
assert_match "filename is percent"
@@ -792,11 +787,8 @@ check_permission_failure() {
792787
assert_match "contents of file with percent in filename"
793788

794789
assert_command_fail curl --fail -vv http://localhost:"$PORT"/%e6?canisterId="$ID"
795-
# see https://dfinity.atlassian.net/browse/SDK-1247
796-
# fails with Err(Utf8ConversionError(FromUtf8Error { bytes: [47, 230], error: Utf8Error { valid_up_to: 1, error_len: None } }))
797-
assert_contains "500 Internal Server Error"
798-
# assert_match "200 OK" "$stderr"
799-
# assert_match "filename is an ae symbol"
790+
# fails because %e6 is not valid utf-8 percent encoding
791+
assert_contains "400 Bad Request"
800792

801793
assert_command curl --fail -vv http://localhost:"$PORT"/%25?canisterId="$ID"
802794
assert_match "200 OK" "$stderr"

src/canisters/frontend/ic-certified-assets/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ serde.workspace = true
2525
serde_bytes.workspace = true
2626
serde_cbor.workspace = true
2727
sha2.workspace = true
28+
percent-encoding = "2.3.1"
2829

2930
[dev-dependencies]
3031
ic-http-certification = "2.3.0"

src/canisters/frontend/ic-certified-assets/src/tests.rs

+19-8
Original file line numberDiff line numberDiff line change
@@ -1077,21 +1077,32 @@ fn supports_max_age_headers() {
10771077

10781078
#[test]
10791079
fn check_url_decode() {
1080+
assert_eq!(url_decode("/%"), Ok("/%".to_string()));
1081+
assert_eq!(url_decode("/%%"), Ok("/%%".to_string()));
1082+
assert_eq!(url_decode("/%e%"), Ok("/%e%".to_string()));
1083+
1084+
assert_eq!(url_decode("/%20%a"), Ok("/ %a".to_string()));
1085+
assert_eq!(url_decode("/%%+a%20+%@"), Ok("/%%+a +%@".to_string()));
10801086
assert_eq!(
1081-
url_decode("/%"),
1082-
Err(UrlDecodeError::InvalidPercentEncoding)
1087+
url_decode("/has%percent.txt"),
1088+
Ok("/has%percent.txt".to_string())
10831089
);
1084-
assert_eq!(url_decode("/%%"), Ok("/%".to_string()));
1085-
assert_eq!(url_decode("/%20a"), Ok("/ a".to_string()));
1090+
1091+
assert_eq!(url_decode("/%%2"), Ok("/%%2".to_string()));
1092+
assert_eq!(url_decode("/%C3%A6"), Ok("/æ".to_string()));
1093+
assert_eq!(url_decode("/%c3%a6"), Ok("/æ".to_string()));
1094+
1095+
assert_eq!(url_decode("/a+b+c%20d"), Ok("/a+b+c d".to_string()));
1096+
10861097
assert_eq!(
1087-
url_decode("/%%+a%20+%@"),
1088-
Err(UrlDecodeError::InvalidPercentEncoding)
1098+
url_decode("/capture-d%E2%80%99e%CC%81cran-2023-10-26-a%CC%80.txt"),
1099+
Ok("/capture-d’écran-2023-10-26-à.txt".to_string())
10891100
);
1101+
10901102
assert_eq!(
1091-
url_decode("/has%percent.txt"),
1103+
url_decode("/%FF%FF"),
10921104
Err(UrlDecodeError::InvalidPercentEncoding)
10931105
);
1094-
assert_eq!(url_decode("/%e6"), Ok("/æ".to_string()));
10951106
}
10961107

10971108
#[test]
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,6 @@
11
use std::fmt;
22

3-
/// An iterator-like structure that decode a URL.
4-
struct UrlDecode<'a> {
5-
bytes: std::slice::Iter<'a, u8>,
6-
}
7-
8-
fn convert_percent(iter: &mut std::slice::Iter<u8>) -> Option<u8> {
9-
let mut cloned_iter = iter.clone();
10-
let result = match cloned_iter.next()? {
11-
b'%' => b'%',
12-
h => {
13-
let h = char::from(*h).to_digit(16)?;
14-
let l = char::from(*cloned_iter.next()?).to_digit(16)?;
15-
h as u8 * 0x10 + l as u8
16-
}
17-
};
18-
// Update this if we make it this far, otherwise "reset" the iterator.
19-
*iter = cloned_iter;
20-
Some(result)
21-
}
3+
use percent_encoding::percent_decode_str;
224

235
#[derive(Debug, PartialEq, Eq)]
246
pub enum UrlDecodeError {
@@ -33,31 +15,20 @@ impl fmt::Display for UrlDecodeError {
3315
}
3416
}
3517

36-
impl<'a> Iterator for UrlDecode<'a> {
37-
type Item = Result<char, UrlDecodeError>;
38-
39-
fn next(&mut self) -> Option<Self::Item> {
40-
let b = self.bytes.next()?;
41-
match b {
42-
b'%' => Some(
43-
convert_percent(&mut self.bytes)
44-
.map(char::from)
45-
.ok_or(UrlDecodeError::InvalidPercentEncoding),
46-
),
47-
b'+' => Some(Ok(' ')),
48-
x => Some(Ok(char::from(*x))),
49-
}
50-
}
51-
52-
fn size_hint(&self) -> (usize, Option<usize>) {
53-
let bytes = self.bytes.len();
54-
(bytes / 3, Some(bytes))
55-
}
56-
}
57-
18+
/// Decodes a percent encoded string according to https://url.spec.whatwg.org/#percent-decode
19+
///
20+
/// This is a wrapper around the percent-encoding crate.
21+
///
22+
/// The rules that it follow by are:
23+
/// - Start with an empty sequence of bytes of the output
24+
/// - Convert the input to a sequence of bytes
25+
/// - if the byte is `%` and the next two bytes are hex, convet the hex value to a byte
26+
/// and add it to the output, otherwise add the byte to the output
27+
/// - convert the output byte sequence to a UTF-8 string and return it. If the conversion
28+
/// fails return an error.
5829
pub fn url_decode(url: &str) -> Result<String, UrlDecodeError> {
59-
UrlDecode {
60-
bytes: url.as_bytes().iter(),
30+
match percent_decode_str(url).decode_utf8() {
31+
Ok(result) => Ok(result.to_string()),
32+
Err(_) => Err(UrlDecodeError::InvalidPercentEncoding),
6133
}
62-
.collect()
6334
}

src/distributed/assetstorage.wasm.gz

10.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)