Skip to content

Commit 7cff874

Browse files
authored
perf: remove heap allocation in parse_host (#1021)
* perf: remove heap allocation in parse_host
* make compile with no_std
* more comments
* add size hint for Iterator
* move function down to idna crate
* format
1 parent 968e862 commit 7cff874

File tree

6 files changed

+116
-44
lines changed

6 files changed

+116
-44
lines changed

idna/src/lib.rs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ impl core::fmt::Display for Errors {
8686
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
8787
/// version returning a `Cow`.
8888
///
89-
/// Most applications should be using this function rather than the sibling functions,
90-
/// and most applications should pass [`AsciiDenyList::URL`] as the second argument.
91-
/// Passing [`AsciiDenyList::URL`] as the second argument makes this function also
89+
/// Most applications should be using this function or `domain_to_ascii_from_cow` rather
90+
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
91+
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
9292
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
9393
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
9494
/// algorithm.
@@ -99,7 +99,7 @@ impl core::fmt::Display for Errors {
9999
///
100100
/// This process may fail.
101101
///
102-
/// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before
102+
/// If you have a `&str` instead of `&[u8]`, just call `.as_bytes()` on it before
103103
/// passing it to this function. It's still preferable to use this function over
104104
/// the sibling functions that take `&str`.
105105
pub fn domain_to_ascii_cow(
@@ -114,6 +114,33 @@ pub fn domain_to_ascii_cow(
114114
)
115115
}
116116

117+
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
118+
/// version accepting and returning a `Cow`.
119+
///
120+
/// Most applications should be using this function or `domain_to_ascii_cow` rather
121+
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
122+
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
123+
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
124+
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
125+
/// algorithm.
126+
///
127+
/// Return the ASCII representation of a domain name,
128+
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
129+
/// and using Punycode as necessary.
130+
///
131+
/// This process may fail.
132+
pub fn domain_to_ascii_from_cow(
133+
domain: Cow<'_, [u8]>,
134+
ascii_deny_list: AsciiDenyList,
135+
) -> Result<Cow<'_, str>, Errors> {
136+
Uts46::new().to_ascii_from_cow(
137+
domain,
138+
ascii_deny_list,
139+
uts46::Hyphens::Allow,
140+
uts46::DnsLength::Ignore,
141+
)
142+
}
143+
117144
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
118145
/// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_).
119146
///

idna/src/uts46.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -530,20 +530,38 @@ impl Uts46 {
530530
ascii_deny_list: AsciiDenyList,
531531
hyphens: Hyphens,
532532
dns_length: DnsLength,
533+
) -> Result<Cow<'a, str>, crate::Errors> {
534+
self.to_ascii_from_cow(
535+
Cow::Borrowed(domain_name),
536+
ascii_deny_list,
537+
hyphens,
538+
dns_length,
539+
)
540+
}
541+
542+
pub(crate) fn to_ascii_from_cow<'a>(
543+
&self,
544+
domain_name: Cow<'a, [u8]>,
545+
ascii_deny_list: AsciiDenyList,
546+
hyphens: Hyphens,
547+
dns_length: DnsLength,
533548
) -> Result<Cow<'a, str>, crate::Errors> {
534549
let mut s = String::new();
535550
match self.process(
536-
domain_name,
551+
&domain_name,
537552
ascii_deny_list,
538553
hyphens,
539554
ErrorPolicy::FailFast,
540555
|_, _, _| false,
541556
&mut s,
542557
None,
543558
) {
544-
// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
545559
Ok(ProcessingSuccess::Passthrough) => {
546-
let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) });
560+
// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
561+
let cow = match domain_name {
562+
Cow::Borrowed(v) => Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(v) }),
563+
Cow::Owned(v) => Cow::Owned(unsafe { String::from_utf8_unchecked(v) }),
564+
};
547565
if dns_length != DnsLength::Ignore
548566
&& !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
549567
{

url/src/host.rs

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::net::{Ipv4Addr, Ipv6Addr};
1010
use alloc::borrow::Cow;
1111
use alloc::borrow::ToOwned;
1212
use alloc::string::String;
13-
use alloc::string::ToString;
1413
use alloc::vec::Vec;
1514
use core::cmp;
1615
use core::fmt::{self, Formatter};
@@ -30,8 +29,8 @@ pub(crate) enum HostInternal {
3029
Ipv6(Ipv6Addr),
3130
}
3231

33-
impl From<Host<String>> for HostInternal {
34-
fn from(host: Host<String>) -> HostInternal {
32+
impl From<Host<Cow<'_, str>>> for HostInternal {
33+
fn from(host: Host<Cow<'_, str>>) -> HostInternal {
3534
match host {
3635
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
3736
Host::Domain(_) => HostInternal::Domain,
@@ -80,15 +79,34 @@ impl Host<String> {
8079
///
8180
/// <https://url.spec.whatwg.org/#host-parsing>
8281
pub fn parse(input: &str) -> Result<Self, ParseError> {
82+
Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
83+
}
84+
85+
/// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
86+
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
87+
Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())
88+
}
89+
}
90+
91+
impl<'a> Host<Cow<'a, str>> {
92+
pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
8393
if input.starts_with('[') {
8494
if !input.ends_with(']') {
8595
return Err(ParseError::InvalidIpv6Address);
8696
}
8797
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
8898
}
8999
let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
100+
let domain: Cow<'a, [u8]> = match domain {
101+
Cow::Owned(v) => Cow::Owned(v),
102+
// If the decoded bytes are borrowed, nothing was percent-decoded, so we can reuse the original `Cow`.
103+
Cow::Borrowed(_) => match input {
104+
Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
105+
Cow::Owned(input) => Cow::Owned(input.into_bytes()),
106+
},
107+
};
90108

91-
let domain = Self::domain_to_ascii(&domain)?;
109+
let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?;
92110

93111
if domain.is_empty() {
94112
return Err(ParseError::EmptyHost);
@@ -98,12 +116,11 @@ impl Host<String> {
98116
let address = parse_ipv4addr(&domain)?;
99117
Ok(Host::Ipv4(address))
100118
} else {
101-
Ok(Host::Domain(domain.to_string()))
119+
Ok(Host::Domain(domain))
102120
}
103121
}
104122

105-
// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
106-
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
123+
pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
107124
if input.starts_with('[') {
108125
if !input.ends_with(']') {
109126
return Err(ParseError::InvalidIpv6Address);
@@ -137,14 +154,21 @@ impl Host<String> {
137154
Err(ParseError::InvalidDomainCharacter)
138155
} else {
139156
Ok(Host::Domain(
140-
utf8_percent_encode(input, CONTROLS).to_string(),
157+
match utf8_percent_encode(&input, CONTROLS).into() {
158+
Cow::Owned(v) => Cow::Owned(v),
159+
// if we're borrowing, then we can return the original Cow
160+
Cow::Borrowed(_) => input,
161+
},
141162
))
142163
}
143164
}
144165

145-
/// convert domain with idna
146-
fn domain_to_ascii(domain: &[u8]) -> Result<Cow<'_, str>, ParseError> {
147-
idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into)
166+
pub(crate) fn into_owned(self) -> Host<String> {
167+
match self {
168+
Host::Domain(s) => Host::Domain(s.into_owned()),
169+
Host::Ipv4(ip) => Host::Ipv4(ip),
170+
Host::Ipv6(ip) => Host::Ipv6(ip),
171+
}
148172
}
149173
}
150174

url/src/lib.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ use crate::net::IpAddr;
174174
))]
175175
use crate::net::{SocketAddr, ToSocketAddrs};
176176
use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
177+
use alloc::borrow::Cow;
177178
use alloc::borrow::ToOwned;
178179
use alloc::str;
179180
use alloc::string::{String, ToString};
@@ -2037,9 +2038,9 @@ impl Url {
20372038
}
20382039
}
20392040
if SchemeType::from(self.scheme()).is_special() {
2040-
self.set_host_internal(Host::parse(host_substr)?, None);
2041+
self.set_host_internal(Host::parse_cow(host_substr.into())?, None);
20412042
} else {
2042-
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
2043+
self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None);
20432044
}
20442045
} else if self.has_host() {
20452046
if scheme_type.is_special() && !scheme_type.is_file() {
@@ -2075,7 +2076,7 @@ impl Url {
20752076
}
20762077

20772078
/// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2078-
fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2079+
fn set_host_internal(&mut self, host: Host<Cow<'_, str>>, opt_new_port: Option<Option<u16>>) {
20792080
let old_suffix_pos = if opt_new_port.is_some() {
20802081
self.path_start
20812082
} else {
@@ -3011,7 +3012,7 @@ fn path_to_file_url_segments_windows(
30113012
serialization.push(':');
30123013
}
30133014
Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
3014-
let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
3015+
let host = Host::parse_cow(server.to_str().ok_or(())?.into()).map_err(|_| ())?;
30153016
write!(serialization, "{}", host).unwrap();
30163017
host_end = to_u32(serialization.len()).unwrap();
30173018
host_internal = host.into();

url/src/parser.rs

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
// option. This file may not be copied, modified, or distributed
77
// except according to those terms.
88

9+
use alloc::borrow::Cow;
910
use alloc::string::String;
10-
use alloc::string::ToString;
1111
use core::fmt::{self, Formatter, Write};
1212
use core::str;
1313

@@ -329,6 +329,10 @@ impl Iterator for Input<'_> {
329329
fn next(&mut self) -> Option<char> {
330330
self.chars.by_ref().find(|&c| !ascii_tab_or_new_line(c))
331331
}
332+
333+
fn size_hint(&self) -> (usize, Option<usize>) {
334+
(0, Some(self.chars.as_str().len()))
335+
}
332336
}
333337

334338
pub struct Parser<'a> {
@@ -987,7 +991,7 @@ impl<'a> Parser<'a> {
987991
pub fn parse_host(
988992
mut input: Input<'_>,
989993
scheme_type: SchemeType,
990-
) -> ParseResult<(Host<String>, Input<'_>)> {
994+
) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
991995
if scheme_type.is_file() {
992996
return Parser::get_file_host(input);
993997
}
@@ -1018,34 +1022,34 @@ impl<'a> Parser<'a> {
10181022
}
10191023
bytes += c.len_utf8();
10201024
}
1021-
let replaced: String;
10221025
let host_str;
10231026
{
10241027
let host_input = input.by_ref().take(non_ignored_chars);
10251028
if has_ignored_chars {
1026-
replaced = host_input.collect();
1027-
host_str = &*replaced
1029+
host_str = Cow::Owned(host_input.collect());
10281030
} else {
10291031
for _ in host_input {}
1030-
host_str = &input_str[..bytes]
1032+
host_str = Cow::Borrowed(&input_str[..bytes]);
10311033
}
10321034
}
10331035
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
10341036
return Err(ParseError::EmptyHost);
10351037
}
10361038
if !scheme_type.is_special() {
1037-
let host = Host::parse_opaque(host_str)?;
1039+
let host = Host::parse_opaque_cow(host_str)?;
10381040
return Ok((host, input));
10391041
}
1040-
let host = Host::parse(host_str)?;
1042+
let host = Host::parse_cow(host_str)?;
10411043
Ok((host, input))
10421044
}
10431045

1044-
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
1046+
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
10451047
let (_, host_str, remaining) = Parser::file_host(input)?;
10461048
let host = match Host::parse(&host_str)? {
1047-
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1048-
host => host,
1049+
Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
1050+
Host::Domain(s) => Host::Domain(Cow::Owned(s)),
1051+
Host::Ipv4(ip) => Host::Ipv4(ip),
1052+
Host::Ipv6(ip) => Host::Ipv6(ip),
10491053
};
10501054
Ok((host, remaining))
10511055
}
@@ -1060,7 +1064,7 @@ impl<'a> Parser<'a> {
10601064
has_host = false;
10611065
HostInternal::None
10621066
} else {
1063-
match Host::parse(&host_str)? {
1067+
match Host::parse_cow(host_str)? {
10641068
Host::Domain(ref d) if d == "localhost" => {
10651069
has_host = false;
10661070
HostInternal::None
@@ -1075,7 +1079,7 @@ impl<'a> Parser<'a> {
10751079
Ok((has_host, host, remaining))
10761080
}
10771081

1078-
pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
1082+
pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
10791083
// Undo the Input abstraction here to avoid allocating in the common case
10801084
// where the host part of the input does not contain any tab or newline
10811085
let input_str = input.chars.as_str();
@@ -1090,23 +1094,21 @@ impl<'a> Parser<'a> {
10901094
}
10911095
bytes += c.len_utf8();
10921096
}
1093-
let replaced: String;
10941097
let host_str;
10951098
let mut remaining = input.clone();
10961099
{
10971100
let host_input = remaining.by_ref().take(non_ignored_chars);
10981101
if has_ignored_chars {
1099-
replaced = host_input.collect();
1100-
host_str = &*replaced
1102+
host_str = Cow::Owned(host_input.collect());
11011103
} else {
11021104
for _ in host_input {}
1103-
host_str = &input_str[..bytes]
1105+
host_str = Cow::Borrowed(&input_str[..bytes]);
11041106
}
11051107
}
1106-
if is_windows_drive_letter(host_str) {
1107-
return Ok((false, "".to_string(), input));
1108+
if is_windows_drive_letter(&host_str) {
1109+
return Ok((false, "".into(), input));
11081110
}
1109-
Ok((true, host_str.to_string(), remaining))
1111+
Ok((true, host_str, remaining))
11101112
}
11111113

11121114
pub fn parse_port<P>(

url/src/quirks.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
161161
let scheme = url.scheme();
162162
let scheme_type = SchemeType::from(scheme);
163163
if scheme_type == SchemeType::File && new_host.is_empty() {
164-
url.set_host_internal(Host::Domain(String::new()), None);
164+
url.set_host_internal(Host::Domain("".into()), None);
165165
return Ok(());
166166
}
167167

@@ -208,7 +208,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
208208
let input = Input::new_no_trim(new_hostname);
209209
let scheme_type = SchemeType::from(url.scheme());
210210
if scheme_type == SchemeType::File && new_hostname.is_empty() {
211-
url.set_host_internal(Host::Domain(String::new()), None);
211+
url.set_host_internal(Host::Domain("".into()), None);
212212
return Ok(());
213213
}
214214

0 commit comments

Comments
 (0)