Skip to content

Commit 3cc7e69

Browse files
committed
idna: switch to stack-allocated simple Errors struct
Presumably, preserving the number of errors of each type is less important than avoiding the allocations that result from using a Vec.
1 parent 5267062 commit 3cc7e69

File tree

1 file changed

+52
-70
lines changed

1 file changed

+52
-70
lines changed

idna/src/uts46.rs

Lines changed: 52 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ fn find_char(codepoint: char) -> &'static Mapping {
8282
.unwrap()
8383
}
8484

85-
fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Vec<Error>) {
85+
fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut Errors) {
8686
if let '.' | '-' | 'a'..='z' | '0'..='9' = codepoint {
8787
output.push(codepoint);
8888
return;
@@ -100,18 +100,18 @@ fn map_char(codepoint: char, config: Config, output: &mut String, errors: &mut V
100100
}
101101
}
102102
Mapping::Disallowed => {
103-
errors.push(Error::DisallowedCharacter);
103+
errors.disallowed_character = true;
104104
output.push(codepoint);
105105
}
106106
Mapping::DisallowedStd3Valid => {
107107
if config.use_std3_ascii_rules {
108-
errors.push(Error::DisallowedByStd3AsciiRules);
108+
errors.disallowed_by_std3_ascii_rules = true;
109109
}
110110
output.push(codepoint)
111111
}
112112
Mapping::DisallowedStd3Mapped(ref slice) => {
113113
if config.use_std3_ascii_rules {
114-
errors.push(Error::DisallowedMappedInStd3);
114+
errors.disallowed_mapped_in_std3 = true;
115115
}
116116
output.push_str(decode_slice(slice))
117117
}
@@ -298,7 +298,7 @@ fn is_valid(label: &str, config: Config) -> bool {
298298
}
299299

300300
/// http://www.unicode.org/reports/tr46/#Processing
301-
fn processing(domain: &str, config: Config, errors: &mut Vec<Error>) -> String {
301+
fn processing(domain: &str, config: Config) -> (String, Errors) {
302302
// Weed out the simple cases: only allow all lowercase ASCII characters and digits where none
303303
// of the labels start with PUNYCODE_PREFIX and labels don't start or end with hyphen.
304304
let (mut prev, mut simple, mut puny_prefix) = ('?', !domain.is_empty(), 0);
@@ -331,12 +331,13 @@ fn processing(domain: &str, config: Config, errors: &mut Vec<Error>) -> String {
331331
prev = c;
332332
}
333333
if simple {
334-
return domain.to_owned();
334+
return (domain.to_owned(), Errors::default());
335335
}
336336

337+
let mut errors = Errors::default();
337338
let mut mapped = String::with_capacity(domain.len());
338339
for c in domain.chars() {
339-
map_char(c, config, &mut mapped, errors)
340+
map_char(c, config, &mut mapped, &mut errors)
340341
}
341342
let mut normalized = String::with_capacity(mapped.len());
342343
normalized.extend(mapped.nfc());
@@ -365,7 +366,7 @@ fn processing(domain: &str, config: Config, errors: &mut Vec<Error>) -> String {
365366
}
366367
None => {
367368
has_bidi_labels = true;
368-
errors.push(Error::PunycodeError);
369+
errors.punycode = true;
369370
}
370371
}
371372
} else {
@@ -390,10 +391,10 @@ fn processing(domain: &str, config: Config, errors: &mut Vec<Error>) -> String {
390391
}
391392

392393
if !valid {
393-
errors.push(Error::ValidityCriteria);
394+
errors.validity_criteria = true;
394395
}
395396

396-
validated
397+
(validated, errors)
397398
}
398399

399400
#[derive(Clone, Copy)]
@@ -447,10 +448,10 @@ impl Config {
447448

448449
/// http://www.unicode.org/reports/tr46/#ToASCII
449450
pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
450-
let mut errors = Vec::new();
451451
let mut result = String::new();
452452
let mut first = true;
453-
for label in processing(domain, self, &mut errors).split('.') {
453+
let (domain, mut errors) = processing(domain, self);
454+
for label in domain.split('.') {
454455
if !first {
455456
result.push('.');
456457
}
@@ -463,7 +464,9 @@ impl Config {
463464
result.push_str(PUNYCODE_PREFIX);
464465
result.push_str(&x);
465466
}
466-
None => errors.push(Error::PunycodeError),
467+
None => {
468+
errors.punycode = true;
469+
}
467470
}
468471
}
469472
}
@@ -475,61 +478,20 @@ impl Config {
475478
&*result
476479
};
477480
if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
478-
errors.push(Error::TooShortForDns)
481+
errors.too_short_for_dns = true;
479482
}
480483
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
481-
errors.push(Error::TooLongForDns)
484+
errors.too_long_for_dns = true;
482485
}
483486
}
484-
if errors.is_empty() {
485-
Ok(result)
486-
} else {
487-
Err(Errors(errors))
488-
}
487+
488+
Result::from(errors).map(|()| result)
489489
}
490490

491491
/// http://www.unicode.org/reports/tr46/#ToUnicode
492492
pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
493-
let mut errors = Vec::new();
494-
let domain = processing(domain, self, &mut errors);
495-
let errors = if errors.is_empty() {
496-
Ok(())
497-
} else {
498-
Err(Errors(errors))
499-
};
500-
(domain, errors)
501-
}
502-
}
503-
504-
#[allow(clippy::enum_variant_names)]
505-
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
506-
enum Error {
507-
PunycodeError,
508-
509-
// https://unicode.org/reports/tr46/#Validity_Criteria
510-
ValidityCriteria,
511-
DisallowedByStd3AsciiRules,
512-
DisallowedMappedInStd3,
513-
DisallowedCharacter,
514-
TooLongForDns,
515-
TooShortForDns,
516-
}
517-
impl Error {
518-
fn as_str(&self) -> &str {
519-
match self {
520-
Error::PunycodeError => "punycode error",
521-
Error::ValidityCriteria => "failed UTS #46 validity criteria",
522-
Error::DisallowedByStd3AsciiRules => "disallowed ASCII character",
523-
Error::DisallowedMappedInStd3 => "disallowed mapped ASCII character",
524-
Error::DisallowedCharacter => "disallowed non-ASCII character",
525-
Error::TooLongForDns => "too long for DNS",
526-
Error::TooShortForDns => "too short for DNS",
527-
}
528-
}
529-
}
530-
impl fmt::Display for Error {
531-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
532-
f.write_str(self.as_str())
493+
let (domain, errors) = processing(domain, self);
494+
(domain, errors.into())
533495
}
534496
}
535497

@@ -548,22 +510,42 @@ fn is_bidi_domain(s: &str) -> bool {
548510

549511
/// Errors recorded during UTS #46 processing.
550512
///
551-
/// This is opaque for now, only indicating the presence of at least one error.
513+
/// This is opaque for now, indicating what types of errors have been encountered at least once.
552514
/// More details may be exposed in the future.
553-
#[derive(Debug)]
554-
pub struct Errors(Vec<Error>);
515+
#[derive(Debug, Default)]
516+
pub struct Errors {
517+
punycode: bool,
518+
// https://unicode.org/reports/tr46/#Validity_Criteria
519+
validity_criteria: bool,
520+
disallowed_by_std3_ascii_rules: bool,
521+
disallowed_mapped_in_std3: bool,
522+
disallowed_character: bool,
523+
too_long_for_dns: bool,
524+
too_short_for_dns: bool,
525+
}
526+
527+
impl From<Errors> for Result<(), Errors> {
528+
fn from(e: Errors) -> Result<(), Errors> {
529+
let failed = e.punycode
530+
|| e.validity_criteria
531+
|| e.disallowed_by_std3_ascii_rules
532+
|| e.disallowed_mapped_in_std3
533+
|| e.disallowed_character
534+
|| e.too_long_for_dns
535+
|| e.too_short_for_dns;
536+
if !failed {
537+
Ok(())
538+
} else {
539+
Err(e)
540+
}
541+
}
542+
}
555543

556544
impl StdError for Errors {}
557545

558546
impl fmt::Display for Errors {
559547
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
560-
for (i, err) in self.0.iter().enumerate() {
561-
if i > 0 {
562-
f.write_str(", ")?;
563-
}
564-
f.write_str(err.as_str())?;
565-
}
566-
Ok(())
548+
fmt::Debug::fmt(self, f)
567549
}
568550
}
569551

0 commit comments

Comments
 (0)