diff --git a/netwerk/base/rust-helper/Cargo.toml b/netwerk/base/rust-helper/Cargo.toml new file mode 100644 index 0000000000000..c4597b86e34b4 --- /dev/null +++ b/netwerk/base/rust-helper/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "netwerk_helper" +version = "0.0.1" +authors = ["Jeff Hemphill "] + +[dependencies] +nserror = { path = "../../../xpcom/rust/nserror" } +nsstring = { path = "../../../xpcom/rust/nsstring" } diff --git a/netwerk/base/rust-helper/src/helper.h b/netwerk/base/rust-helper/src/helper.h new file mode 100644 index 0000000000000..cf2ad61f7b492 --- /dev/null +++ b/netwerk/base/rust-helper/src/helper.h @@ -0,0 +1,14 @@ +#ifndef RUST_NS_NET_HELPER +#define RUST_NS_NET_HELPER + +#include "nsError.h" +#include "nsString.h" + +extern "C" { + +nsresult +rust_prepare_accept_languages(const nsACString* i_accept_languages, + nsACString* o_accept_languages); +} + +#endif // RUST_NS_NET_HELPER diff --git a/netwerk/base/rust-helper/src/lib.rs b/netwerk/base/rust-helper/src/lib.rs new file mode 100644 index 0000000000000..47f7a3ab7e348 --- /dev/null +++ b/netwerk/base/rust-helper/src/lib.rs @@ -0,0 +1,143 @@ +use std::ascii::AsciiExt; + +extern crate nserror; +use self::nserror::*; + +extern crate nsstring; +use self::nsstring::nsACString; + +/// HTTP linear whitespace (LWS), as defined in netwerk/protocol/http/nsHttp.h +static HTTP_LWS: &'static [u8] = &[' ' as u8, '\t' as u8]; + +/// Trim leading whitespace, trailing whitespace, and quality-value +/// from a token. +fn trim_token(token: &[u8]) -> &[u8] { + // Trim left whitespace + let ltrim = token.iter() + .take_while(|c| HTTP_LWS.iter().any(|ws| &ws == c)) + .count(); + + // Keep bytes up to the first whitespace or ';', which drops trailing + // whitespace and any "; q=..." suffix + let rtrim = token[ltrim..] 
+ .iter() + .take_while(|c| **c != (';' as u8) && HTTP_LWS.iter().all(|ws| ws != *c)) + .count(); + + &token[ltrim..ltrim + rtrim] +} + +#[no_mangle] +#[allow(non_snake_case)] +/// Appends to `o_accept_languages` an ISO 639 language list +/// notated with HTTP "q" values for output with an HTTP Accept-Language +/// header. Previous q values will be stripped because the order of +/// the langs implies the q value. The q values are calculated by dividing +/// 1.0 amongst the number of languages present. +/// +/// Ex: passing: "en, ja" +/// returns: "en,ja;q=0.5" +/// +/// passing: "en, ja, fr-CA" +/// returns: "en,ja;q=0.7,fr-CA;q=0.3" +pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(i_accept_languages: &'a nsACString, + o_accept_languages: &'b mut nsACString) + -> nsresult { + if i_accept_languages.is_empty() { + return NS_OK; + } + + let make_tokens = || { + i_accept_languages.split(|c| *c == (',' as u8)) + .map(|token| trim_token(token)) + .filter(|token| token.len() != 0) + }; + + let n = make_tokens().count(); + + for (count_n, i_token) in make_tokens().enumerate() { + + // delimiter if not first item + if count_n != 0 { + o_accept_languages.append(","); + } + + let token_pos = o_accept_languages.len(); + o_accept_languages.append(&i_token as &[u8]); + + { + let o_token = o_accept_languages.to_mut(); + canonicalize_language_tag(&mut o_token[token_pos..]); + } + + // Divide the quality-values evenly among the languages. + let q = 1.0 - count_n as f32 / n as f32; + + let u: u32 = ((q + 0.005) * 100.0) as u32; + // Only display q-value if less than 1.00. + if u < 100 { + // With a small number of languages, one decimal place is + // enough to prevent duplicate q-values. + // Also, trailing zeroes do not add any information, so + // they can be removed. + if n < 10 || u % 10 == 0 { + let u = (u + 5) / 10; + o_accept_languages.append(&format!(";q=0.{}", u)); + } else { + // Values below 10 require zero padding. 
+ o_accept_languages.append(&format!(";q=0.{:02}", u)); + } + } + } + + NS_OK +} + +/// Defines a consistent capitalization for a given language string. +/// +/// # Arguments +/// * `token` - a narrow char slice describing a language. +/// +/// Valid language tags are of the form +/// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN" +/// +/// Language tags are defined in the +/// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to +/// the spec: +/// +/// > At all times, language tags and their subtags, including private +/// > use and extensions, are to be treated as case insensitive: there +/// > exist conventions for the capitalization of some of the subtags, +/// > but these MUST NOT be taken to carry meaning. +/// +/// The capitalization applied here is therefore purely conventional; see bug 1108183. +fn canonicalize_language_tag(token: &mut [u8]) { + for c in token.iter_mut() { + *c = AsciiExt::to_ascii_lowercase(c); + } + + let sub_tags = token.split_mut(|c| *c == ('-' as u8)); + for (i, mut sub_tag) in sub_tags.enumerate() { + if i == 0 { + // Primary language subtag, like the "en" in "en-US"; it stays lowercase + continue; + } + + match sub_tag.len() { + // Singleton tag, like "x" or "i". These signify a + // non-standard language, so we stop capitalizing after + // these. 
+ 1 => break, + // ISO 3166-1 Country code, like "US" + 2 => { + sub_tag[0] = AsciiExt::to_ascii_uppercase(&sub_tag[0]); + sub_tag[1] = AsciiExt::to_ascii_uppercase(&sub_tag[1]); + }, + // ISO 15924 script code, like "Nkoo" + 4 => { + sub_tag[0] = AsciiExt::to_ascii_uppercase(&sub_tag[0]); + }, + _ => {}, + }; + } +} diff --git a/netwerk/protocol/http/nsHttpHandler.cpp b/netwerk/protocol/http/nsHttpHandler.cpp index 662a28cda74ee..755b7e8e77b0d 100644 --- a/netwerk/protocol/http/nsHttpHandler.cpp +++ b/netwerk/protocol/http/nsHttpHandler.cpp @@ -9,6 +9,7 @@ #include "prsystem.h" +#include "nsError.h" #include "nsHttp.h" #include "nsHttpHandler.h" #include "nsHttpChannel.h" @@ -58,6 +59,7 @@ #include "nsIXULRuntime.h" #include "nsCharSeparatedTokenizer.h" #include "nsRFPService.h" +#include "rust-helper/src/helper.h" #include "mozilla/net/NeckoChild.h" #include "mozilla/net/NeckoParent.h" @@ -1908,51 +1910,6 @@ nsHttpHandler::PrefsChanged(nsIPrefBranch *prefs, const char *pref) #undef MULTI_PREF_CHANGED } - -/** - * Currently, only regularizes the case of subtags. 
- */ -static void -CanonicalizeLanguageTag(char *languageTag) -{ - char *s = languageTag; - while (*s != '\0') { - *s = nsCRT::ToLower(*s); - s++; - } - - s = languageTag; - bool isFirst = true; - bool seenSingleton = false; - while (*s != '\0') { - char *subTagEnd = strchr(s, '-'); - if (subTagEnd == nullptr) { - subTagEnd = strchr(s, '\0'); - } - - if (isFirst) { - isFirst = false; - } else if (seenSingleton) { - // Do nothing - } else { - size_t subTagLength = subTagEnd - s; - if (subTagLength == 1) { - seenSingleton = true; - } else if (subTagLength == 2) { - *s = nsCRT::ToUpper(*s); - *(s + 1) = nsCRT::ToUpper(*(s + 1)); - } else if (subTagLength == 4) { - *s = nsCRT::ToUpper(*s); - } - } - - s = subTagEnd; - if (*s != '\0') { - s++; - } - } -} - /** * Allocates a C string into that contains a ISO 639 language list * notated with HTTP "q" values for output with a HTTP Accept-Language @@ -1972,78 +1929,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua if (!i_AcceptLanguages) return NS_OK; - uint32_t n, count_n, size, wrote; - double q, dec; - char *p, *p2, *token, *q_Accept, *o_Accept; - const char *comma; - int32_t available; - - o_Accept = strdup(i_AcceptLanguages); - if (!o_Accept) - return NS_ERROR_OUT_OF_MEMORY; - for (p = o_Accept, n = size = 0; '\0' != *p; p++) { - if (*p == ',') n++; - size++; - } - - available = size + ++n * 11 + 1; - q_Accept = new char[available]; - if (!q_Accept) { - free(o_Accept); - return NS_ERROR_OUT_OF_MEMORY; - } - *q_Accept = '\0'; - q = 1.0; - dec = q / (double) n; - count_n = 0; - p2 = q_Accept; - for (token = nsCRT::strtok(o_Accept, ",", &p); - token != nullptr; - token = nsCRT::strtok(p, ",", &p)) - { - token = net_FindCharNotInSet(token, HTTP_LWS); - char* trim; - trim = net_FindCharInSet(token, ";" HTTP_LWS); - if (trim != nullptr) // remove "; q=..." if present - *trim = '\0'; - - if (*token != '\0') { - CanonicalizeLanguageTag(token); - - comma = count_n++ != 0 ? 
"," : ""; // delimiter if not first item - uint32_t u = QVAL_TO_UINT(q); - - // Only display q-value if less than 1.00. - if (u < 100) { - const char *qval_str; - - // With a small number of languages, one decimal place is enough to prevent duplicate q-values. - // Also, trailing zeroes do not add any information, so they can be removed. - if ((n < 10) || ((u % 10) == 0)) { - u = (u + 5) / 10; - qval_str = "%s%s;q=0.%u"; - } else { - // Values below 10 require zero padding. - qval_str = "%s%s;q=0.%02u"; - } - - wrote = snprintf(p2, available, qval_str, comma, token, u); - } else { - wrote = snprintf(p2, available, "%s%s", comma, token); - } - - q -= dec; - p2 += wrote; - available -= wrote; - MOZ_ASSERT(available > 0, "allocated string not long enough"); - } - } - free(o_Accept); - - o_AcceptLanguages.Assign((const char *) q_Accept); - delete [] q_Accept; - - return NS_OK; + const nsAutoCString ns_accept_languages(i_AcceptLanguages); + return rust_prepare_accept_languages(&ns_accept_languages, + &o_AcceptLanguages); } nsresult diff --git a/netwerk/test/unit/test_header_Accept-Language_case.js b/netwerk/test/unit/test_header_Accept-Language_case.js index 369aab51bb9bf..2bd5c09aa0397 100644 --- a/netwerk/test/unit/test_header_Accept-Language_case.js +++ b/netwerk/test/unit/test_header_Accept-Language_case.js @@ -22,6 +22,8 @@ function run_test() { ["de,en-us,en", "de,en-US;q=0.7,en;q=0.3"], ["en-US, en", "en-US,en;q=0.5"], ["EN-US;q=0.2, EN", "en-US,en;q=0.5"], + ["en ;q=0.8, de ", "en,de;q=0.5"], + [",en,", "en"], ]; for (let i = 0; i < testData.length; i++) { diff --git a/toolkit/library/gtest/rust/Cargo.lock b/toolkit/library/gtest/rust/Cargo.lock index 9d8376d02e183..c772e4554506e 100644 --- a/toolkit/library/gtest/rust/Cargo.lock +++ b/toolkit/library/gtest/rust/Cargo.lock @@ -563,6 +563,7 @@ dependencies = [ "geckoservo 0.0.1", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "mp4parse_capi 0.8.0", + "netwerk_helper 0.0.1", "nserror 0.1.0", 
"nsstring 0.1.0", "rust_url_capi 0.0.1", @@ -849,6 +850,14 @@ dependencies = [ "ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "netwerk_helper" +version = "0.0.1" +dependencies = [ + "nserror 0.1.0", + "nsstring 0.1.0", +] + [[package]] name = "nodrop" version = "0.1.9" diff --git a/toolkit/library/rust/Cargo.lock b/toolkit/library/rust/Cargo.lock index afab29187b2a8..d603dd85bf45b 100644 --- a/toolkit/library/rust/Cargo.lock +++ b/toolkit/library/rust/Cargo.lock @@ -562,6 +562,7 @@ dependencies = [ "geckoservo 0.0.1", "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "mp4parse_capi 0.8.0", + "netwerk_helper 0.0.1", "nserror 0.1.0", "nsstring 0.1.0", "rust_url_capi 0.0.1", @@ -844,6 +845,14 @@ dependencies = [ "ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "netwerk_helper" +version = "0.0.1" +dependencies = [ + "nserror 0.1.0", + "nsstring 0.1.0", +] + [[package]] name = "nodrop" version = "0.1.9" diff --git a/toolkit/library/rust/shared/Cargo.toml b/toolkit/library/rust/shared/Cargo.toml index 4e07bdbbe1d22..2397166d0ec58 100644 --- a/toolkit/library/rust/shared/Cargo.toml +++ b/toolkit/library/rust/shared/Cargo.toml @@ -10,6 +10,7 @@ geckoservo = { path = "../../../../servo/ports/geckolib", optional = true } mp4parse_capi = { path = "../../../../media/libstagefright/binding/mp4parse_capi" } nsstring = { path = "../../../../xpcom/rust/nsstring" } nserror = { path = "../../../../xpcom/rust/nserror" } +netwerk_helper = { path = "../../../../netwerk/base/rust-helper" } rust_url_capi = { path = "../../../../netwerk/base/rust-url-capi" } webrender_bindings = { path = "../../../../gfx/webrender_bindings", optional = true } cubeb-pulse = { path = "../../../../media/libcubeb/cubeb-pulse-rs", optional = true, features=["pulse-dlopen"] } diff --git a/toolkit/library/rust/shared/lib.rs b/toolkit/library/rust/shared/lib.rs index 
7b4b7b686bc65..91c54564f0038 100644 --- a/toolkit/library/rust/shared/lib.rs +++ b/toolkit/library/rust/shared/lib.rs @@ -9,6 +9,7 @@ extern crate mp4parse_capi; extern crate nsstring; extern crate nserror; extern crate rust_url_capi; +extern crate netwerk_helper; #[cfg(feature = "quantum_render")] extern crate webrender_bindings; #[cfg(feature = "cubeb_pulse_rust")]