Revert "Implement non-transitional IDNA processing behind a flag"

This reverts commit 60447d2. Reason for revert: Thread safety Original change's description: > Implement non-transitional IDNA processing behind a flag > > This CL adds a flag called UseIDNA2008NonTransitional that enables > non-transitional IDNA processing for URLs. When enabled, hostnames > that contain deviation characters will no longer be mapped and will > be usable as is. These deviation characters are: > - U+00DF LATIN SMALL LETTER SHARP S > - U+03C2 GREEK SMALL LETTER FINAL SIGMA > - U+200D ZERO WIDTH JOINER > - U+200C ZERO WIDTH NON-JOINER > > See https://unicode.org/reports/tr46/#Deviations for details. > > As an example, http://faß.de is presently mapped to http://fass.de > with IDNA 2008 transitional processing. When the flag is enabled, > ß will no longer be mapped to ss and the URL will remain unchanged. > > Bug: 694157 > Change-Id: I37df9aea5520374d1fce829b0d6ee22d9bdf32ec > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4018995 > Reviewed-by: Emily Stark <estark@chromium.org> > Reviewed-by: Charlie Harrison <csharrison@chromium.org> > Commit-Queue: Mustafa Emre Acer <meacer@chromium.org> > Reviewed-by: Kent Tamura <tkent@chromium.org> > Cr-Commit-Position: refs/heads/main@{#1070571} Bug: 694157 Change-Id: I3c509230dc8fad7c1e696007d4ac8f918964b995 No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4024340 Reviewed-by: Kent Tamura <tkent@chromium.org> Reviewed-by: Charlie Harrison <csharrison@chromium.org> Reviewed-by: Emily Stark <estark@chromium.org> Commit-Queue: Mustafa Emre Acer <meacer@chromium.org> Cr-Commit-Position: refs/heads/main@{#1071085}
qweri0p · Nov 14, 2022 · e544837 · e544837
1 parent b483999
commit e544837
Show file tree

Hide file tree

Showing 25 changed files with 169 additions and 6,351 deletions.
diff --git a/components/url_formatter/spoof_checks/idn_spoof_checker.cc b/components/url_formatter/spoof_checks/idn_spoof_checker.cc
@@ -22,7 +22,6 @@
 #include "third_party/icu/source/i18n/unicode/regex.h"
 #include "third_party/icu/source/i18n/unicode/translit.h"
 #include "third_party/icu/source/i18n/unicode/uspoof.h"
-#include "url/url_features.h"
 
 namespace url_formatter {
 
@@ -376,10 +375,8 @@ IDNSpoofChecker::Result IDNSpoofChecker::SafeToDisplayAsUnicode(
   // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted
   // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as
   // such. See http://crbug.com/595263 .
-  if (!url::IsUsingIDNA2008NonTransitional() &&
-      deviation_characters_.containsSome(label_string)) {
+  if (deviation_characters_.containsSome(label_string))
     return Result::kDeviationCharacters;
-  }
 
   // Disallow Icelandic confusables for domains outside Iceland's ccTLD (.is).
   if (label_string.length() > 1 && top_level_domain != "is" &&

diff --git a/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc b/components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc
@@ -10,14 +10,11 @@
 #include "base/strings/string_piece.h"
 #include "base/strings/stringprintf.h"
 #include "base/strings/utf_string_conversions.h"
-#include "base/test/scoped_feature_list.h"
 #include "build/build_config.h"
 #include "components/url_formatter/spoof_checks/skeleton_generator.h"
 #include "components/url_formatter/url_formatter.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "url/gurl.h"
-#include "url/url_features.h"
-#include "url/url_idna_icu.h"
 
 namespace url_formatter {
 
@@ -822,6 +819,19 @@ const IDNTestCase kIdnCases[] = {
     {"xn--foog-opf.com", u"foog\u05b4.com", kUnsafe},    // Latin + Hebrew NSM
     {"xn--shb5495f.com", u"\uac00\u0650.com", kUnsafe},  // Hang + Arabic NSM
 
+    // 4 Deviation characters between IDNA 2003 and IDNA 2008
+    // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma
+    // and the latter two are mapped away. However, the punycode form should
+    // remain in punycode.
+    // U+00DF(sharp-s)
+    {"xn--fu-hia.de", u"fu\u00df.de", kUnsafe},
+    // U+03C2(final-sigma)
+    {"xn--mxac2c.gr", u"\u03b1\u03b2\u03c2.gr", kUnsafe},
+    // U+200C(ZWNJ)
+    {"xn--h2by8byc123p.in", u"\u0924\u094d\u200c\u0930\u093f.in", kUnsafe},
+    // U+200C(ZWJ)
+    {"xn--11b6iy14e.in", u"\u0915\u094d\u200d.in", kUnsafe},
+
     // Math Monospace Small A. When entered in Unicode, it's canonicalized to
     // 'a'. The punycode form should remain in punycode.
     {"xn--bc-9x80a.xyz", u"\U0001d68abc.xyz", kInvalid},
@@ -1108,19 +1118,8 @@ bool IsPunycode(const std::u16string& s) {
 
 }  // namespace
 
-class IDNSpoofCheckerTest : public ::testing::Test,
-                            public ::testing::WithParamInterface<bool> {
+class IDNSpoofCheckerTest : public ::testing::Test {
  protected:
-  IDNSpoofCheckerTest() {
-    if (GetParam()) {
-      scoped_feature_list_.InitAndEnableFeature(
-          url::kUseIDNA2008NonTransitional);
-    } else {
-      scoped_feature_list_.InitAndDisableFeature(
-          url::kUseIDNA2008NonTransitional);
-    }
-  }
-
   void SetUp() override {
     IDNSpoofChecker::HuffmanTrieParams trie_params{
         test::kTopDomainsHuffmanTree, sizeof(test::kTopDomainsHuffmanTree),
@@ -1129,89 +1128,53 @@ class IDNSpoofCheckerTest : public ::testing::Test,
     IDNSpoofChecker::SetTrieParamsForTesting(trie_params);
   }
 
-  void TearDown() override {
-    IDNSpoofChecker::RestoreTrieParamsForTesting();
-    url::ResetUIDNAForTesting();
-  }
+  void TearDown() override { IDNSpoofChecker::RestoreTrieParamsForTesting(); }
+};
+
+// Test that a domain entered as punycode is decoded to unicode if safe,
+// otherwise is left in punycode.
+//
+// TODO(crbug.com/1036523): This should also check if a domain entered as
+// unicode is properly decoded or not-decoded. This is important in cases where
+// certain unicode characters are canonicalized to other characters.
+// E.g. Mathematical Monospace Small A (U+1D68A) is canonicalized to "a" when
+// used in a domain name.
+TEST_F(IDNSpoofCheckerTest, IDNToUnicode) {
+  for (size_t i = 0; i < std::size(kIdnCases); i++) {
+    SCOPED_TRACE(
+        base::StringPrintf("input #%zu: \"%s\"", i, kIdnCases[i].input));
 
-  void RunIDNToUnicodeTest(const IDNTestCase& test) {
     // Sanity check to ensure that the unicode output matches the input. Bypass
     // all spoof checks by doing an unsafe conversion.
     const IDNConversionResult unsafe_result =
-        UnsafeIDNToUnicodeWithDetails(test.input);
+        UnsafeIDNToUnicodeWithDetails(kIdnCases[i].input);
 
     // Ignore inputs that can't be converted even with unsafe conversion because
     // they contain certain characters not allowed in IDNs. E.g. U+24DF (Latin
     // CIRCLED LATIN SMALL LETTER P) in hostname causes the conversion to fail
     // before reaching spoof checks.
-    if (test.expected_result != kInvalid) {
+    if (kIdnCases[i].expected_result != kInvalid) {
       // Also ignore domains that need to remain partially in punycode, such
       // as ѕсоре-рау.한국 where scope-pay is a Cyrillic whole-script
       // confusable but 한국 is safe. This would require adding yet another
       // field to the the test struct.
-      if (!IsPunycode(test.unicode_output)) {
-        EXPECT_EQ(unsafe_result.result, test.unicode_output);
+      if (!IsPunycode(kIdnCases[i].unicode_output)) {
+        EXPECT_EQ(unsafe_result.result, kIdnCases[i].unicode_output);
       }
     } else {
       // Invalid punycode should not be converted.
-      EXPECT_EQ(unsafe_result.result, base::ASCIIToUTF16(test.input));
+      EXPECT_EQ(unsafe_result.result, base::ASCIIToUTF16(kIdnCases[i].input));
     }
 
-    const std::u16string output(IDNToUnicode(test.input));
-    const std::u16string expected(test.expected_result == kSafe
-                                      ? test.unicode_output
-                                      : base::ASCIIToUTF16(test.input));
+    const std::u16string output(IDNToUnicode(kIdnCases[i].input));
+    const std::u16string expected(kIdnCases[i].expected_result == kSafe
+                                      ? kIdnCases[i].unicode_output
+                                      : base::ASCIIToUTF16(kIdnCases[i].input));
     EXPECT_EQ(expected, output);
   }
-
- private:
-  base::test::ScopedFeatureList scoped_feature_list_;
-};
-
-INSTANTIATE_TEST_SUITE_P(All, IDNSpoofCheckerTest, ::testing::Bool());
-
-// Test that a domain entered as punycode is decoded to unicode if safe,
-// otherwise is left in punycode.
-//
-// TODO(crbug.com/1036523): This should also check if a domain entered as
-// unicode is properly decoded or not-decoded. This is important in cases where
-// certain unicode characters are canonicalized to other characters.
-// E.g. Mathematical Monospace Small A (U+1D68A) is canonicalized to "a" when
-// used in a domain name.
-TEST_P(IDNSpoofCheckerTest, IDNToUnicode) {
-  for (const auto& test : kIdnCases) {
-    RunIDNToUnicodeTest(test);
-  }
-}
-
-// Same as IDNToUnicode but only tests hostnames with deviation characters.
-TEST_P(IDNSpoofCheckerTest, IDNToUnicodeDeviationCharacters) {
-  const Result kExpectedSafety = GetParam() ? kSafe : kUnsafe;
-
-  // Tests for 4 Deviation characters between IDNA 2003 and IDNA 2008. When
-  // entered in Unicode:
-  // - In Transitional mode, the first two are mapped to 'ss' and Greek sigma
-  //   and the latter two are mapped away. However, the punycode form should
-  //   remain in punycode.
-  // - In Non-Transitional mode, none of the characters should be mapped and
-  //   the hostnames should be considered safe.
-  const IDNTestCase kTestCases[] = {
-      // U+00DF(sharp-s)
-      {"xn--fu-hia.de", u"fu\u00df.de", kExpectedSafety},
-      // U+03C2(final-sigma)
-      {"xn--mxac2c.gr", u"\u03b1\u03b2\u03c2.gr", kExpectedSafety},
-      // U+200C(ZWNJ)
-      {"xn--h2by8byc123p.in", u"\u0924\u094d\u200c\u0930\u093f.in",
-       kExpectedSafety},
-      // U+200C(ZWJ)
-      {"xn--11b6iy14e.in", u"\u0915\u094d\u200d.in", kExpectedSafety},
-  };
-  for (const auto& test : kTestCases) {
-    RunIDNToUnicodeTest(test);
-  }
 }
 
-TEST_P(IDNSpoofCheckerTest, GetSimilarTopDomain) {
+TEST_F(IDNSpoofCheckerTest, GetSimilarTopDomain) {
   struct TestCase {
     const char16_t* const hostname;
     const char* const expected_top_domain;
@@ -1244,7 +1207,7 @@ TEST_P(IDNSpoofCheckerTest, GetSimilarTopDomain) {
   }
 }
 
-TEST_P(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) {
+TEST_F(IDNSpoofCheckerTest, LookupSkeletonInTopDomains) {
   {
     TopDomainEntry entry =
         IDNSpoofChecker().LookupSkeletonInTopDomains("d4OOO.corn");

diff --git a/third_party/blink/renderer/core/html/forms/email_input_type.cc b/third_party/blink/renderer/core/html/forms/email_input_type.cc
@@ -54,10 +54,7 @@ const char kEmailPattern[] =
 
 // RFC5321 says the maximum total length of a domain name is 255 octets.
 const int32_t kMaximumDomainNameLength = 255;
-
 // Use the same option as in url/url_canon_icu.cc
-// TODO(crbug.com/694157): Change the options if UseIDNA2008NonTransitional flag
-// is enabled.
 const int32_t kIdnaConversionOption = UIDNA_CHECK_BIDI;
 
 }  // namespace

diff --git a/third_party/blink/web_tests/VirtualTestSuites b/third_party/blink/web_tests/VirtualTestSuites
@@ -1258,13 +1258,6 @@
              "--enable-blink-features=DocumentPictureInPictureAPI"]
   },
 
-  {
-    "prefix": "idna-2008",
-    "platforms": ["Linux"],
-    "bases": [ "external/wpt/url", "fast/url"],
-    "args": ["--enable-features=UseIDNA2008NonTransitional"]
-  },
-
   "Tests in the virtual/origin-agent-cluster-default suite. These tests",
   "require isolation behaviour as in the actual browser and are therefore",
   "incompatible with auto-wpt-origin-isolation. Thus they are only run in",

diff --git a/third_party/blink/web_tests/virtual/idna-2008/README.md b/third_party/blink/web_tests/virtual/idna-2008/README.md