|
14 | 14 | // MARK: - Private extensions for parsing encoding names
|
15 | 15 |
|
16 | 16 | private extension Unicode.Scalar {
|
17 |
| - var _isASCIINumeric: Bool { |
18 |
| - return ("0"..."9").contains(self) |
19 |
| - } |
20 |
| - |
21 |
| - var _asciiNumericValue: Int { |
22 |
| - assert(_isASCIINumeric) |
23 |
| - return Int(self.value - 0x30) |
24 |
| - } |
25 |
| - |
26 | 17 | /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace".
|
27 | 18 | ///
|
28 | 19 | /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace
|
@@ -99,68 +90,6 @@ extension StringEncodingNameTokenizer where Self: ~Copyable {
|
99 | 90 | }
|
100 | 91 | }
|
101 | 92 |
|
102 |
| -/// ICU-independent parser that follows [Charset Alias Matching](https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching). |
103 |
| -private struct UTS22Tokenizer: StringEncodingNameTokenizer, ~Copyable { |
104 |
| - enum Token: Equatable { |
105 |
| - case numeric(Int) |
106 |
| - case alphabet(ASCIICaseInsensitiveUnicodeScalar) |
107 |
| - } |
108 |
| - |
109 |
| - enum Error: Swift.Error { |
110 |
| - case tooLargeNumericValue |
111 |
| - } |
112 |
| - |
113 |
| - let scalars: String.UnicodeScalarView |
114 |
| - |
115 |
| - private var _currentIndex: String.UnicodeScalarView.Index |
116 |
| - |
117 |
| - init(name: String) { |
118 |
| - self.scalars = name.unicodeScalars |
119 |
| - self._currentIndex = scalars.startIndex |
120 |
| - } |
121 |
| - |
122 |
| - mutating func nextToken() throws -> Token? { |
123 |
| - guard _currentIndex < scalars.endIndex else { |
124 |
| - return nil |
125 |
| - } |
126 |
| - |
127 |
| - let scalar = scalars[_currentIndex] |
128 |
| - switch scalar { |
129 |
| - case "0"..."9": |
130 |
| - // Parse a numeric value ignoring leading zeros. |
131 |
| - // |
132 |
| - // NOTE: To prevent the value from overflowing, a threshold is set here. |
133 |
| - // The maximum number of digits expected is 8 as of now: e.g. `csISO42JISC62261978`. |
134 |
| - // In practice, it does no harm to throw an error when the value is too large. |
135 |
| - |
136 |
| - let threshold: Int = 999_999_999 |
137 |
| - var value = scalar._asciiNumericValue |
138 |
| - scalars.formIndex(after: &_currentIndex) |
139 |
| - while _currentIndex < scalars.endIndex { |
140 |
| - let currentScalar = scalars[_currentIndex] |
141 |
| - guard currentScalar._isASCIINumeric else { |
142 |
| - break |
143 |
| - } |
144 |
| - value = value * 10 + currentScalar._asciiNumericValue |
145 |
| - if value > threshold { |
146 |
| - throw Error.tooLargeNumericValue |
147 |
| - } |
148 |
| - scalars.formIndex(after: &_currentIndex) |
149 |
| - } |
150 |
| - return .numeric(value) |
151 |
| - case "A"..."Z", "a"..."z": |
152 |
| - scalars.formIndex(after: &_currentIndex) |
153 |
| - return .alphabet(ASCIICaseInsensitiveUnicodeScalar(scalar)) |
154 |
| - default: |
155 |
| - scalars.formIndex(after: &_currentIndex) |
156 |
| - if _currentIndex < scalars.endIndex { |
157 |
| - return try nextToken() |
158 |
| - } |
159 |
| - return nil |
160 |
| - } |
161 |
| - } |
162 |
| -} |
163 |
| - |
164 | 93 |
|
165 | 94 | /// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s.
|
166 | 95 | private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable {
|
|
0 commit comments