2
2
// The .NET Foundation licenses this file to you under the MIT license.
3
3
4
4
using System . Buffers . Binary ;
5
+ using System . Diagnostics ;
6
+ using System . Numerics ;
7
+ using System . Runtime . CompilerServices ;
5
8
6
- namespace System
9
+ namespace System . Net
7
10
{
8
11
internal static partial class IPv4AddressHelper
9
12
{
10
13
internal const long Invalid = - 1 ;
11
14
private const long MaxIPv4Value = uint . MaxValue ; // the native parser cannot handle MaxIPv4Value, only MaxIPv4Value - 1
15
+
12
16
private const int Octal = 8 ;
13
17
private const int Decimal = 10 ;
14
18
private const int Hex = 16 ;
15
19
16
20
private const int NumberOfLabels = 4 ;
17
21
22
/// <summary>
/// Widens a single input element to a <see cref="ushort"/> so the parsing routines can
/// compare it against character literals regardless of the element type.
/// </summary>
/// <typeparam name="TChar">
/// The element type of the input. Only <see cref="char"/> and <see cref="byte"/> are
/// supported; this is asserted in debug builds.
/// </typeparam>
/// <param name="value">The element to convert.</param>
/// <returns>The numeric value of <paramref name="value"/> as a <see cref="ushort"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static ushort ToUShort<TChar>(TChar value)
    where TChar : unmanaged, IBinaryInteger<TChar>
{
    Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte));

    // The typeof comparison selects which unboxing cast is applied to the boxed value.
    if (typeof(TChar) == typeof(char))
    {
        return (char)(object)value;
    }

    // Every TChar other than char is unboxed as a byte.
    return (byte)(object)value;
}
32
+
18
33
// Only called from the IPv6Helper, only parse the canonical format
19
- internal static int ParseHostNumber ( ReadOnlySpan < char > str , int start , int end )
34
+ internal static int ParseHostNumber < TChar > ( ReadOnlySpan < TChar > str , int start , int end )
35
+ where TChar : unmanaged, IBinaryInteger < TChar >
20
36
{
37
+ Debug . Assert ( typeof ( TChar ) == typeof ( char ) || typeof ( TChar ) == typeof ( byte ) ) ;
38
+
21
39
Span < byte > numbers = stackalloc byte [ NumberOfLabels ] ;
22
40
23
41
for ( int i = 0 ; i < numbers . Length ; ++ i )
24
42
{
25
43
int b = 0 ;
26
- char ch ;
44
+ int ch ;
27
45
28
- for ( ; ( start < end ) && ( ch = str [ start ] ) != '.' && ch != ':' ; ++ start )
46
+ for ( ; ( start < end ) && ( ch = ToUShort ( str [ start ] ) ) != '.' && ch != ':' ; ++ start )
29
47
{
30
48
b = ( b * 10 ) + ch - '0' ;
31
49
}
@@ -79,7 +97,8 @@ internal static int ParseHostNumber(ReadOnlySpan<char> str, int start, int end)
79
97
//
80
98
81
99
//Remark: MUST NOT be used unless all input indexes are verified and trusted.
82
- internal static unsafe bool IsValid ( char * name , int start , ref int end , bool allowIPv6 , bool notImplicitFile , bool unknownScheme )
100
+ internal static unsafe bool IsValid < TChar > ( TChar * name , int start , ref int end , bool allowIPv6 , bool notImplicitFile , bool unknownScheme )
101
+ where TChar : unmanaged, IBinaryInteger < TChar >
83
102
{
84
103
// IPv6 can only have canonical IPv4 embedded. Unknown schemes will not attempt parsing of non-canonical IPv4 addresses.
85
104
if ( allowIPv6 || unknownScheme )
@@ -105,32 +124,45 @@ internal static unsafe bool IsValid(char* name, int start, ref int end, bool all
105
124
// / "2" %x30-34 DIGIT ; 200-249
106
125
// / "25" %x30-35 ; 250-255
107
126
//
108
- internal static unsafe bool IsValidCanonical ( char * name , int start , ref int end , bool allowIPv6 , bool notImplicitFile )
127
+ internal static unsafe bool IsValidCanonical < TChar > ( TChar * name , int start , ref int end , bool allowIPv6 , bool notImplicitFile )
128
+ where TChar : unmanaged, IBinaryInteger < TChar >
109
129
{
130
+ Debug . Assert ( typeof ( TChar ) == typeof ( char ) || typeof ( TChar ) == typeof ( byte ) ) ;
131
+
110
132
int dots = 0 ;
111
- int number = 0 ;
133
+ long number = 0 ;
112
134
bool haveNumber = false ;
113
135
bool firstCharIsZero = false ;
114
136
115
137
while ( start < end )
116
138
{
117
- char ch = name [ start ] ;
139
+ int ch = ToUShort ( name [ start ] ) ;
140
+
118
141
if ( allowIPv6 )
119
142
{
120
- // for ipv4 inside ipv6 the terminator is either ScopeId , prefix or ipv6 terminator
143
+ // For an IPv4 address nested inside an IPv6 address, the terminator is either the IPv6 address terminator (']') , prefix ('/') or ScopeId ('%')
121
144
if ( ch == ']' || ch == '/' || ch == '%' )
145
+ {
122
146
break ;
147
+ }
123
148
}
124
149
else if ( ch == '/' || ch == '\\ ' || ( notImplicitFile && ( ch == ':' || ch == '?' || ch == '#' ) ) )
125
150
{
151
+ // For a normal IPv4 address, the terminator is the prefix ('/' or its counterpart, '\'). If notImplicitFile is set, the terminator
152
+ // is one of the characters which signify the start of the rest of the URI - the port number (':'), query string ('?') or fragment ('#')
153
+
126
154
break ;
127
155
}
128
156
129
- if ( char . IsAsciiDigit ( ch ) )
157
+ // An explicit cast to an unsigned integer forces character values preceding '0' to underflow, eliminating one comparison below.
158
+ uint parsedCharacter = ( uint ) ( ch - '0' ) ;
159
+
160
+ if ( parsedCharacter < IPv4AddressHelper . Decimal )
130
161
{
131
- if ( ! haveNumber && ( ch == '0' ) )
162
+ // A number starting with zero should be interpreted in base 8 / octal
163
+ if ( ! haveNumber && parsedCharacter == 0 )
132
164
{
133
- if ( ( start + 1 < end ) && name [ start + 1 ] == '0' )
165
+ if ( ( start + 1 < end ) && name [ start + 1 ] == TChar . CreateTruncating ( '0' ) )
134
166
{
135
167
// 00 is not allowed as a prefix.
136
168
return false ;
@@ -140,14 +172,16 @@ internal static unsafe bool IsValidCanonical(char* name, int start, ref int end,
140
172
}
141
173
142
174
haveNumber = true ;
143
- number = number * 10 + ( name [ start ] - '0' ) ;
144
- if ( number > 255 )
175
+ number = number * IPv4AddressHelper . Decimal + parsedCharacter ;
176
+ if ( number > byte . MaxValue )
145
177
{
146
178
return false ;
147
179
}
148
180
}
149
181
else if ( ch == '.' )
150
182
{
183
+ // If the current character is not an integer, it may be the IPv4 component separator ('.')
184
+
151
185
if ( ! haveNumber || ( number > 0 && firstCharIsZero ) )
152
186
{
153
187
// 0 is not allowed to prefix a number.
@@ -176,68 +210,63 @@ internal static unsafe bool IsValidCanonical(char* name, int start, ref int end,
176
210
// Return Invalid (-1) for failures.
177
211
// If the address has less than three dots, only the rightmost section is assumed to contain the combined value for
178
212
// the missing sections: 0xFF00FFFF == 0xFF.0x00.0xFF.0xFF == 0xFF.0xFFFF
179
- internal static unsafe long ParseNonCanonical ( char * name , int start , ref int end , bool notImplicitFile )
213
+ internal static unsafe long ParseNonCanonical < TChar > ( TChar * name , int start , ref int end , bool notImplicitFile )
214
+ where TChar : unmanaged, IBinaryInteger < TChar >
180
215
{
181
- int numberBase = Decimal ;
182
- char ch ;
183
- long * parts = stackalloc long [ 4 ] ;
216
+ Debug . Assert ( typeof ( TChar ) == typeof ( char ) || typeof ( TChar ) == typeof ( byte ) ) ;
217
+
218
+ int numberBase = IPv4AddressHelper . Decimal ;
219
+ int ch = 0 ;
220
+ long * parts = stackalloc long [ 3 ] ; // One part per octet. Final octet doesn't have a terminator, so is stored in currentValue.
184
221
long currentValue = 0 ;
185
222
bool atLeastOneChar = false ;
186
223
187
224
// Parse one dotted section at a time
188
225
int dotCount = 0 ; // Limit 3
189
226
int current = start ;
227
+
190
228
for ( ; current < end ; current ++ )
191
229
{
192
- ch = name [ current ] ;
230
+ ch = ToUShort ( name [ current ] ) ;
193
231
currentValue = 0 ;
194
232
195
- // Figure out what base this section is in
196
- numberBase = Decimal ;
233
+ // Figure out what base this section is in, default to base 10.
234
+ // A number starting with zero should be interpreted in base 8 / octal
235
+ // If the number starts with 0x, it should be interpreted in base 16 / hex
236
+ numberBase = IPv4AddressHelper . Decimal ;
237
+
197
238
if ( ch == '0' )
198
239
{
199
- numberBase = Octal ;
200
240
current ++ ;
201
241
atLeastOneChar = true ;
202
242
if ( current < end )
203
243
{
204
- ch = name [ current ] ;
244
+ ch = ToUShort ( name [ current ] ) ;
245
+
205
246
if ( ch == 'x' || ch == 'X' )
206
247
{
207
- numberBase = Hex ;
248
+ numberBase = IPv4AddressHelper . Hex ;
249
+
208
250
current ++ ;
209
251
atLeastOneChar = false ;
210
252
}
253
+ else
254
+ {
255
+ numberBase = IPv4AddressHelper . Octal ;
256
+ }
211
257
}
212
258
}
213
259
214
260
// Parse this section
215
261
for ( ; current < end ; current ++ )
216
262
{
217
- ch = name [ current ] ;
218
- int digitValue ;
263
+ ch = ToUShort ( name [ current ] ) ;
264
+ int digitValue = HexConverter . FromChar ( ch ) ;
219
265
220
- if ( ( numberBase == Decimal || numberBase == Hex ) && char . IsAsciiDigit ( ch ) )
221
- {
222
- digitValue = ch - '0' ;
223
- }
224
- else if ( numberBase == Octal && '0' <= ch && ch <= '7' )
225
- {
226
- digitValue = ch - '0' ;
227
- }
228
- else if ( numberBase == Hex && 'a' <= ch && ch <= 'f' )
229
- {
230
- digitValue = ch + 10 - 'a' ;
231
- }
232
- else if ( numberBase == Hex && 'A' <= ch && ch <= 'F' )
233
- {
234
- digitValue = ch + 10 - 'A' ;
235
- }
236
- else
266
+ if ( digitValue >= numberBase )
237
267
{
238
268
break ; // Invalid/terminator
239
269
}
240
-
241
270
currentValue = ( currentValue * numberBase ) + digitValue ;
242
271
243
272
if ( currentValue > MaxIPv4Value ) // Overflow
@@ -248,10 +277,10 @@ internal static unsafe long ParseNonCanonical(char* name, int start, ref int end
248
277
atLeastOneChar = true ;
249
278
}
250
279
251
- if ( current < end && name [ current ] == '.' )
280
+ if ( current < end && ch == '.' )
252
281
{
253
282
if ( dotCount >= 3 // Max of 3 dots and 4 segments
254
- || ! atLeastOneChar // No empty segmets : 1...1
283
+ || ! atLeastOneChar // No empty segments : 1...1
255
284
// Only the last segment can be more than 255 (if there are less than 3 dots)
256
285
|| currentValue > 0xFF )
257
286
{
@@ -262,7 +291,7 @@ internal static unsafe long ParseNonCanonical(char* name, int start, ref int end
262
291
atLeastOneChar = false ;
263
292
continue ;
264
293
}
265
- // We don't get here unless We find an invalid character or a terminator
294
+ // We don't get here unless we find an invalid character or a terminator
266
295
break ;
267
296
}
268
297
@@ -275,8 +304,11 @@ internal static unsafe long ParseNonCanonical(char* name, int start, ref int end
275
304
{
276
305
// end of string, allowed
277
306
}
278
- else if ( ( ch = name [ current ] ) == '/' || ch == '\\ ' || ( notImplicitFile && ( ch == ':' || ch == '?' || ch == '#' ) ) )
307
+ else if ( ch == '/' || ch == '\\ ' || ( notImplicitFile && ( ch == ':' || ch == '?' || ch == '#' ) ) )
279
308
{
309
+ // For a normal IPv4 address, the terminator is the prefix ('/' or its counterpart, '\'). If notImplicitFile is set, the terminator
310
+ // is one of the characters which signify the start of the rest of the URI - the port number (':'), query string ('?') or fragment ('#')
311
+
280
312
end = current ;
281
313
}
282
314
else
@@ -285,35 +317,35 @@ internal static unsafe long ParseNonCanonical(char* name, int start, ref int end
285
317
return Invalid ;
286
318
}
287
319
288
- parts [ dotCount ] = currentValue ;
289
-
290
- // Parsed, reassemble and check for overflows
320
+ // Parsed, reassemble and check for overflows in the last part. Previous parts have already been checked in the loop
291
321
switch ( dotCount )
292
322
{
293
323
case 0 : // 0xFFFFFFFF
294
- if ( parts [ 0 ] > MaxIPv4Value )
295
- {
296
- return Invalid ;
297
- }
298
- return parts [ 0 ] ;
324
+ return currentValue ;
299
325
case 1 : // 0xFF.0xFFFFFF
300
- if ( parts [ 1 ] > 0xffffff )
326
+ Debug . Assert ( parts [ 0 ] <= 0xFF ) ;
327
+ if ( currentValue > 0xffffff )
301
328
{
302
329
return Invalid ;
303
330
}
304
- return ( parts [ 0 ] << 24 ) | ( parts [ 1 ] & 0xffffff ) ;
331
+ return ( parts [ 0 ] << 24 ) | currentValue ;
305
332
case 2 : // 0xFF.0xFF.0xFFFF
306
- if ( parts [ 2 ] > 0xffff )
333
+ Debug . Assert ( parts [ 0 ] <= 0xFF ) ;
334
+ Debug . Assert ( parts [ 1 ] <= 0xFF ) ;
335
+ if ( currentValue > 0xffff )
307
336
{
308
337
return Invalid ;
309
338
}
310
- return ( parts [ 0 ] << 24 ) | ( ( parts [ 1 ] & 0xff ) << 16 ) | ( parts [ 2 ] & 0xffff ) ;
339
+ return ( parts [ 0 ] << 24 ) | ( parts [ 1 ] << 16 ) | currentValue ;
311
340
case 3 : // 0xFF.0xFF.0xFF.0xFF
312
- if ( parts [ 3 ] > 0xff )
341
+ Debug . Assert ( parts [ 0 ] <= 0xFF ) ;
342
+ Debug . Assert ( parts [ 1 ] <= 0xFF ) ;
343
+ Debug . Assert ( parts [ 2 ] <= 0xFF ) ;
344
+ if ( currentValue > 0xff )
313
345
{
314
346
return Invalid ;
315
347
}
316
- return ( parts [ 0 ] << 24 ) | ( ( parts [ 1 ] & 0xff ) << 16 ) | ( ( parts [ 2 ] & 0xff ) << 8 ) | ( parts [ 3 ] & 0xff ) ;
348
+ return ( parts [ 0 ] << 24 ) | ( parts [ 1 ] << 16 ) | ( parts [ 2 ] << 8 ) | currentValue ;
317
349
default :
318
350
return Invalid ;
319
351
}
0 commit comments