Skip to content

Commit 90cc6df

Browse files
Fix CompareInfo weightless code point handling, plus other improvements (#1514)
* Create spanified and Rune-accepting overloads of CompareInfo APIs * Remove much of the duplicated code throughout CompareInfo * Remove "empty string" optimizations that were causing incorrect comparisons against weightless code points * Improve error detection around some edge cases
1 parent 8dc1329 commit 90cc6df

File tree

25 files changed

+1656
-1002
lines changed

25 files changed

+1656
-1002
lines changed

src/libraries/Common/src/Interop/Interop.Collation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ internal static partial class Globalization
2323
internal static extern unsafe int IndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr);
2424

2525
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_LastIndexOf")]
26-
internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options);
26+
internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr);
2727

2828
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")]
2929
internal static extern unsafe int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast);

src/libraries/Common/src/Interop/Windows/Kernel32/Interop.Globalization.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ internal static unsafe partial class Kernel32
5353
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
5454
internal static extern int LocaleNameToLCID(string lpName, uint dwFlags);
5555

56-
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
56+
[DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
5757
internal static extern int LCMapStringEx(
5858
string? lpLocaleName,
5959
uint dwMapFlags,

src/libraries/Common/tests/Tests/System/StringTests.cs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2516,7 +2516,15 @@ public static void EqualsTest(string s1, object obj, StringComparison comparison
25162516
Assert.Equal(s1.GetHashCode(), s1.GetHashCode());
25172517
}
25182518

2519-
Assert.Equal(expected, s1.AsSpan().Equals(s2.AsSpan(), comparisonType));
2519+
if (string.IsNullOrEmpty(s1) && string.IsNullOrEmpty(s2))
2520+
{
2521+
// null strings are normalized to empty spans
2522+
Assert.True(s1.AsSpan().Equals(s2.AsSpan(), comparisonType));
2523+
}
2524+
else
2525+
{
2526+
Assert.Equal(expected, s1.AsSpan().Equals(s2.AsSpan(), comparisonType));
2527+
}
25202528
}
25212529

25222530
public static IEnumerable<object[]> Equals_EncyclopaediaData()
@@ -6779,6 +6787,19 @@ public static void StartEndWithTest(string source, string start, string end, str
67796787
Assert.Equal(expected, source.EndsWith(end, ignoreCase, ci));
67806788
}
67816789

6790+
[Theory]
6791+
[InlineData("", StringComparison.InvariantCulture, true)]
6792+
[InlineData("", StringComparison.Ordinal, true)]
6793+
[InlineData(ZeroWidthJoiner, StringComparison.InvariantCulture, true)]
6794+
[InlineData(ZeroWidthJoiner, StringComparison.Ordinal, false)]
6795+
public static void StartEndWith_ZeroWeightValue(string value, StringComparison comparison, bool expectedStartsAndEndsWithResult)
6796+
{
6797+
Assert.Equal(expectedStartsAndEndsWithResult, string.Empty.StartsWith(value, comparison));
6798+
Assert.Equal(expectedStartsAndEndsWithResult, string.Empty.EndsWith(value, comparison));
6799+
Assert.Equal(expectedStartsAndEndsWithResult ? 0 : -1, string.Empty.IndexOf(value, comparison));
6800+
Assert.Equal(expectedStartsAndEndsWithResult ? 0 : -1, string.Empty.LastIndexOf(value, comparison));
6801+
}
6802+
67826803
[Fact]
67836804
public static void StartEndNegativeTest()
67846805
{

src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,20 @@ int32_t GlobalizationNative_CompareString(
445445

446446
if (U_SUCCESS(err))
447447
{
448+
// Workaround for https://unicode-org.atlassian.net/projects/ICU/issues/ICU-9396
449+
// The ucol_strcoll routine on some older versions of ICU doesn't correctly
450+
// handle nullptr inputs. We'll play defensively and always flow a non-nullptr.
451+
452+
UChar dummyChar = 0;
453+
if (lpStr1 == NULL)
454+
{
455+
lpStr1 = &dummyChar;
456+
}
457+
if (lpStr2 == NULL)
458+
{
459+
lpStr2 = &dummyChar;
460+
}
461+
448462
result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length);
449463
}
450464

@@ -464,7 +478,28 @@ int32_t GlobalizationNative_IndexOf(
464478
int32_t options,
465479
int32_t* pMatchedLength)
466480
{
481+
assert(cwTargetLength > 0);
482+
467483
int32_t result = USEARCH_DONE;
484+
485+
// It's possible somebody passed us (source = <empty>, target = <non-empty>).
486+
// ICU's usearch_* APIs don't handle empty source inputs properly. However,
487+
// if this occurs the user really just wanted us to perform an equality check.
488+
// We can't short-circuit the operation because depending on the collation in
489+
// use, certain code points may have zero weight, which means that empty
490+
// strings may compare as equal to non-empty strings.
491+
492+
if (cwSourceLength == 0)
493+
{
494+
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
495+
if (result == UCOL_EQUAL && pMatchedLength != NULL)
496+
{
497+
*pMatchedLength = cwTargetLength;
498+
}
499+
500+
return (result == UCOL_EQUAL) ? 0 : -1;
501+
}
502+
468503
UErrorCode err = U_ZERO_ERROR;
469504
const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
470505

@@ -499,9 +534,31 @@ int32_t GlobalizationNative_LastIndexOf(
499534
int32_t cwTargetLength,
500535
const UChar* lpSource,
501536
int32_t cwSourceLength,
502-
int32_t options)
537+
int32_t options,
538+
int32_t* pMatchedLength)
503539
{
540+
assert(cwTargetLength > 0);
541+
504542
int32_t result = USEARCH_DONE;
543+
544+
// It's possible somebody passed us (source = <empty>, target = <non-empty>).
545+
// ICU's usearch_* APIs don't handle empty source inputs properly. However,
546+
// if this occurs the user really just wanted us to perform an equality check.
547+
// We can't short-circuit the operation because depending on the collation in
548+
// use, certain code points may have zero weight, which means that empty
549+
// strings may compare as equal to non-empty strings.
550+
551+
if (cwSourceLength == 0)
552+
{
553+
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
554+
if (result == UCOL_EQUAL && pMatchedLength != NULL)
555+
{
556+
*pMatchedLength = cwTargetLength;
557+
}
558+
559+
return (result == UCOL_EQUAL) ? 0 : -1;
560+
}
561+
505562
UErrorCode err = U_ZERO_ERROR;
506563
const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
507564

@@ -512,6 +569,13 @@ int32_t GlobalizationNative_LastIndexOf(
512569
if (U_SUCCESS(err))
513570
{
514571
result = usearch_last(pSearch, &err);
572+
573+
// if the search was successful,
574+
// we'll try to get the matched string length.
575+
if (result != USEARCH_DONE && pMatchedLength != NULL)
576+
{
577+
*pMatchedLength = usearch_getMatchedLength(pSearch);
578+
}
515579
usearch_close(pSearch);
516580
}
517581
}
@@ -771,14 +835,16 @@ static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCod
771835
int32_t idx = usearch_last(pSearch, pErrorCode);
772836
if (idx != USEARCH_DONE)
773837
{
774-
if ((idx + usearch_getMatchedLength(pSearch)) == patternLength)
838+
int32_t matchEnd = idx + usearch_getMatchedLength(pSearch);
839+
assert(matchEnd <= textLength);
840+
841+
if (matchEnd == textLength)
775842
{
776843
result = TRUE;
777844
}
778845
else
779846
{
780-
int32_t matchEnd = idx + usearch_getMatchedLength(pSearch);
781-
int32_t remainingStringLength = patternLength - matchEnd;
847+
int32_t remainingStringLength = textLength - matchEnd;
782848

783849
result = CanIgnoreAllCollationElements(pCollator, pText + matchEnd, remainingStringLength);
784850
}

src/libraries/Native/Unix/System.Globalization.Native/pal_collation.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ PALEXPORT int32_t GlobalizationNative_LastIndexOf(SortHandle* pSortHandle,
3737
int32_t cwTargetLength,
3838
const UChar* lpSource,
3939
int32_t cwSourceLength,
40-
int32_t options);
40+
int32_t options,
41+
int32_t* pMatchedLength);
4142

4243
PALEXPORT int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(const UChar* lpTarget,
4344
int32_t cwTargetLength,

src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System.Buffers;
56
using System.Collections.Generic;
67
using Xunit;
78

@@ -207,6 +208,13 @@ public static IEnumerable<object[]> Compare_TestData()
207208
yield return new object[] { s_invariantCompare, "Test's", null, CompareOptions.None, 1 };
208209
yield return new object[] { s_invariantCompare, null, null, CompareOptions.None, 0 };
209210

211+
yield return new object[] { s_invariantCompare, "", "Tests", CompareOptions.None, -1 };
212+
yield return new object[] { s_invariantCompare, "Tests", "", CompareOptions.None, 1 };
213+
214+
yield return new object[] { s_invariantCompare, null, "", CompareOptions.None, -1 };
215+
yield return new object[] { s_invariantCompare, "", null, CompareOptions.None, 1 };
216+
yield return new object[] { s_invariantCompare, "", "", CompareOptions.None, 0 };
217+
210218
yield return new object[] { s_invariantCompare, new string('a', 5555), new string('a', 5555), CompareOptions.None, 0 };
211219
yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase, 0 };
212220
yield return new object[] { s_invariantCompare, "foobar", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, -1 };
@@ -362,6 +370,26 @@ public void Compare_Advanced(CompareInfo compareInfo, string string1, int offset
362370
// Use Compare(string, int, int, string, int, int, CompareOptions)
363371
Assert.Equal(expected, Math.Sign(compareInfo.Compare(string1, offset1, length1, string2, offset2, length2, options)));
364372
Assert.Equal(-expected, Math.Sign(compareInfo.Compare(string2, offset2, length2, string1, offset1, length1, options)));
373+
374+
// Now test the span-based versions - use BoundedMemory to detect buffer overruns
375+
// We can't run this test for null inputs since they implicitly convert to empty span
376+
377+
if (string1 != null && string2 != null)
378+
{
379+
RunSpanCompareTest(compareInfo, string1.AsSpan(offset1, length1), string2.AsSpan(offset2, length2), options, expected);
380+
}
381+
382+
static void RunSpanCompareTest(CompareInfo compareInfo, ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options, int expected)
383+
{
384+
using BoundedMemory<char> string1BoundedMemory = BoundedMemory.AllocateFromExistingData(string1);
385+
string1BoundedMemory.MakeReadonly();
386+
387+
using BoundedMemory<char> string2BoundedMemory = BoundedMemory.AllocateFromExistingData(string2);
388+
string2BoundedMemory.MakeReadonly();
389+
390+
Assert.Equal(expected, Math.Sign(compareInfo.Compare(string1, string2, options)));
391+
Assert.Equal(-expected, Math.Sign(compareInfo.Compare(string2, string1, options)));
392+
}
365393
}
366394

367395
[Fact]

src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System.Buffers;
56
using System.Collections.Generic;
7+
using System.Text;
68
using Xunit;
79

810
namespace System.Globalization.Tests
@@ -63,6 +65,10 @@ public static IEnumerable<object[]> IndexOf_TestData()
6365
yield return new object[] { s_invariantCompare, "TestFooBA\u0300R", "FooB\u00C0R", 0, 11, CompareOptions.IgnoreNonSpace, 4 };
6466
yield return new object[] { s_invariantCompare, "o\u0308", "o", 0, 2, CompareOptions.None, -1 };
6567

68+
// Weightless characters
69+
yield return new object[] { s_invariantCompare, "", "\u200d", 0, 0, CompareOptions.None, 0 };
70+
yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1 };
71+
6672
// Ignore symbols
6773
yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5 };
6874
yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1 };
@@ -192,7 +198,27 @@ public void IndexOf_String(CompareInfo compareInfo, string source, string value,
192198
// Use int MemoryExtensions.IndexOf(this ReadOnlySpan<char>, ReadOnlySpan<char>, StringComparison)
193199
Assert.Equal((expected == -1) ? -1 : (expected - startIndex), source.AsSpan(startIndex, count).IndexOf(value.AsSpan(), stringComparison));
194200
}
195-
}
201+
202+
// Now test the span-based versions - use BoundedMemory to detect buffer overruns
203+
204+
RunSpanIndexOfTest(compareInfo, source.AsSpan(startIndex, count), value, options, (expected < 0) ? expected : expected - startIndex);
205+
206+
static void RunSpanIndexOfTest(CompareInfo compareInfo, ReadOnlySpan<char> source, ReadOnlySpan<char> value, CompareOptions options, int expected)
207+
{
208+
using BoundedMemory<char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData(source);
209+
sourceBoundedMemory.MakeReadonly();
210+
211+
using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData(value);
212+
valueBoundedMemory.MakeReadonly();
213+
214+
Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
215+
216+
if (TryCreateRuneFrom(value, out Rune rune))
217+
{
218+
Assert.Equal(expected, compareInfo.IndexOf(sourceBoundedMemory.Span, rune, options)); // try the Rune-based version
219+
}
220+
}
221+
}
196222

197223
private static void IndexOf_Char(CompareInfo compareInfo, string source, char value, int startIndex, int count, CompareOptions options, int expected)
198224
{
@@ -331,14 +357,11 @@ public void IndexOf_Invalid()
331357
AssertExtensions.Throws<ArgumentOutOfRangeException>("count", () => s_invariantCompare.IndexOf("Test", 'a', 2, 4, CompareOptions.None));
332358
}
333359

334-
[Fact]
335-
public static void IndexOf_MinusOneCompatability()
360+
// Attempts to create a Rune from the entirety of a given text buffer.
361+
private static bool TryCreateRuneFrom(ReadOnlySpan<char> text, out Rune value)
336362
{
337-
// This behavior was for .NET Framework 1.1 compatability.
338-
// Allowing empty source strings with invalid offsets was quickly outed.
339-
// with invalid offsets.
340-
Assert.Equal(0, s_invariantCompare.IndexOf("", "", -1, CompareOptions.None));
341-
Assert.Equal(-1, s_invariantCompare.IndexOf("", "a", -1, CompareOptions.None));
363+
return Rune.DecodeFromUtf16(text, out value, out int charsConsumed) == OperationStatus.Done
364+
&& charsConsumed == text.Length;
342365
}
343366
}
344367
}

src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System.Buffers;
56
using System.Collections.Generic;
67
using Xunit;
78

@@ -56,6 +57,9 @@ public static IEnumerable<object[]> IsPrefix_TestData()
5657
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true };
5758
yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true };
5859

60+
// Weightless comparisons
61+
yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true };
62+
5963
// Surrogates
6064
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true };
6165
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.IgnoreCase, true };
@@ -102,6 +106,16 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa
102106
Assert.Equal(expected, source.StartsWith(value, stringComparison));
103107
Assert.Equal(expected, source.AsSpan().StartsWith(value.AsSpan(), stringComparison));
104108
}
109+
110+
// Now test the span version - use BoundedMemory to detect buffer overruns
111+
112+
using BoundedMemory<char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(source);
113+
sourceBoundedMemory.MakeReadonly();
114+
115+
using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(value);
116+
valueBoundedMemory.MakeReadonly();
117+
118+
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
105119
}
106120

107121
[Fact]

src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System.Buffers;
56
using System.Collections.Generic;
67
using Xunit;
78

@@ -63,6 +64,9 @@ public static IEnumerable<object[]> IsSuffix_TestData()
6364
yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.None, true };
6465
yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.Ordinal, true };
6566

67+
// Weightless comparisons
68+
yield return new object[] { s_invariantCompare, "", "\u200d", CompareOptions.None, true };
69+
6670
// Surrogates
6771
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.None, true };
6872
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800\uDC00", CompareOptions.IgnoreCase, true };
@@ -104,6 +108,16 @@ public void IsSuffix(CompareInfo compareInfo, string source, string value, Compa
104108
Assert.Equal(expected, source.EndsWith(value, stringComparison));
105109
Assert.Equal(expected, source.AsSpan().EndsWith(value.AsSpan(), stringComparison));
106110
}
111+
112+
// Now test the span version - use BoundedMemory to detect buffer overruns
113+
114+
using BoundedMemory<char> sourceBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(source);
115+
sourceBoundedMemory.MakeReadonly();
116+
117+
using BoundedMemory<char> valueBoundedMemory = BoundedMemory.AllocateFromExistingData<char>(value);
118+
valueBoundedMemory.MakeReadonly();
119+
120+
Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
107121
}
108122

109123
[Fact]

0 commit comments

Comments
 (0)