dotnet · mkhamoyan · Jun 21, 2023 · May 30, 2023 · May 31, 2023 · May 31, 2023
diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md
@@ -277,7 +277,7 @@ new CultureInfo("de-DE").CompareInfo.IndexOf("strasse", "stra\u00DFe", 0, Compar
 
 For OSX platforms we are using native apis instead of ICU data.
 
-**String comparison**
+## String comparison
 
 Affected public APIs:
 - CompareInfo.Compare,
@@ -292,44 +292,122 @@ The number of `CompareOptions` and `NSStringCompareOptions` combinations are lim
 
 - `None`:
 
-`CompareOptions.None` is mapped to `NSStringCompareOptions.NSLiteralSearch`
+   `CompareOptions.None` is mapped to `NSStringCompareOptions.NSLiteralSearch`
 
-There are some behaviour changes. Below are examples of such cases.
+   There are some behaviour changes. Below are examples of such cases.
 
-| **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** |                       **comments**                      |
-|:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:|
-|   `\u3042` あ   |   `\u30A1` ァ   |   None  |             1            |    -1   |     hiragana and katakana characters are ordered differently compared to ICU    |
-|   `\u304D\u3083` きゃ  |   `\u30AD\u30E3` キャ |     None     |             1            |    -1   | hiragana and katakana characters are ordered differently compared to ICU  |
-|   `\u304D\u3083` きゃ  |   `\u30AD\u3083` キゃ  |     None     |             1           |    -1   |  hiragana and katakana characters are ordered differently compared to ICU  |
-|   `\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C` ばびﾌﾞﾍﾞぼ  |   `\u30D0\u30D3\u3076\u30D9\uFF8E\uFF9E` バビぶベﾎﾞ  |     None     |   1  |  -1  | hiragana and katakana characters are ordered differently compared to ICU   |
-|   `\u3060` だ  |   `\u30C0` ダ  |     None     |   1  |  -1  |   hiragana and katakana characters are ordered differently compared to ICU |
-|   `\u00C0` À  |   `A\u0300` À  |     None     |   1  |  0  |   This is not same character for native api |
+   | **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** |                       **comments**                      |
+   |:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:|
+   |   `\u3042` あ   |   `\u30A1` ァ   |   None  |             1            |    -1   |     hiragana and katakana characters are ordered differently compared to ICU    |
+   |   `\u304D\u3083` きゃ  |   `\u30AD\u30E3` キャ |     None     |             1            |    -1   | hiragana and katakana characters are ordered differently compared to ICU  |
+   |   `\u304D\u3083` きゃ  |   `\u30AD\u3083` キゃ  |     None     |             1           |    -1   |  hiragana and katakana characters are ordered differently compared to ICU  |
+   |   `\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C` ばびﾌﾞﾍﾞぼ  |   `\u30D0\u30D3\u3076\u30D9\uFF8E\uFF9E` バビぶベﾎﾞ  |     None     |   1  |  -1  | hiragana and katakana characters are ordered differently compared to ICU   |
+   |   `\u3060` だ  |   `\u30C0` ダ  |     None     |   1  |  -1  |   hiragana and katakana characters are ordered differently compared to ICU |
 
 - `StringSort` :
 
-`CompareOptions.StringSort` is mapped to `NSStringCompareOptions.NSLiteralSearch` .ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric. That is how works also `NSLiteralSearch`.
+   `CompareOptions.StringSort` is mapped to `NSStringCompareOptions.NSLiteralSearch`. ICU's default is to use "StringSort", i.e. non-alphanumeric symbols come before alphanumeric ones. `NSLiteralSearch` behaves the same way.
 
 - `IgnoreCase`:
 
-`CompareOptions.IgnoreCase` is mapped to `NSStringCompareOptions.NSCaseInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
+   `CompareOptions.IgnoreCase` is mapped to `NSStringCompareOptions.NSCaseInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
 
-There are some behaviour changes. Below are examples of such cases.
+   There are some behaviour changes. Below are examples of such cases.
 
-| **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** |                       **comments**                      |
-|:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:|
-|   `\u3060` だ |   `\u30C0` ダ  |     IgnoreCase     |   1  |  -1  |  hiragana and katakana characters are ordered differently compared to ICU  |
-|   `\u00C0` À |   `a\u0300` à  |     IgnoreCase     |   1  |  0  |  This is related to above mentioned case under `CompareOptions.None` i.e. `\u00C0` À !=  À `A\u0300`   |
+   | **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** |                       **comments**                      |
+   |:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:|
+   |   `\u3060` だ |   `\u30C0` ダ  |     IgnoreCase     |   1  |  -1  |  hiragana and katakana characters are ordered differently compared to ICU  |
 
 - `IgnoreNonSpace`:
 
-`CompareOptions.IgnoreNonSpace` is mapped to `NSStringCompareOptions.NSDiacriticInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
+   `CompareOptions.IgnoreNonSpace` is mapped to `NSStringCompareOptions.NSDiacriticInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
 
 - `IgnoreWidth`:
 
-`CompareOptions.IgnoreWidth` is mapped to `NSStringCompareOptions.NSWidthInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
+   `CompareOptions.IgnoreWidth` is mapped to `NSStringCompareOptions.NSWidthInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch`
 
 - All combinations that contain below `CompareOptions` always throw `PlatformNotSupportedException`:
 
-`IgnoreSymbols`,
+   `IgnoreSymbols`,
+
+   `IgnoreKanaType`,
+
+## String starts with / ends with
+
+Affected public APIs:
+- CompareInfo.IsPrefix
+- CompareInfo.IsSuffix
+- String.StartsWith
+- String.EndsWith
+
+Mapped to Apple Native API [`compare:options:range:locale:`](https://developer.apple.com/documentation/foundation/nsstring/1414561-compare?language=objc).
+Apple Native API does not expose locale-sensitive endsWith/startsWith functions. As a workaround, both strings are normalized and weightless characters are removed. The resulting strings are cut to the same length and then compared. Because the strings are normalized before being cut, the match length cannot be calculated on the original strings. Methods that calculate this information throw `PlatformNotSupportedException`:
+
+- [CompareInfo.IsPrefix](https://learn.microsoft.com/dotnet/api/system.globalization.compareinfo.isprefix)
+- [CompareInfo.IsSuffix](https://learn.microsoft.com/dotnet/api/system.globalization.compareinfo.issuffix)
+
+- `IgnoreSymbols`
+
+   As there is no IgnoreSymbols equivalent in NSStringCompareOptions all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`
+
+## String indexing
+
+Affected public APIs:
+- CompareInfo.IndexOf
+- CompareInfo.LastIndexOf
+- String.IndexOf
+- String.LastIndexOf
+
+Mapped to Apple Native API [`rangeOfString:options:range:locale:`](https://developer.apple.com/documentation/foundation/nsstring/1417348-rangeofstring?language=objc)
+
+In `rangeOfString:options:range:locale:` objects are compared by checking the Unicode canonical equivalence of their code point sequences.
+When the search string contains a diaeresis and is in a different normalization form than the source string, the result can be incorrect.
+
+Characters in general are represented by Unicode code points, and some characters can be represented either as a single code point or as a sequence of multiple code points (like letters with diacritics/diaeresis). Normalization Form C compresses characters to their single code point format if they were originally represented as a sequence of multiple code points. Normalization Form D does the opposite and expands characters into their multiple code point formats if possible.
+
+`NSString` `rangeOfString:options:range:locale:` uses canonical equivalence to find the position of the `searchString` within the `sourceString`; however, it does not automatically handle comparison between precomposed (single code point representation) and decomposed (multiple code points representation) forms. Because the `searchString` and `sourceString` can be in different forms, to properly find the index we need to ensure that the `searchString` is in the same form as the `sourceString` by calling `rangeOfString:options:range:locale:` with every normalization form.
+
+Here are the covered cases with diaeresis:
+  1. The search string contains a diaeresis and has the same normalization form as in the source string.
+  2. The search string contains a diaeresis; the search and source strings have the same letters with different char lengths, but the substring is normalized in the source.
+
+     a. search string `normalizing to form C` is substring of source string. example: search string: `U\u0308` source string:  `Source is \u00DC` => matchLength is 1
+
+     b. search string `normalizing to form D` is substring of source string. example: search string: `\u00FC` source string: `Source is \u0075\u0308` => matchLength is 2
+
+Not covered case:
+
+The search string contains a diaeresis, and the search and source strings have the same letters with different char lengths, but the substring is not
+normalized in the source. Example: search string: `U\u0308 and \u00FC` source string: `Source is a\u0308\u0308a and \u0075\u0308`.
+As the example shows, normalizing the search string to form C or D will not help to find the substring in the source string.
+
+- `IgnoreSymbols`
+
+   As there is no IgnoreSymbols equivalent in NSStringCompareOptions all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`
+
+- Some letters consist of more than one grapheme.
+
+   Apple Native API does not guarantee that a string will be segmented by letters but by graphemes. E.g. in `cs-CZ` and `sk-SK` "ch" is 1 letter, 2 graphemes. The following code with `HybridGlobalization` switched off returns -1 (not found) while with `HybridGlobalization` switched on, it returns 1.
+
+   ``` C#
+   new CultureInfo("sk-SK").CompareInfo.IndexOf("ch", "h"); // -1 or 1
+   ```
+
+- Some graphemes have multi-grapheme equivalents.
+   E.g. in `de-DE` ß (`\u00DF`) is one letter and one grapheme and "ss" is one letter and is recognized as two graphemes. Apple Native API's equivalent of `IgnoreNonSpace` treats them as the same letter when comparing. Similar case: ǳ (`\u01F3`) and dz.
+
+   Using `IgnoreNonSpace` for these two with `HybridGlobalization` off, also returns 0 (they are equal). However, the workaround used in `HybridGlobalization` will compare them grapheme-by-grapheme and will return -1.
+
+   ``` C#
+   new CultureInfo("de-DE").CompareInfo.IndexOf("strasse", "stra\u00DFe", 0, CompareOptions.IgnoreNonSpace); // 0 or -1
+   ```
+
+
+## SortKey
+
+Affected public APIs:
+- CompareInfo.GetSortKey
+- CompareInfo.GetSortKeyLength
+- CompareInfo.GetHashCode
 
-`IgnoreKanaType`,
+Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
diff --git a/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs b/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs
@@ -12,5 +12,16 @@ internal static partial class Globalization
     {
         [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_CompareStringNative", StringMarshalling = StringMarshalling.Utf16)]
         internal static unsafe partial int CompareStringNative(string localeName, int lNameLen, char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len, CompareOptions options);
+
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_EndsWithNative", StringMarshalling = StringMarshalling.Utf16)]
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        internal static unsafe partial int EndsWithNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
+
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_IndexOfNative", StringMarshalling = StringMarshalling.Utf16)]
+        internal static unsafe partial NSRange IndexOfNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, [MarshalAs(UnmanagedType.Bool)] bool fromBeginning);
+
+        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_StartsWithNative", StringMarshalling = StringMarshalling.Utf16)]
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        internal static unsafe partial int StartsWithNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
     }
 }
diff --git a/src/libraries/Common/src/Interop/OSX/NSRange.cs b/src/libraries/Common/src/Interop/OSX/NSRange.cs
@@ -0,0 +1,11 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+using System.Runtime.InteropServices;
+internal static partial class Interop
+{
+    internal struct NSRange
+    {
+        public int Location;
+        public int Length;
+    }
+}
diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs
@@ -192,10 +192,10 @@ public static IEnumerable<object[]> Compare_TestData()
             yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 };
             yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.IgnoreCase, -1 };
 
-            yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : 0 };
+            yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, 0 };
             yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.Ordinal, 1 };
             yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.None, 1 };
-            yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.IgnoreCase, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : 0 };
+            yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.IgnoreCase, 0 };
             yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.Ordinal, 1 };
             yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.OrdinalIgnoreCase, 1 };
             yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, -1 };

diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs
@@ -33,7 +33,7 @@ public static IEnumerable<object[]> IndexOf_TestData()
             yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", 0, 12, CompareOptions.Ordinal, -1, 0 };
 
             // Slovak
-            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX)
             {
                 yield return new object[] { s_slovakCompare, "ch", "h", 0, 2, CompareOptions.None, -1, 0 };
                  // Android has its own ICU, which doesn't work well with slovak
@@ -82,7 +82,7 @@ public static IEnumerable<object[]> IndexOf_TestData()
             yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1, 0 };
 
             // Ignore symbols
-            if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
+            if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX)
                 yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 };
             yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 };
             yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 0, 13, CompareOptions.None, 2, 2 };
@@ -142,8 +142,11 @@ public static IEnumerable<object[]> IndexOf_TestData()
             {
                 yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 7 };
                 yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 6 };
-                yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, supportedIgnoreNonSpaceOption, 3, 2 };
-                yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, supportedIgnoreNonSpaceOption, 3, 1 };
+                if (!PlatformDetection.IsHybridGlobalizationOnOSX)
+                {
+                    yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, supportedIgnoreNonSpaceOption, 3, 2 };
+                    yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, supportedIgnoreNonSpaceOption, 3, 1 };
+                }
             }
             yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };
             yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 };