dotnet
diff --git a/‎src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems‎
Lines changed: 3 additions & 0 deletions b/‎src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs‎
Lines changed: 48 additions & 0 deletions b/‎src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs‎
Lines changed: 33 additions & 0 deletions b/‎src/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs‎
Lines changed: 33 additions & 0 deletions
@@ -732,7 +732,10 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF32Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF7Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF8Encoding.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Utf8Utility.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Utf8Utility.Validation.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\ValueStringBuilder.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\TimeSpan.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\ThreadAttributes.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Threading\AbandonedMutexException.cs" />
 
@@ -0,0 +1,48 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+
+namespace System.Text.Unicode
+{
+    /// <summary>
+    /// Static entry points for inspecting, transcoding, and manipulating UTF-8 data.
+    /// </summary>
+    public static class Utf8
+    {
+        /// <summary>
+        /// Checks whether <paramref name="source"/> is a well-formed UTF-8 sequence.
+        /// </summary>
+        /// <param name="source">The bytes to validate.</param>
+        /// <returns>
+        /// <see langword="true"/> when the sequence is well-formed UTF-8; otherwise <see langword="false"/>.
+        /// </returns>
+        /// <remarks>
+        /// An empty <paramref name="source"/> yields <see langword="true"/>.
+        /// </remarks>
+        public static bool IsWellFormed(ReadOnlySpan<byte> source)
+            => Utf8Utility.IsWellFormedSequence(source);
+
+        /// <summary>
+        /// Locates the first byte in <paramref name="source"/> at which an invalid UTF-8 subsequence
+        /// begins, and also reports the UTF-16 code unit count and <see cref="Rune"/> count of the
+        /// data that precedes it.
+        /// </summary>
+        /// <param name="source">The bytes to scan.</param>
+        /// <param name="utf16CharCount">Receives the UTF-16 code unit count described in the remarks.</param>
+        /// <param name="runeCount">Receives the <see cref="Rune"/> count described in the remarks.</param>
+        /// <returns>
+        /// The non-negative index of the first byte in <paramref name="source"/> that begins an invalid
+        /// UTF-8 subsequence, or -1 if <paramref name="source"/> is well-formed.
+        /// </returns>
+        /// <remarks>
+        /// Both counts are measured from the start of <paramref name="source"/> up to the reported first
+        /// invalid subsequence. When <paramref name="source"/> is well-formed there is no such subsequence,
+        /// so <paramref name="utf16CharCount"/> and <paramref name="runeCount"/> instead describe the
+        /// entire buffer.
+        /// </remarks>
+        public static int GetIndexOfFirstInvalidByte(ReadOnlySpan<byte> source, out int utf16CharCount, out int runeCount)
+            => Utf8Utility.GetIndexOfFirstInvalidSubsequence(source, out utf16CharCount, out runeCount);
+    }
+}
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 
 namespace System.Text
@@ -155,6 +156,38 @@ public static int GetUtf8SequenceLength(uint value)
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool IsSurrogateCodePoint(uint value) => IsInRangeInclusive(value, 0xD800U, 0xDFFFU);
 
+        /// <summary>
+        /// Returns <see langword="true"/> iff <paramref name="value"/> is a UTF-8 continuation byte;
+        /// i.e., has binary representation 10xxxxxx, where x is any bit.
+        /// </summary>
+        /// <param name="value">The byte to test, passed by readonly reference (see the note below).</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool IsUtf8ContinuationByte(in byte value)
+        {
+            // This API takes its input as a readonly ref so that the JITter can emit "cmp ModRM" statements
+            // directly rather than bounce a temporary through a register. That is, we want the JIT to be
+            // able to emit a single "cmp byte ptr [data], C0h" statement if we're querying a memory location
+            // to see if it's a continuation byte. Data that's already enregistered will go through the
+            // normal "cmp reg, C0h" code paths, perhaps with some extra unnecessary "movzx" instructions.
+
+            // Reinterpreted as a signed byte, [ 0b1000_0000, 0b1011_1111 ] is [ -128 (sbyte.MinValue), -65 ],
+            // so one signed comparison covers the range; 'unchecked' keeps the wrapping cast in checked builds.
+            return unchecked((sbyte)value) < -64;
+        }
+
+        /// <summary>
+        /// Returns <see langword="true"/> iff <paramref name="value"/>, a UTF-8 code unit widened to
+        /// 32 bits, is a continuation byte; i.e., lies within [ 0x80..0xBF ], inclusive.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool IsUtf8ContinuationByte(uint value)
+        {
+            // TODO: This should really be using a single 8-bit cmp instruction rather than a 32-bit lea, cmp.
+            const uint FirstContinuationByte = 0x80U, LastContinuationByte = 0xBFU;
+            Debug.Assert(value <= byte.MaxValue);
+            return IsInRangeInclusive(value, FirstContinuationByte, LastContinuationByte);
+        }
+
         /// <summary>
         /// Returns <see langword="true"/> iff <paramref name="codePoint"/> is a valid Unicode code
         /// point, i.e., is in [ U+0000..U+10FFFF ], inclusive.