Skip to content

Commit f00121f

Browse files
huoyaoyuantannergoodingdanmoseleyjeffhandleyCopilot
authored
Add BFloat16 (#98643)
* Add api for BFloat16 * Creating * Equals and GetHashCode * Comparison * Constants and comment * Xml doc * Using rounding for cast * Ref source * Simple tests * Conversion tests * Stripping sign is redundant * Fix test copied from Half * Fix conversion test cases * Constants and well-known values * Categorizing methods * Reorder conversion members * Operators batch 1 * Operators batch 2 * TryConvert * Operators batch 3 * Parsing and formatting * Add comments about how to determine parse and format info * Add missing interface implementations * NumberBufferLength * Add more comment * Correct MinFastFloatDecimalExponent * Add explicit conversion to * Explicit convert from * Fulfill casting operators * Fulfill some formatting * Apply suggestions from code review Co-authored-by: Tanner Gooding <tagoo@outlook.com> * Generic DiyFp * Generic Grisu3 * Generic Dragon4 * Add MaxRoundTripDigits to MaxPrecisionCustomFormat to FormatInfo * Generic FormatFloat * Adapt with existing FP types * Adapt formatting traits * Use generic format and delete Number.BFloat16 * Update ref source * Enable constant value tests * IsFinite/IsNaN * IsPositive/IsNegative/IsSubnormal * ToDouble * Fix test case * Add double conversion test * Parse tests * Formatting tests * RoundTripping tests * Port float->Half conversion algorithm to double->BFloat16 to handle ULP rounding. * Port function tests from Half * Convert the precision of test cases. 
* Align with TryWriteBig/LittleEndian * Remove redundant 'partial' * Use DefaultParseStyle * Fill conversion in signed integer * Fill conversion in unsigned integer and floating point * Add conversion for S.R.Numerics * Use float member function instead of MathF * Fill conversion in decimal * Add conversion for NFloat * Use soft rounding for uint->bf16 * Generic math rounding from unsigned and signed integer * Cleanup helper methods * Add integer rounding tests * Move helpers and fix comment * Update comment * Fix ILogB * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Move conversion interface to BFloat16 * Add missing conversion to Half * Remove NFloat conversion * Tune CreateSingle/CreateDouble * Adjust constant naming * Use <summary> for math expression --------- Co-authored-by: Tanner Gooding <tagoo@outlook.com> Co-authored-by: Dan Moseley <danmose@microsoft.com> Co-authored-by: Jeff Handley <jeffhandley@users.noreply.github.com> Co-authored-by: Tanner Gooding <tagoo@microsoft.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent e36d92e commit f00121f

File tree

15 files changed

+5007
-29
lines changed

15 files changed

+5007
-29
lines changed

src/libraries/System.Private.CoreLib/src/Resources/Strings.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4379,6 +4379,9 @@
43794379
<data name="NotSupported_EmitDebugInfo" xml:space="preserve">
43804380
<value>Emitting debug info is not supported for this member.</value>
43814381
</data>
4382+
<data name="Arg_MustBeBFloat16" xml:space="preserve">
4383+
<value>Object must be of type BFloat16.</value>
4384+
</data>
43824385
<data name="NotSupported_ReferenceEnumOrPrimitiveTypeRequired" xml:space="preserve">
43834386
<value>The specified type must be a reference type, a primitive type, or an enum type.</value>
43844387
</data>

src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@
603603
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Grisu3.cs" />
604604
<Compile Include="$(MSBuildThisFileDirectory)System\Number.NumberToFloatingPointBits.cs" />
605605
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Parsing.cs" />
606+
<Compile Include="$(MSBuildThisFileDirectory)System\Numerics\BFloat16.cs" />
606607
<Compile Include="$(MSBuildThisFileDirectory)System\Numerics\BitOperations.cs" />
607608
<Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix3x2.cs" />
608609
<Compile Include="$(MSBuildThisFileDirectory)System\Numerics\Matrix3x2.Impl.cs" />

src/libraries/System.Private.CoreLib/src/System/BitConverter.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Diagnostics;
5+
using System.Numerics;
56
using System.Runtime.CompilerServices;
67
using System.Runtime.InteropServices;
78
using System.Runtime.Intrinsics;
@@ -947,6 +948,10 @@ public static bool ToBoolean(ReadOnlySpan<byte> value)
947948
[MethodImpl(MethodImplOptions.AggressiveInlining)]
948949
public static Half Int16BitsToHalf(short value) => new Half((ushort)(value));
949950

951+
/// <summary>Returns the stored <c>_value</c> field of <paramref name="value" /> cast to a 16-bit signed integer.</summary>
/// <param name="value">The <see cref="BFloat16" /> whose stored field is read.</param>
/// <returns>The <c>_value</c> field of <paramref name="value" />, cast to <see cref="short" />.</returns>
internal static short BFloat16BitsToInt16(BFloat16 value)
{
    return (short)value._value;
}
952+
953+
/// <summary>Creates a <see cref="BFloat16" /> from a 16-bit signed integer by casting it to <see cref="ushort" /> and passing it to the constructor.</summary>
/// <param name="value">The 16-bit signed integer to convert.</param>
/// <returns>A <see cref="BFloat16" /> constructed from <paramref name="value" /> cast to <see cref="ushort" />.</returns>
internal static BFloat16 Int16BitsToBFloat16(short value)
{
    ushort bits = (ushort)value;
    return new BFloat16(bits);
}
954+
950955
/// <summary>
951956
/// Converts the specified double-precision floating point number to a 64-bit unsigned integer.
952957
/// </summary>
@@ -1000,5 +1005,9 @@ public static bool ToBoolean(ReadOnlySpan<byte> value)
10001005
[CLSCompliant(false)]
10011006
[MethodImpl(MethodImplOptions.AggressiveInlining)]
10021007
public static Half UInt16BitsToHalf(ushort value) => new Half(value);
1008+
1009+
/// <summary>Returns the stored <c>_value</c> field of <paramref name="value" /> as a 16-bit unsigned integer.</summary>
/// <param name="value">The <see cref="BFloat16" /> whose stored field is read.</param>
/// <returns>The <c>_value</c> field of <paramref name="value" />.</returns>
internal static ushort BFloat16BitsToUInt16(BFloat16 value)
{
    return value._value;
}
1010+
1011+
/// <summary>Creates a <see cref="BFloat16" /> by passing a 16-bit unsigned integer to the constructor.</summary>
/// <param name="value">The 16-bit unsigned integer to convert.</param>
/// <returns>A <see cref="BFloat16" /> constructed from <paramref name="value" />.</returns>
internal static BFloat16 UInt16BitsToBFloat16(ushort value)
{
    return new BFloat16(value);
}
10031012
}
10041013
}

src/libraries/System.Private.CoreLib/src/System/Double.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ internal static ulong ExtractTrailingSignificandFromBits(ulong bits)
162162
return bits & TrailingSignificandMask;
163163
}
164164

165+
/// <summary>Assembles a <see cref="double" /> from a sign flag, an exponent field and a significand field.</summary>
/// <param name="sign"><see langword="true" /> to add <c>SignMask</c> to the result bits; otherwise nothing is added for the sign.</param>
/// <param name="exp">The exponent field, shifted left by <c>BiasedExponentShift</c> before being added.</param>
/// <param name="sig">The significand bits, added as-is.</param>
/// <returns>The <see cref="double" /> whose bit pattern is the sum of the three fields.</returns>
internal static double CreateDouble(bool sign, ushort exp, ulong sig)
{
    ulong signBits = sign ? SignMask : 0UL;
    ulong exponentBits = (ulong)exp << BiasedExponentShift;

    // The fields are combined with addition (not bitwise OR), so a significand that
    // extends past its field carries into the exponent bits.
    ulong bits = signBits + exponentBits + sig;
    return BitConverter.UInt64BitsToDouble(bits);
}
166+
165167
/// <summary>Determines whether the specified value is finite (zero, subnormal, or normal).</summary>
166168
/// <remarks>This effectively checks the value is not NaN and not infinite.</remarks>
167169
[NonVersionable]

src/libraries/System.Private.CoreLib/src/System/Half.cs

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ public static explicit operator Half(float value)
738738
const uint SingleBiasedExponentMask = float.BiasedExponentMask;
739739
// Exponent displacement #2
740740
const uint Exponent13 = 0x0680_0000u;
741-
// Maximum value that is not Infinity in Half
741+
// The maximum infinitely precise value that will round down to MaxValue
742742
const float MaxHalfValueBelowInfinity = 65520.0f;
743743
// Mask for exponent bits in Half
744744
const uint ExponentMask = BiasedExponentMask;
@@ -1015,7 +1015,7 @@ public static explicit operator double(Half value)
10151015
exp -= 1;
10161016
}
10171017

1018-
return CreateDouble(sign, (ushort)(exp + 0x3F0), (ulong)sig << 42);
1018+
return double.CreateDouble(sign, (ushort)(exp + 0x3F0), (ulong)sig << 42);
10191019
}
10201020

10211021
/// <summary>Explicitly converts a half-precision floating-point value to its nearest representable <see cref="float" /> value.</summary>
@@ -1177,10 +1177,6 @@ private static double CreateDoubleNaN(bool sign, ulong significand)
11771177
return BitConverter.UInt64BitsToDouble(signInt | NaNBits | sigInt);
11781178
}
11791179

1180-
private static float CreateSingle(bool sign, byte exp, uint sig) => BitConverter.UInt32BitsToSingle(((sign ? 1U : 0U) << float.SignShift) + ((uint)exp << float.BiasedExponentShift) + sig);
1181-
1182-
private static double CreateDouble(bool sign, ushort exp, ulong sig) => BitConverter.UInt64BitsToDouble(((sign ? 1UL : 0UL) << double.SignShift) + ((ulong)exp << double.BiasedExponentShift) + sig);
1183-
11841180
#endregion
11851181

11861182
//
@@ -1360,7 +1356,7 @@ int IFloatingPoint<Half>.GetExponentShortestBitLength()
13601356
int IFloatingPoint<Half>.GetSignificandByteCount() => sizeof(ushort);
13611357

13621358
/// <inheritdoc cref="IFloatingPoint{TSelf}.GetSignificandBitLength()" />
1363-
int IFloatingPoint<Half>.GetSignificandBitLength() => 11;
1359+
int IFloatingPoint<Half>.GetSignificandBitLength() => SignificandLength;
13641360

13651361
/// <inheritdoc cref="IFloatingPoint{TSelf}.TryWriteExponentBigEndian(Span{byte}, out int)" />
13661362
bool IFloatingPoint<Half>.TryWriteExponentBigEndian(Span<byte> destination, out int bytesWritten)
@@ -2339,7 +2335,7 @@ public static bool TryParse(ReadOnlySpan<byte> utf8Text, NumberStyles style, IFo
23392335
static int IBinaryFloatParseAndFormatInfo<Half>.NumberBufferLength => Number.HalfNumberBufferLength;
23402336

23412337
static ulong IBinaryFloatParseAndFormatInfo<Half>.ZeroBits => 0;
2342-
static ulong IBinaryFloatParseAndFormatInfo<Half>.InfinityBits => 0x7C00;
2338+
static ulong IBinaryFloatParseAndFormatInfo<Half>.InfinityBits => PositiveInfinityBits;
23432339

23442340
static ulong IBinaryFloatParseAndFormatInfo<Half>.NormalMantissaMask => (1UL << SignificandLength) - 1;
23452341
static ulong IBinaryFloatParseAndFormatInfo<Half>.DenormalMantissaMask => TrailingSignificandMask;
@@ -2351,15 +2347,15 @@ public static bool TryParse(ReadOnlySpan<byte> utf8Text, NumberStyles style, IFo
23512347
static int IBinaryFloatParseAndFormatInfo<Half>.MaxDecimalExponent => 5;
23522348

23532349
static int IBinaryFloatParseAndFormatInfo<Half>.ExponentBias => ExponentBias;
2354-
static ushort IBinaryFloatParseAndFormatInfo<Half>.ExponentBits => 5;
2350+
static ushort IBinaryFloatParseAndFormatInfo<Half>.ExponentBits => BiasedExponentLength;
23552351

23562352
static int IBinaryFloatParseAndFormatInfo<Half>.OverflowDecimalExponent => (MaxExponent + (2 * SignificandLength)) / 3;
2357-
static int IBinaryFloatParseAndFormatInfo<Half>.InfinityExponent => 0x1F;
2353+
static int IBinaryFloatParseAndFormatInfo<Half>.InfinityExponent => MaxBiasedExponent;
23582354

23592355
static ushort IBinaryFloatParseAndFormatInfo<Half>.NormalMantissaBits => SignificandLength;
23602356
static ushort IBinaryFloatParseAndFormatInfo<Half>.DenormalMantissaBits => TrailingSignificandLength;
23612357

2362-
static int IBinaryFloatParseAndFormatInfo<Half>.MinFastFloatDecimalExponent => -8;
2358+
static int IBinaryFloatParseAndFormatInfo<Half>.MinFastFloatDecimalExponent => -26;
23632359
static int IBinaryFloatParseAndFormatInfo<Half>.MaxFastFloatDecimalExponent => 4;
23642360

23652361
static int IBinaryFloatParseAndFormatInfo<Half>.MinExponentRoundToEven => -21;

src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ internal interface IBinaryIntegerParseAndFormatInfo<TSelf> : IBinaryInteger<TSel
5050
internal interface IBinaryFloatParseAndFormatInfo<TSelf> : IBinaryFloatingPointIeee754<TSelf>, IMinMaxValue<TSelf>
5151
where TSelf : unmanaged, IBinaryFloatParseAndFormatInfo<TSelf>
5252
{
53+
/// <summary>
54+
/// Ceiling(Log10(5^(Abs(MinBinaryExponent) - 1))) + NormalMantissaBits + 1 + 1
55+
/// </summary>
5356
static abstract int NumberBufferLength { get; }
5457

5558
static abstract ulong ZeroBits { get; }
@@ -61,7 +64,14 @@ internal interface IBinaryFloatParseAndFormatInfo<TSelf> : IBinaryFloatingPointI
6164
static abstract int MinBinaryExponent { get; }
6265
static abstract int MaxBinaryExponent { get; }
6366

67+
/// <summary>
68+
/// Floor(Log10(Epsilon))
69+
/// </summary>
6470
static abstract int MinDecimalExponent { get; }
71+
72+
/// <summary>
73+
/// Ceiling(Log10(MaxValue))
74+
/// </summary>
6575
static abstract int MaxDecimalExponent { get; }
6676

6777
static abstract int ExponentBias { get; }
@@ -73,29 +83,53 @@ internal interface IBinaryFloatParseAndFormatInfo<TSelf> : IBinaryFloatingPointI
7383
static abstract ushort NormalMantissaBits { get; }
7484
static abstract ushort DenormalMantissaBits { get; }
7585

86+
/// <summary>
87+
/// Ceiling(Log10(2^(MinBinaryExponent - 1 - DenormalMantissaBits - 64)))
88+
/// </summary>
7689
static abstract int MinFastFloatDecimalExponent { get; }
90+
91+
/// <summary>
92+
/// MaxDecimalExponent - 1
93+
/// </summary>
7794
static abstract int MaxFastFloatDecimalExponent { get; }
7895

96+
/// <summary>
97+
/// -Floor(Log5(2^(64 - NormalMantissaBits)))
98+
/// </summary>
7999
static abstract int MinExponentRoundToEven { get; }
100+
101+
/// <summary>
102+
/// Floor(Log5(2^(NormalMantissaBits + 1)))
103+
/// </summary>
80104
static abstract int MaxExponentRoundToEven { get; }
81105

106+
/// <summary>
107+
/// Max(n) when 10^n can be precisely represented
108+
/// </summary>
82109
static abstract int MaxExponentFastPath { get; }
83110
static abstract ulong MaxMantissaFastPath { get; }
84111

85112
static abstract TSelf BitsToFloat(ulong bits);
86113

87114
static abstract ulong FloatToBits(TSelf value);
88115

89-
// Maximum number of digits required to guarantee that any given floating point
90-
// number can roundtrip. Some numbers may require less, but none will require more.
116+
/// <summary>
117+
/// Maximum number of digits required to guarantee that any given floating point
118+
/// number can roundtrip. Some numbers may require less, but none will require more.
119+
/// </summary>
120+
/// <remarks>
121+
/// Ceiling(Log10(2^NormalMantissaBits)) + 1
122+
/// </remarks>
91123
static abstract int MaxRoundTripDigits { get; }
92124

93-
// SinglePrecisionCustomFormat and DoublePrecisionCustomFormat are used to ensure that
94-
// custom format strings return the same string as in previous releases when the format
95-
// would return x digits or less (where x is the value of the corresponding constant).
96-
// In order to support more digits, we would need to update ParseFormatSpecifier to pre-parse
97-
// the format and determine exactly how many digits are being requested and whether they
98-
// represent "significant digits" or "digits after the decimal point".
125+
/// <summary>
126+
/// MaxPrecisionCustomFormat is used to ensure that
127+
/// custom format strings return the same string as in previous releases when the format
128+
/// would return x digits or less (where x is the value of the corresponding constant).
129+
/// In order to support more digits, we would need to update ParseFormatSpecifier to pre-parse
130+
/// the format and determine exactly how many digits are being requested and whether they
131+
/// represent "significant digits" or "digits after the decimal point".
132+
/// </summary>
99133
static abstract int MaxPrecisionCustomFormat { get; }
100134
}
101135

0 commit comments

Comments
 (0)