Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit b4f99f2

Browse files
grant-d, jkotas
authored and committed
Consolidate implementation of Rotate and PopCount (#22584)
* Perf: BitOps.LeadingZeroCount
* Remove redundant MSIL cast, conv.u8
* Use local functions for SoftwareFallback
* Target BIT32/64
1 parent 87c28eb commit b4f99f2

File tree

8 files changed

+155
-72
lines changed

8 files changed

+155
-72
lines changed

src/System.Private.CoreLib/shared/System/BitOps.cs

Lines changed: 136 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313

1414
namespace System
1515
{
16+
/// <summary>
17+
/// Utility methods for intrinsic bit-twiddling operations.
18+
/// The methods use hardware intrinsics when available on the underlying platform,
19+
/// otherwise they use optimized software fallbacks.
20+
/// </summary>
1621
internal static class BitOps
1722
{
1823
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
@@ -53,11 +58,11 @@ public static int TrailingZeroCount(uint value)
5358
{
5459
if (Bmi1.IsSupported)
5560
{
56-
// Note that TZCNT contract specifies 0->32
61+
// TZCNT contract is 0->32
5762
return (int)Bmi1.TrailingZeroCount(value);
5863
}
5964

60-
// Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
65+
// Unguarded fallback contract is 0->0
6166
if (value == 0)
6267
{
6368
return 32;
@@ -67,8 +72,8 @@ public static int TrailingZeroCount(uint value)
6772
return Unsafe.AddByteOffset(
6873
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
6974
ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn),
70-
// long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
71-
(IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit
75+
// uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
76+
(IntPtr)(int)(((value & (uint)-(int)value) * 0x077CB531u) >> 27)); // Multi-cast mitigates redundant conv.u8
7277
}
7378

7479
/// <summary>
@@ -90,7 +95,7 @@ public static int TrailingZeroCount(ulong value)
9095
{
9196
if (Bmi1.X64.IsSupported)
9297
{
93-
// Note that TZCNT contract specifies 0->64
98+
// TZCNT contract is 0->64
9499
return (int)Bmi1.X64.TrailingZeroCount(value);
95100
}
96101

@@ -114,17 +119,17 @@ public static int LeadingZeroCount(uint value)
114119
{
115120
if (Lzcnt.IsSupported)
116121
{
117-
// Note that LZCNT contract specifies 0->32
122+
// LZCNT contract is 0->32
118123
return (int)Lzcnt.LeadingZeroCount(value);
119124
}
120125

121-
// Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
126+
// Unguarded fallback contract is 0->31
122127
if (value == 0)
123128
{
124129
return 32;
125130
}
126131

127-
return 31 - Log2(value);
132+
return 31 - Log2SoftwareFallback(value);
128133
}
129134

130135
/// <summary>
@@ -137,7 +142,7 @@ public static int LeadingZeroCount(ulong value)
137142
{
138143
if (Lzcnt.X64.IsSupported)
139144
{
140-
// Note that LZCNT contract specifies 0->64
145+
// LZCNT contract is 0->64
141146
return (int)Lzcnt.X64.LeadingZeroCount(value);
142147
}
143148

@@ -168,30 +173,32 @@ public static int Log2(uint value)
168173
// 1000.. 0 31-0 31
169174
if (Lzcnt.IsSupported)
170175
{
171-
// Enforce conventional contract 0->0 (since Log(0) is undefined)
176+
// Enforce conventional contract 0->0 (Log(0) is undefined)
172177
if (value == 0)
173178
{
174179
return 0;
175180
}
176181

177-
// Note that LZCNT contract specifies 0->32
182+
// LZCNT contract is 0->32
178183
return 31 - (int)Lzcnt.LeadingZeroCount(value);
179184
}
180185

181-
// Already has contract 0->0, without branching
186+
// Fallback contract is 0->0
182187
return Log2SoftwareFallback(value);
183188
}
184189

185190
/// <summary>
186191
/// Returns the integer (floor) log of the specified value, base 2.
187192
/// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
188-
/// Does not incur branching.
193+
/// Does not directly use any hardware intrinsics, nor does it incur branching.
189194
/// </summary>
190195
/// <param name="value">The value.</param>
191196
private static int Log2SoftwareFallback(uint value)
192197
{
193198
// No AggressiveInlining due to large method size
199+
// Has conventional contract 0->0 (Log(0) is undefined)
194200

201+
// Set every bit below the highest set bit, e.g. 00010010 becomes 00011111
195202
value |= value >> 01;
196203
value |= value >> 02;
197204
value |= value >> 04;
@@ -202,7 +209,7 @@ private static int Log2SoftwareFallback(uint value)
202209
return Unsafe.AddByteOffset(
203210
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_1100_0100_1010_1100_1101_1101u
204211
ref MemoryMarshal.GetReference(s_Log2DeBruijn),
205-
// long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
212+
// uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
206213
(IntPtr)(int)((value * 0x07C4ACDDu) >> 27));
207214
}
208215

@@ -216,13 +223,13 @@ public static int Log2(ulong value)
216223
{
217224
if (Lzcnt.X64.IsSupported)
218225
{
219-
// Enforce conventional contract 0->0 (since Log(0) is undefined)
226+
// Enforce conventional contract 0->0 (Log(0) is undefined)
220227
if (value == 0)
221228
{
222229
return 0;
223230
}
224231

225-
// Note that LZCNT contract specifies 0->64
232+
// LZCNT contract is 0->64
226233
return 63 - (int)Lzcnt.X64.LeadingZeroCount(value);
227234
}
228235

@@ -235,5 +242,118 @@ public static int Log2(ulong value)
235242

236243
return 32 + Log2(hi);
237244
}
245+
246+
/// <summary>
247+
/// Rotates the specified value left by the specified number of bits.
248+
/// Similar in behavior to the x86 instruction ROL.
249+
/// </summary>
250+
/// <param name="value">The value to rotate.</param>
251+
/// <param name="offset">The number of bits to rotate by.
252+
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
253+
/// <returns>The rotated value.</returns>
254+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
255+
public static uint RotateLeft(uint value, int offset)
256+
=> (value << offset) | (value >> (32 - offset));
257+
258+
/// <summary>
259+
/// Rotates the specified value left by the specified number of bits.
260+
/// Similar in behavior to the x86 instruction ROL.
261+
/// </summary>
262+
/// <param name="value">The value to rotate.</param>
263+
/// <param name="offset">The number of bits to rotate by.
264+
/// Any value outside the range [0..63] is treated as congruent mod 64.</param>
265+
/// <returns>The rotated value.</returns>
266+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
267+
public static ulong RotateLeft(ulong value, int offset)
268+
=> (value << offset) | (value >> (64 - offset));
269+
270+
/// <summary>
271+
/// Rotates the specified value right by the specified number of bits.
272+
/// Similar in behavior to the x86 instruction ROR.
273+
/// </summary>
274+
/// <param name="value">The value to rotate.</param>
275+
/// <param name="offset">The number of bits to rotate by.
276+
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
277+
/// <returns>The rotated value.</returns>
278+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
279+
public static uint RotateRight(uint value, int offset)
280+
=> (value >> offset) | (value << (32 - offset));
281+
282+
/// <summary>
283+
/// Rotates the specified value right by the specified number of bits.
284+
/// Similar in behavior to the x86 instruction ROR.
285+
/// </summary>
286+
/// <param name="value">The value to rotate.</param>
287+
/// <param name="offset">The number of bits to rotate by.
288+
/// Any value outside the range [0..63] is treated as congruent mod 64.</param>
289+
/// <returns>The rotated value.</returns>
290+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
291+
public static ulong RotateRight(ulong value, int offset)
292+
=> (value >> offset) | (value << (64 - offset));
293+
294+
/// <summary>
295+
/// Returns the population count (number of bits set) of a mask.
296+
/// Similar in behavior to the x86 instruction POPCNT.
297+
/// </summary>
298+
/// <param name="value">The value.</param>
299+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
300+
public static int PopCount(uint value)
301+
{
302+
if (Popcnt.IsSupported)
303+
{
304+
return (int)Popcnt.PopCount(value);
305+
}
306+
307+
return SoftwareFallback(value);
308+
309+
int SoftwareFallback(uint v)
310+
{
311+
const uint c1 = 0x_55555555u;
312+
const uint c2 = 0x_33333333u;
313+
const uint c3 = 0x_0F0F0F0Fu;
314+
const uint c4 = 0x_01010101u;
315+
316+
v = v - ((v >> 1) & c1);
317+
v = (v & c2) + ((v >> 2) & c2);
318+
v = (((v + (v >> 4)) & c3) * c4) >> 24;
319+
320+
return (int)v;
321+
}
322+
}
323+
324+
/// <summary>
325+
/// Returns the population count (number of bits set) of a mask.
326+
/// Similar in behavior to the x86 instruction POPCNT.
327+
/// </summary>
328+
/// <param name="value">The value.</param>
329+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
330+
public static int PopCount(ulong value)
331+
{
332+
if (Popcnt.X64.IsSupported)
333+
{
334+
return (int)Popcnt.X64.PopCount(value);
335+
}
336+
337+
#if BIT32
338+
return PopCount((uint)value) // lo
339+
+ PopCount((uint)(value >> 32)); // hi
340+
#else
341+
return SoftwareFallback(value);
342+
343+
int SoftwareFallback(ulong v)
344+
{
345+
const ulong c1 = 0x_55555555_55555555ul;
346+
const ulong c2 = 0x_33333333_33333333ul;
347+
const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful;
348+
const ulong c4 = 0x_01010101_01010101ul;
349+
350+
v = v - ((v >> 1) & c1);
351+
v = (v & c2) + ((v >> 2) & c2);
352+
v = (((v + (v >> 4)) & c3) * c4) >> 56;
353+
354+
return (int)v;
355+
}
356+
#endif
357+
}
238358
}
239359
}

src/System.Private.CoreLib/shared/System/Buffers/Binary/Reader.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,8 @@ public static uint ReverseEndianness(uint value)
105105
// Testing shows that throughput increases if the AND
106106
// is performed before the ROL / ROR.
107107

108-
uint mask_xx_zz = (value & 0x00FF00FFU);
109-
uint mask_ww_yy = (value & 0xFF00FF00U);
110-
return ((mask_xx_zz >> 8) | (mask_xx_zz << 24))
111-
+ ((mask_ww_yy << 8) | (mask_ww_yy >> 24));
108+
return BitOps.RotateRight(value & 0x00FF00FFu, 8) // xx zz
109+
+ BitOps.RotateLeft(value & 0xFF00FF00u, 8); // ww yy
112110
}
113111

114112
/// <summary>

src/System.Private.CoreLib/shared/System/Buffers/Text/FormattingHelpers.CountDigits.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,9 @@ public static int CountDigits(uint value)
103103
[MethodImpl(MethodImplOptions.AggressiveInlining)]
104104
public static int CountHexDigits(ulong value)
105105
{
106-
int right = 64 - BitOps.LeadingZeroCount(value | 1);
107-
return (right + 3) >> 2;
106+
return (64 - BitOps.LeadingZeroCount(value | 1) + 3) >> 2;
108107
}
109108

110-
111109
// Counts the number of trailing '0' digits in a decimal number.
112110
// e.g., value = 0 => retVal = 0, valueWithoutTrailingZeros = 0
113111
// value = 1234 => retVal = 0, valueWithoutTrailingZeros = 1234

src/System.Private.CoreLib/shared/System/Diagnostics/Tracing/EventSource.cs

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,7 +1579,7 @@ private void Drain()
15791579
{
15801580
for (int i = 16; i != 80; i++)
15811581
{
1582-
this.w[i] = Rol1((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]));
1582+
this.w[i] = BitOps.RotateLeft((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]), 1);
15831583
}
15841584

15851585
unchecked
@@ -1594,28 +1594,28 @@ private void Drain()
15941594
{
15951595
const uint k = 0x5A827999;
15961596
uint f = (b & c) | ((~b) & d);
1597-
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
1597+
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
15981598
}
15991599

16001600
for (int i = 20; i != 40; i++)
16011601
{
16021602
uint f = b ^ c ^ d;
16031603
const uint k = 0x6ED9EBA1;
1604-
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
1604+
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
16051605
}
16061606

16071607
for (int i = 40; i != 60; i++)
16081608
{
16091609
uint f = (b & c) | (b & d) | (c & d);
16101610
const uint k = 0x8F1BBCDC;
1611-
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
1611+
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
16121612
}
16131613

16141614
for (int i = 60; i != 80; i++)
16151615
{
16161616
uint f = b ^ c ^ d;
16171617
const uint k = 0xCA62C1D6;
1618-
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
1618+
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
16191619
}
16201620

16211621
this.w[80] += a;
@@ -1628,21 +1628,6 @@ private void Drain()
16281628
this.length += 512; // 64 bytes == 512 bits
16291629
this.pos = 0;
16301630
}
1631-
1632-
private static uint Rol1(uint input)
1633-
{
1634-
return (input << 1) | (input >> 31);
1635-
}
1636-
1637-
private static uint Rol5(uint input)
1638-
{
1639-
return (input << 5) | (input >> 27);
1640-
}
1641-
1642-
private static uint Rol30(uint input)
1643-
{
1644-
return (input << 30) | (input >> 2);
1645-
}
16461631
}
16471632

16481633
private static Guid GenerateGuidFromName(string name)

src/System.Private.CoreLib/shared/System/HashCode.cs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -252,10 +252,6 @@ public static int Combine<T1, T2, T3, T4, T5, T6, T7, T8>(T1 value1, T2 value2,
252252
return (int)hash;
253253
}
254254

255-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
256-
private static uint Rol(uint value, int count)
257-
=> (value << count) | (value >> (32 - count));
258-
259255
[MethodImpl(MethodImplOptions.AggressiveInlining)]
260256
private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4)
261257
{
@@ -268,23 +264,19 @@ private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v
268264
[MethodImpl(MethodImplOptions.AggressiveInlining)]
269265
private static uint Round(uint hash, uint input)
270266
{
271-
hash += input * Prime2;
272-
hash = Rol(hash, 13);
273-
hash *= Prime1;
274-
return hash;
267+
return BitOps.RotateLeft(hash + input * Prime2, 13) * Prime1;
275268
}
276269

277270
[MethodImpl(MethodImplOptions.AggressiveInlining)]
278271
private static uint QueueRound(uint hash, uint queuedValue)
279272
{
280-
hash += queuedValue * Prime3;
281-
return Rol(hash, 17) * Prime4;
273+
return BitOps.RotateLeft(hash + queuedValue * Prime3, 17) * Prime4;
282274
}
283275

284276
[MethodImpl(MethodImplOptions.AggressiveInlining)]
285277
private static uint MixState(uint v1, uint v2, uint v3, uint v4)
286278
{
287-
return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18);
279+
return BitOps.RotateLeft(v1, 1) + BitOps.RotateLeft(v2, 7) + BitOps.RotateLeft(v3, 12) + BitOps.RotateLeft(v4, 18);
288280
}
289281

290282
private static uint MixEmptyState()

0 commit comments

Comments
 (0)