Skip to content

Commit 4481298

Browse files
Wraith2 authored and radekdoulik committed
Add xarch blsi (dotnet#66193)
* implement blsi * add bmi intrinsics test projects * add using System for Console.
1 parent 5f3e8ce commit 4481298

File tree

6 files changed

+169
-1
lines changed

6 files changed

+169
-1
lines changed

src/coreclr/jit/instrsxarch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA
593593
// BMI1
594594
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
595595
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
596-
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
596+
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
597597
INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
598598
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
599599
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract

src/coreclr/jit/lower.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ class Lowering final : public Phase
348348
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
349349
void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
350350
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
351+
GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
351352
GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
352353
#elif defined(TARGET_ARM64)
353354
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);

src/coreclr/jit/lowerxarch.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
187187
{
188188
return replacementNode->gtNext;
189189
}
190+
191+
replacementNode = TryLowerAndOpToExtractLowestSetBit(binOp);
192+
if (replacementNode != nullptr)
193+
{
194+
return replacementNode->gtNext;
195+
}
190196
}
191197
#endif
192198

@@ -3823,6 +3829,84 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
38233829
return blsrNode;
38243830
}
38253831

3832+
//----------------------------------------------------------------------------------------------
3833+
// Lowering::TryLowerAndOpToExtractLowestSetIsolatedBit: Lowers a tree AND(X, NEG(X)) to
3834+
// HWIntrinsic::ExtractLowestSetBit
3835+
//
3836+
// Arguments:
3837+
// andNode - GT_AND node of integral type
3838+
//
3839+
// Return Value:
3840+
// Returns the replacement node if one is created else nullptr indicating no replacement
3841+
//
3842+
// Notes:
3843+
// Performs containment checks on the replacement node if one is created
3844+
GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode)
3845+
{
3846+
GenTree* opNode = nullptr;
3847+
GenTree* negNode = nullptr;
3848+
if (andNode->gtGetOp1()->OperIs(GT_NEG))
3849+
{
3850+
negNode = andNode->gtGetOp1();
3851+
opNode = andNode->gtGetOp2();
3852+
}
3853+
else if (andNode->gtGetOp2()->OperIs(GT_NEG))
3854+
{
3855+
negNode = andNode->gtGetOp2();
3856+
opNode = andNode->gtGetOp1();
3857+
}
3858+
3859+
if (opNode == nullptr)
3860+
{
3861+
return nullptr;
3862+
}
3863+
3864+
GenTree* negOp = negNode->AsUnOp()->gtGetOp1();
3865+
if (!negOp->OperIs(GT_LCL_VAR) || !opNode->OperIs(GT_LCL_VAR) ||
3866+
(negOp->AsLclVar()->GetLclNum() != opNode->AsLclVar()->GetLclNum()))
3867+
{
3868+
return nullptr;
3869+
}
3870+
3871+
NamedIntrinsic intrinsic;
3872+
if (andNode->TypeIs(TYP_LONG) && comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
3873+
{
3874+
intrinsic = NamedIntrinsic::NI_BMI1_X64_ExtractLowestSetBit;
3875+
}
3876+
else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
3877+
{
3878+
intrinsic = NamedIntrinsic::NI_BMI1_ExtractLowestSetBit;
3879+
}
3880+
else
3881+
{
3882+
return nullptr;
3883+
}
3884+
3885+
LIR::Use use;
3886+
if (!BlockRange().TryGetUse(andNode, &use))
3887+
{
3888+
return nullptr;
3889+
}
3890+
3891+
GenTreeHWIntrinsic* blsiNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), opNode, intrinsic);
3892+
3893+
JITDUMP("Lower: optimize AND(X, NEG(X)))\n");
3894+
DISPNODE(andNode);
3895+
JITDUMP("to:\n");
3896+
DISPNODE(blsiNode);
3897+
3898+
use.ReplaceWith(blsiNode);
3899+
3900+
BlockRange().InsertBefore(andNode, blsiNode);
3901+
BlockRange().Remove(andNode);
3902+
BlockRange().Remove(negNode);
3903+
BlockRange().Remove(negOp);
3904+
3905+
ContainCheckHWIntrinsic(blsiNode);
3906+
3907+
return blsiNode;
3908+
}
3909+
38263910
//----------------------------------------------------------------------------------------------
38273911
// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot
38283912
//
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
using System;
2+
using System.Runtime.CompilerServices;
3+
4+
namespace BMI1Intrinsics
5+
{
6+
internal class Program
{
    // Value returned from Main. Starts at 100 and is incremented once per failed check.
    private static int _errorCode = 100;

    /// <summary>
    /// Runs every helper below over a table of inputs and compares each result with the
    /// expected value stored in the same row.
    /// </summary>
    /// <param name="args">Unused.</param>
    /// <returns>100 when every check matched; 100 plus the number of mismatches otherwise.</returns>
    static int Main(string[] args)
    {
        // The helpers are the scalar forms of the BMI1 operations. Wherever the JIT folds one
        // of them to a hwintrinsic, the result must be identical to the plain expression.

        var values = new (uint input1, uint input2, uint andnExpected, uint blsiExpected, uint blsrExpected, uint blsmskExpected)[] {
            (0, 0, 0, 0, 0, 0xFFFFFFFF),
            (1, 0, 1, 1, 0, 1),
            (uint.MaxValue / 2, 0, 0x7FFFFFFF, 1, 0x7FFFFFFE, 1),
            ((uint.MaxValue / 2) - 1, 0, 0x7FFFFFFE, 2, 0x7FFFFFFC, 3),
            ((uint.MaxValue / 2) + 1, 0, 0x80000000, 0x80000000, 0, 0xFFFFFFFF),
            (uint.MaxValue - 1, 0, 0xFFFFFFFE, 2, 0xFFFFFFFC, 3),
            (uint.MaxValue, 0, 0xFFFFFFFF, 1, 0xFFFFFFFE, 1),
            (0xAAAAAAAA, 0xAAAAAAAA, 0, 2, 0xAAAAAAA8, 3),
            (0xAAAAAAAA, 0x55555555, 0xAAAAAAAA, 2, 0xAAAAAAA8, 3),
        };

        foreach (var value in values)
        {
            Test(value.input1, AndNot(value.input1, value.input2), value.andnExpected, nameof(AndNot));
            Test(value.input1, ExtractLowestSetIsolatedBit(value.input1), value.blsiExpected, nameof(ExtractLowestSetIsolatedBit));
            Test(value.input1, ResetLowestSetBit(value.input1), value.blsrExpected, nameof(ResetLowestSetBit));
            Test(value.input1, GetMaskUpToLowestSetBit(value.input1), value.blsmskExpected, nameof(GetMaskUpToLowestSetBit));
        }

        return _errorCode;
    }

    // Bits of x that are clear in y.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint AndNot(uint x, uint y) => x & (~y); // bmi1 andn

    // Only the lowest set bit of x (0 when x is 0). Negating a uint yields a long, hence the cast.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint ExtractLowestSetIsolatedBit(uint x) => (uint)(x & (-x)); // bmi1 blsi

    // x with its lowest set bit cleared.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint ResetLowestSetBit(uint x) => x & (x - 1); // bmi1 blsr

    // All bits up to and including the lowest set bit of x; every bit is set when x is 0
    // because x - 1 wraps around.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static uint GetMaskUpToLowestSetBit(uint x) => x ^ (x - 1); // bmi1 blsmsk

    // Reports a mismatch on the console and bumps the exit code; does nothing when the values match.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private static void Test(uint input, uint output, uint expected, string callerName)
    {
        if (output != expected)
        {
            Console.WriteLine($"{callerName} failed.");
            Console.WriteLine($"Input:    {input:X}");
            Console.WriteLine($"Output:   {output:X}");
            Console.WriteLine($"Expected: {expected:X}");

            _errorCode++;
        }
    }
}
63+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<OutputType>Exe</OutputType>
4+
<DebugType>None</DebugType>
5+
<Optimize>True</Optimize>
6+
</PropertyGroup>
7+
<ItemGroup>
8+
<Compile Include="BMI1Intrinsics.cs" />
9+
</ItemGroup>
10+
</Project>
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<OutputType>Exe</OutputType>
4+
<DebugType>None</DebugType>
5+
<Optimize />
6+
</PropertyGroup>
7+
<ItemGroup>
8+
<Compile Include="BMI1Intrinsics.cs" />
9+
</ItemGroup>
10+
</Project>

0 commit comments

Comments
 (0)