Skip to content

Commit 8392660

Browse files
committed
Consider instruction input size when considering embedded mask optimization
Some instructions, such as `vinserti32x4`, `vinserti32x8`, and `vinserti64x2`, assume a specific mask element size that may not match the SIMD base type size of the intrinsics that generate them. Check the instruction's "input size" and reject the embedded mask optimization if it does not match the mask size. There are a few diffs in the HWINTRINSIC test cases where this optimization no longer kicks in; the diffs look correct. Fixes #114921
1 parent 9e490ef commit 8392660

File tree

3 files changed

+119
-5
lines changed

3 files changed

+119
-5
lines changed

src/coreclr/jit/lowerxarch.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10573,7 +10573,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
1057310573
//
1057410574
// The managed API surface we expose doesn't directly support TYP_MASK
1057510575
// and we don't directly expose overloads for APIs like `vaddps` which
10576-
// support embedded masking. Instead, we have decide to do pattern
10576+
// support embedded masking. Instead, we have decided to do pattern
1057710577
// recognition over the relevant ternary select APIs which functionally
1057810578
// execute `cond ? selectTrue : selectFalse` on a per element basis.
1057910579
//
@@ -10598,14 +10598,37 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
1059810598
// TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics
1059910599
isEmbeddedMask = false;
1060010600
}
10601+
else
10602+
{
10603+
uint32_t maskSize = genTypeSize(simdBaseType);
10604+
var_types op2SimdBaseType = op2->AsHWIntrinsic()->GetSimdBaseType();
10605+
uint32_t operSize = genTypeSize(op2SimdBaseType);
10606+
10607+
if (maskSize != operSize)
10608+
{
10609+
isEmbeddedMask = false;
10610+
}
10611+
else
10612+
{
10613+
// Check the op2 instruction input size to see if it's the same as the
10614+
// mask size.
10615+
10616+
NamedIntrinsic op2IntrinsicId = op2->AsHWIntrinsic()->GetHWIntrinsicId();
10617+
instruction ins =
10618+
HWIntrinsicInfo::lookupIns(op2IntrinsicId, op2SimdBaseType);
10619+
assert(ins != INS_invalid);
10620+
unsigned inputSize = CodeGenInterface::instInputSize(ins);
10621+
if (maskSize != inputSize)
10622+
{
10623+
isEmbeddedMask = false;
10624+
}
10625+
}
10626+
}
1060110627
}
1060210628

1060310629
if (isEmbeddedMask)
1060410630
{
10605-
uint32_t maskSize = genTypeSize(simdBaseType);
10606-
uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType());
10607-
10608-
if ((maskSize == operSize) && IsInvariantInRange(op2, node))
10631+
if (IsInvariantInRange(op2, node))
1060910632
{
1061010633
MakeSrcContained(node, op2);
1061110634
op2->MakeEmbMaskOp();
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
//
4+
// Generated by Fuzzlyn v2.5 on 2025-04-22 17:32:36
5+
// Run on X64 Windows
6+
// Seed: 7915602115310323123-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
7+
// Reduced from 123.1 KiB to 0.5 KiB in 00:00:46
8+
// Debug: Outputs <0, 0, 0, 0, 0, 0, 0, 0>
9+
// Release: Outputs <0, 0, 0, 0, -1, -1, -1, -1>
10+
11+
using System;
12+
using System.Numerics;
13+
using System.Runtime.Intrinsics;
14+
using System.Runtime.Intrinsics.X86;
15+
using Xunit;
16+
17+
public class Runtime_114921
18+
{
19+
public static Vector512<long> s_4 = Vector512.Create<long>(-1);
20+
public static Vector128<long> s_8;
21+
22+
[Fact]
23+
public static void Problem1()
24+
{
25+
if (Avx512F.IsSupported)
26+
{
27+
var vr1 = Vector512.Create<long>(0);
28+
s_4 = Avx512F.BlendVariable(s_4, Avx512F.InsertVector128(vr1, s_8, 0), s_4);
29+
System.Console.WriteLine(s_4);
30+
Assert.Equal(Vector512.Create(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), s_4);
31+
}
32+
}
33+
}
34+
35+
// Generated by Fuzzlyn v2.5 on 2025-04-22 17:37:13
36+
// Run on X64 Windows
37+
// Seed: 14731447107126414231-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
38+
// Reduced from 217.7 KiB to 1.0 KiB in 00:02:50
39+
// Debug: Outputs <9223372036854775807, 0, 0, 0, 0, 0, 0, 0>
40+
// Release: Outputs <4294967295, 0, 0, 0, 0, 0, 0, 0>
41+
42+
public struct S2
43+
{
44+
public Vector128<long> F0;
45+
public S2(Vector128<long> f0) : this()
46+
{
47+
F0 = f0;
48+
}
49+
}
50+
51+
public class Runtime_114921_2
52+
{
53+
public static IRuntime s_rt;
54+
55+
[Fact]
56+
public static void Problem2()
57+
{
58+
if (Avx512F.IsSupported)
59+
{
60+
s_rt = new Runtime();
61+
long vr6 = default(long);
62+
S2 vr7 = new S2(Vector128.CreateScalar(9223372036854775807L));
63+
Vector512<long> vr14 = default(Vector512<long>);
64+
var vr9 = Vector512.Create<long>(vr6);
65+
var vr10 = vr7.F0;
66+
var vr11 = Avx512F.InsertVector128(vr9, vr10, 0);
67+
var vr12 = Vector512.CreateScalar(-9223372036854775808L);
68+
var vr13 = Avx512F.BlendVariable(vr14, vr11, vr12);
69+
s_rt.WriteLine(vr13);
70+
Assert.Equal(Vector512.Create(9223372036854775807L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), vr13);
71+
}
72+
}
73+
}
74+
75+
public interface IRuntime
76+
{
77+
void WriteLine<T>(T value);
78+
}
79+
80+
public class Runtime : IRuntime
81+
{
82+
public void WriteLine<T>(T value) => System.Console.WriteLine(value);
83+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<Optimize>True</Optimize>
4+
</PropertyGroup>
5+
<ItemGroup>
6+
<Compile Include="$(MSBuildProjectName).cs" />
7+
</ItemGroup>
8+
</Project>

0 commit comments

Comments
 (0)