Skip to content

Commit af1262c

Browse files
Allow the user to control the MaxVectorTBitWidth (#85551)
* Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_PreferredVectorBitWidth
* Ensure SPMI keeps a getMaxVectorTBitWidth implementation
* Fix the non-xarch vm build
* Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longer needed
* Move SetCpuInfo down into the EEJitManager constructor
* Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTTLING`
* Make sure CORINFO_XARCH_CPU is fully removed
* Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector<T> size differences
* Only encode types containing Vector<T> as check, not verify
* Remove changes that were extracted to separate PRs
* Ensure that the optimistic flags are a strict superset of the supported flags
* Make VectorT128/256/512 proper instruction sets and only allow one to be active at a time
* Don't allow avxvnni to be "optimistic" since that brings in avx2
* Ensure we handle HWIntrinsics being disabled
* Ensure that the Vector<T> size ISAs are covered by FromInstructionSet
* Ensure that `getMaxVectorByteLength` being 0 is handled
* Ensure NAOT startup can correctly check for the VectorT size bits
* Have BlkOpKindUnroll account for SIMD being disabled
* Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_GetJitCpuCapabilityFlags
1 parent 6328b23 commit af1262c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+651
-269
lines changed

src/coreclr/inc/clrconfigvalues.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control
346346
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks")
347347
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics")
348348
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.")
349-
#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
350-
#define EXTERNAL_FeatureSIMD_Default 1
351-
#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
352-
#define EXTERNAL_FeatureSIMD_Default 0
353-
#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
354-
RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)")
355349
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods")
356350

357351
#ifdef FEATURE_MULTICOREJIT
@@ -734,15 +728,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame"
734728
#endif
735729
#endif
736730

731+
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.")
732+
737733
//
738734
// Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
739735
//
740736
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
741737
//TODO: should implement LoongArch64's features.
742738
//TODO-RISCV64-CQ: should implement RISCV64's features.
743-
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
739+
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
744740
#else
745-
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
741+
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
746742
#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
747743

748744
#if defined(TARGET_AMD64) || defined(TARGET_X86)

src/coreclr/inc/corinfoinstructionset.h

Lines changed: 110 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
enum CORINFO_InstructionSet
1515
{
1616
InstructionSet_ILLEGAL = 0,
17-
InstructionSet_NONE = 63,
17+
InstructionSet_NONE = 127,
1818
#ifdef TARGET_ARM64
1919
InstructionSet_ArmBase=1,
2020
InstructionSet_AdvSimd=2,
@@ -29,14 +29,15 @@ enum CORINFO_InstructionSet
2929
InstructionSet_Vector128=11,
3030
InstructionSet_Dczva=12,
3131
InstructionSet_Rcpc=13,
32-
InstructionSet_ArmBase_Arm64=14,
33-
InstructionSet_AdvSimd_Arm64=15,
34-
InstructionSet_Aes_Arm64=16,
35-
InstructionSet_Crc32_Arm64=17,
36-
InstructionSet_Dp_Arm64=18,
37-
InstructionSet_Rdm_Arm64=19,
38-
InstructionSet_Sha1_Arm64=20,
39-
InstructionSet_Sha256_Arm64=21,
32+
InstructionSet_VectorT128=14,
33+
InstructionSet_ArmBase_Arm64=15,
34+
InstructionSet_AdvSimd_Arm64=16,
35+
InstructionSet_Aes_Arm64=17,
36+
InstructionSet_Crc32_Arm64=18,
37+
InstructionSet_Dp_Arm64=19,
38+
InstructionSet_Rdm_Arm64=20,
39+
InstructionSet_Sha1_Arm64=21,
40+
InstructionSet_Sha256_Arm64=22,
4041
#endif // TARGET_ARM64
4142
#ifdef TARGET_AMD64
4243
InstructionSet_X86Base=1,
@@ -71,35 +72,38 @@ enum CORINFO_InstructionSet
7172
InstructionSet_AVX512DQ_VL=30,
7273
InstructionSet_AVX512VBMI=31,
7374
InstructionSet_AVX512VBMI_VL=32,
74-
InstructionSet_X86Base_X64=33,
75-
InstructionSet_SSE_X64=34,
76-
InstructionSet_SSE2_X64=35,
77-
InstructionSet_SSE3_X64=36,
78-
InstructionSet_SSSE3_X64=37,
79-
InstructionSet_SSE41_X64=38,
80-
InstructionSet_SSE42_X64=39,
81-
InstructionSet_AVX_X64=40,
82-
InstructionSet_AVX2_X64=41,
83-
InstructionSet_AES_X64=42,
84-
InstructionSet_BMI1_X64=43,
85-
InstructionSet_BMI2_X64=44,
86-
InstructionSet_FMA_X64=45,
87-
InstructionSet_LZCNT_X64=46,
88-
InstructionSet_PCLMULQDQ_X64=47,
89-
InstructionSet_POPCNT_X64=48,
90-
InstructionSet_AVXVNNI_X64=49,
91-
InstructionSet_MOVBE_X64=50,
92-
InstructionSet_X86Serialize_X64=51,
93-
InstructionSet_AVX512F_X64=52,
94-
InstructionSet_AVX512F_VL_X64=53,
95-
InstructionSet_AVX512BW_X64=54,
96-
InstructionSet_AVX512BW_VL_X64=55,
97-
InstructionSet_AVX512CD_X64=56,
98-
InstructionSet_AVX512CD_VL_X64=57,
99-
InstructionSet_AVX512DQ_X64=58,
100-
InstructionSet_AVX512DQ_VL_X64=59,
101-
InstructionSet_AVX512VBMI_X64=60,
102-
InstructionSet_AVX512VBMI_VL_X64=61,
75+
InstructionSet_VectorT128=33,
76+
InstructionSet_VectorT256=34,
77+
InstructionSet_VectorT512=35,
78+
InstructionSet_X86Base_X64=36,
79+
InstructionSet_SSE_X64=37,
80+
InstructionSet_SSE2_X64=38,
81+
InstructionSet_SSE3_X64=39,
82+
InstructionSet_SSSE3_X64=40,
83+
InstructionSet_SSE41_X64=41,
84+
InstructionSet_SSE42_X64=42,
85+
InstructionSet_AVX_X64=43,
86+
InstructionSet_AVX2_X64=44,
87+
InstructionSet_AES_X64=45,
88+
InstructionSet_BMI1_X64=46,
89+
InstructionSet_BMI2_X64=47,
90+
InstructionSet_FMA_X64=48,
91+
InstructionSet_LZCNT_X64=49,
92+
InstructionSet_PCLMULQDQ_X64=50,
93+
InstructionSet_POPCNT_X64=51,
94+
InstructionSet_AVXVNNI_X64=52,
95+
InstructionSet_MOVBE_X64=53,
96+
InstructionSet_X86Serialize_X64=54,
97+
InstructionSet_AVX512F_X64=55,
98+
InstructionSet_AVX512F_VL_X64=56,
99+
InstructionSet_AVX512BW_X64=57,
100+
InstructionSet_AVX512BW_VL_X64=58,
101+
InstructionSet_AVX512CD_X64=59,
102+
InstructionSet_AVX512CD_VL_X64=60,
103+
InstructionSet_AVX512DQ_X64=61,
104+
InstructionSet_AVX512DQ_VL_X64=62,
105+
InstructionSet_AVX512VBMI_X64=63,
106+
InstructionSet_AVX512VBMI_VL_X64=64,
103107
#endif // TARGET_AMD64
104108
#ifdef TARGET_X86
105109
InstructionSet_X86Base=1,
@@ -134,43 +138,46 @@ enum CORINFO_InstructionSet
134138
InstructionSet_AVX512DQ_VL=30,
135139
InstructionSet_AVX512VBMI=31,
136140
InstructionSet_AVX512VBMI_VL=32,
137-
InstructionSet_X86Base_X64=33,
138-
InstructionSet_SSE_X64=34,
139-
InstructionSet_SSE2_X64=35,
140-
InstructionSet_SSE3_X64=36,
141-
InstructionSet_SSSE3_X64=37,
142-
InstructionSet_SSE41_X64=38,
143-
InstructionSet_SSE42_X64=39,
144-
InstructionSet_AVX_X64=40,
145-
InstructionSet_AVX2_X64=41,
146-
InstructionSet_AES_X64=42,
147-
InstructionSet_BMI1_X64=43,
148-
InstructionSet_BMI2_X64=44,
149-
InstructionSet_FMA_X64=45,
150-
InstructionSet_LZCNT_X64=46,
151-
InstructionSet_PCLMULQDQ_X64=47,
152-
InstructionSet_POPCNT_X64=48,
153-
InstructionSet_AVXVNNI_X64=49,
154-
InstructionSet_MOVBE_X64=50,
155-
InstructionSet_X86Serialize_X64=51,
156-
InstructionSet_AVX512F_X64=52,
157-
InstructionSet_AVX512F_VL_X64=53,
158-
InstructionSet_AVX512BW_X64=54,
159-
InstructionSet_AVX512BW_VL_X64=55,
160-
InstructionSet_AVX512CD_X64=56,
161-
InstructionSet_AVX512CD_VL_X64=57,
162-
InstructionSet_AVX512DQ_X64=58,
163-
InstructionSet_AVX512DQ_VL_X64=59,
164-
InstructionSet_AVX512VBMI_X64=60,
165-
InstructionSet_AVX512VBMI_VL_X64=61,
141+
InstructionSet_VectorT128=33,
142+
InstructionSet_VectorT256=34,
143+
InstructionSet_VectorT512=35,
144+
InstructionSet_X86Base_X64=36,
145+
InstructionSet_SSE_X64=37,
146+
InstructionSet_SSE2_X64=38,
147+
InstructionSet_SSE3_X64=39,
148+
InstructionSet_SSSE3_X64=40,
149+
InstructionSet_SSE41_X64=41,
150+
InstructionSet_SSE42_X64=42,
151+
InstructionSet_AVX_X64=43,
152+
InstructionSet_AVX2_X64=44,
153+
InstructionSet_AES_X64=45,
154+
InstructionSet_BMI1_X64=46,
155+
InstructionSet_BMI2_X64=47,
156+
InstructionSet_FMA_X64=48,
157+
InstructionSet_LZCNT_X64=49,
158+
InstructionSet_PCLMULQDQ_X64=50,
159+
InstructionSet_POPCNT_X64=51,
160+
InstructionSet_AVXVNNI_X64=52,
161+
InstructionSet_MOVBE_X64=53,
162+
InstructionSet_X86Serialize_X64=54,
163+
InstructionSet_AVX512F_X64=55,
164+
InstructionSet_AVX512F_VL_X64=56,
165+
InstructionSet_AVX512BW_X64=57,
166+
InstructionSet_AVX512BW_VL_X64=58,
167+
InstructionSet_AVX512CD_X64=59,
168+
InstructionSet_AVX512CD_VL_X64=60,
169+
InstructionSet_AVX512DQ_X64=61,
170+
InstructionSet_AVX512DQ_VL_X64=62,
171+
InstructionSet_AVX512VBMI_X64=63,
172+
InstructionSet_AVX512VBMI_VL_X64=64,
166173
#endif // TARGET_X86
167174

168175
};
169176

170177
struct CORINFO_InstructionSetFlags
171178
{
172179
private:
173-
static const int32_t FlagsFieldCount = 1;
180+
static const int32_t FlagsFieldCount = 2;
174181
static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8;
175182
uint64_t _flags[FlagsFieldCount] = { };
176183

@@ -404,6 +411,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
404411
resultflags.RemoveInstructionSet(InstructionSet_Vector64);
405412
if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
406413
resultflags.RemoveInstructionSet(InstructionSet_Vector128);
414+
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
415+
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
407416
#endif // TARGET_ARM64
408417
#ifdef TARGET_AMD64
409418
if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
@@ -594,6 +603,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
594603
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
595604
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
596605
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
606+
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
607+
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
608+
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
609+
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
610+
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
611+
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
597612
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
598613
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
599614
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
@@ -674,6 +689,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
674689
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
675690
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
676691
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
692+
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
693+
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
694+
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
695+
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
696+
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
697+
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
677698
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
678699
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
679700
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
@@ -738,6 +759,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
738759
return "Dczva";
739760
case InstructionSet_Rcpc :
740761
return "Rcpc";
762+
case InstructionSet_VectorT128 :
763+
return "VectorT128";
741764
#endif // TARGET_ARM64
742765
#ifdef TARGET_AMD64
743766
case InstructionSet_X86Base :
@@ -862,6 +885,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
862885
return "AVX512VBMI_VL";
863886
case InstructionSet_AVX512VBMI_VL_X64 :
864887
return "AVX512VBMI_VL_X64";
888+
case InstructionSet_VectorT128 :
889+
return "VectorT128";
890+
case InstructionSet_VectorT256 :
891+
return "VectorT256";
892+
case InstructionSet_VectorT512 :
893+
return "VectorT512";
865894
#endif // TARGET_AMD64
866895
#ifdef TARGET_X86
867896
case InstructionSet_X86Base :
@@ -928,6 +957,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
928957
return "AVX512VBMI";
929958
case InstructionSet_AVX512VBMI_VL :
930959
return "AVX512VBMI_VL";
960+
case InstructionSet_VectorT128 :
961+
return "VectorT128";
962+
case InstructionSet_VectorT256 :
963+
return "VectorT256";
964+
case InstructionSet_VectorT512 :
965+
return "VectorT512";
931966
#endif // TARGET_X86
932967

933968
default:
@@ -958,6 +993,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
958993
case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256;
959994
case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
960995
case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc;
996+
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
961997
#endif // TARGET_ARM64
962998
#ifdef TARGET_AMD64
963999
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -989,6 +1025,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
9891025
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
9901026
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
9911027
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
1028+
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
1029+
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
1030+
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
9921031
#endif // TARGET_AMD64
9931032
#ifdef TARGET_X86
9941033
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -1020,6 +1059,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
10201059
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
10211060
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
10221061
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
1062+
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
1063+
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
1064+
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
10231065
#endif // TARGET_X86
10241066

10251067
default:

src/coreclr/inc/jiteeversionguid.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
4343
#define GUID_DEFINED
4444
#endif // !GUID_DEFINED
4545

46-
constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */
47-
0xd4414be1,
48-
0x70e4,
49-
0x46ac,
50-
{0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22}
46+
constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */
47+
0xfda2f9dd,
48+
0x6b3e,
49+
0x4ecd,
50+
{0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72}
5151
};
5252

5353
//////////////////////////////////////////////////////////////////////////////////////////////////////////

src/coreclr/inc/readytoruninstructionset.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet
4747
READYTORUN_INSTRUCTION_Avx512DQ_VL=36,
4848
READYTORUN_INSTRUCTION_Avx512Vbmi=37,
4949
READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38,
50+
READYTORUN_INSTRUCTION_VectorT128=39,
51+
READYTORUN_INSTRUCTION_VectorT256=40,
52+
READYTORUN_INSTRUCTION_VectorT512=41,
5053

5154
};
5255

src/coreclr/jit/codegenxarch.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
34013401
assert(srcOffset < (INT32_MAX - static_cast<int>(size)));
34023402
assert(dstOffset < (INT32_MAX - static_cast<int>(size)));
34033403

3404-
if (size >= XMM_REGSIZE_BYTES)
3404+
// Get the largest SIMD register available if the size is large enough
3405+
unsigned regSize = compiler->roundDownSIMDSize(size);
3406+
3407+
if ((size >= regSize) && (regSize > 0))
34053408
{
34063409
regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT);
34073410

34083411
instruction simdMov = simdUnalignedMovIns();
34093412

3410-
// Get the largest SIMD register available if the size is large enough
3411-
unsigned regSize = compiler->roundDownSIMDSize(size);
3412-
34133413
auto emitSimdMovs = [&]() {
34143414
if (srcLclNum != BAD_VAR_NUM)
34153415
{

0 commit comments

Comments
 (0)