Skip to content

Adding zmmStateSupport and AVX512F, AVX512CD, AVX512BW, AVX512DQ and AVX512VL ISAs. #74113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,14 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVX VNNI+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled")
Expand Down
252 changes: 214 additions & 38 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 0cd8b9d4-04f4-45a7-b16b-7f24b7c0a454 */
0x0cd8b9d4,
0x04f4,
0x45a7,
{0xb1, 0x6b, 0x7f, 0x24, 0xb7, 0xc0, 0xa4, 0x54}
constexpr GUID JITEEVersionIdentifier = { /* eb8352bd-0a13-4b5b-badb-58f9ecc40c44 */
0xeb8352bd,
0x0a13,
0x4b5b,
{0xba, 0xdb, 0x58, 0xf9, 0xec, 0xc4, 0x0c, 0x44}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Rcpc=26,
READYTORUN_INSTRUCTION_Movbe=27,
READYTORUN_INSTRUCTION_X86Serialize=28,
READYTORUN_INSTRUCTION_Avx512F=29,
READYTORUN_INSTRUCTION_Avx512F_VL=30,
READYTORUN_INSTRUCTION_Avx512BW=31,
READYTORUN_INSTRUCTION_Avx512BW_VL=32,
READYTORUN_INSTRUCTION_Avx512CD=33,
READYTORUN_INSTRUCTION_Avx512CD_VL=34,
READYTORUN_INSTRUCTION_Avx512DQ=35,
READYTORUN_INSTRUCTION_Avx512DQ_VL=36,

};

Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,14 @@ CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 1) // Allows Base+ h
CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Allows AES+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Allows AVX+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Allows AVX2+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512BW, W("EnableAVX512BW"), 1) // Allows AVX512BW+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512CD, W("EnableAVX512CD"), 1) // Allows AVX512CD+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1) // Allows AVX512CD+ AVX512VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512DQ, W("EnableAVX512DQ"), 1) // Allows AVX512DQ+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1) // Allows AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512F, W("EnableAVX512F"), 1) // Allows AVX512F+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVX512F_VL, W("EnableAVX512F_VL"), 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Allows AVX VNNI+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Allows BMI1+ hardware intrinsics to be disabled
CONFIG_INTEGER(EnableBMI2, W("EnableBMI2"), 1) // Allows BMI2+ hardware intrinsics to be disabled
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ enum XArchIntrinsicConstants
XArchIntrinsicConstants_Lzcnt = 0x1000,
XArchIntrinsicConstants_AvxVnni = 0x2000,
XArchIntrinsicConstants_Movbe = 0x4000,
XArchIntrinsicConstants_Avx512f = 0x8000,
XArchIntrinsicConstants_Avx512f_vl = 0x10000,
XArchIntrinsicConstants_Avx512bw = 0x20000,
XArchIntrinsicConstants_Avx512bw_vl = 0x40000,
XArchIntrinsicConstants_Avx512cd = 0x80000,
XArchIntrinsicConstants_Avx512cd_vl = 0x100000,
XArchIntrinsicConstants_Avx512dq = 0x200000,
XArchIntrinsicConstants_Avx512dq_vl = 0x400000,
};
#endif //HOST_X86 || HOST_AMD64

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/nativeaot/Runtime/PalRedhawk.h
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,9 @@ REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunctio
#endif

// Returns 1 when the OS has enabled both XMM and YMM state support, 0 otherwise.
REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport();
// Returns 1 when XCR0 shows the OS has enabled XMM, YMM and ZMM state support, 0 otherwise.
REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport();
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled();
// Platform-level gate for AVX-512. The Windows implementation consults GetEnabledXStateFeatures;
// the Unix implementation always reports true. Intended to be combined with avx512StateSupport().
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled();

#endif // defined(HOST_X86) || defined(HOST_AMD64)

Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/GC.asm
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,19 @@ LEAF_ENTRY xmmYmmStateSupport, _TEXT
ret
LEAF_END xmmYmmStateSupport, _TEXT

;; extern "C" DWORD __stdcall avx512StateSupport();
LEAF_ENTRY avx512StateSupport, _TEXT
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be hooked up here:

bool DetectCPUFeatures()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the AVX512 ISA checks need to be mirrored there as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And the 'end' was in the wrong place :). I have added the checks in startup.cpp.

mov ecx, 0 ; Specify xcr0
xgetbv ; result in EDX:EAX
and eax, 0E6H
cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support
jne not_supported
mov eax, 1
jmp done
not_supported:
mov eax, 0
done:
ret
LEAF_END avx512StateSupport, _TEXT

end
42 changes: 42 additions & 0 deletions src/coreclr/nativeaot/Runtime/startup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,48 @@ bool DetectCPUFeatures()
{
g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni;
}

// AVX-512 detection: only attempted when the platform gate passes and the OS has
// enabled XMM, YMM and ZMM state.
if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XCR0[7:5] == 111
{
// AVX512F is the baseline; the other AVX-512 ISAs are only reported when it is present.
if ((cpuidInfo[EBX] & (1 << 16)) != 0) // AVX512F
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;

// Remembered so each *_vl flag below is set only when AVX512VL accompanies its base ISA.
bool isAVX512_VLSupported = false;
if ((cpuidInfo[EBX] & (1 << 31)) != 0) // AVX512VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl;
isAVX512_VLSupported = true;
}

if ((cpuidInfo[EBX] & (1 << 30)) != 0) // AVX512BW
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw;
if (isAVX512_VLSupported)
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl;
}
}

if ((cpuidInfo[EBX] & (1 << 28)) != 0) // AVX512CD
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd;
if (isAVX512_VLSupported)
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl;
}
}

if ((cpuidInfo[EBX] & (1 << 17)) != 0) // AVX512DQ
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq;
if (isAVX512_VLSupported)
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl;
}
}
}
}
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,11 @@ REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled()
return true;
}

// Platform-level gate for AVX-512 on Unix: always reports true.
// DetectCPUFeatures pairs this with avx512StateSupport(), which performs the XCR0 check.
REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled()
{
return true;
}

REDHAWK_PALEXPORT void PalPrintFatalError(const char* message)
{
// Write the message using lowest-level OS API available. This is used to print the stack overflow
Expand Down Expand Up @@ -1287,6 +1292,19 @@ REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport()
// check OS has enabled both XMM and YMM state support
return ((eax & 0x06) == 0x06) ? 1 : 0;
}

// Reads XCR0 via XGETBV and reports whether the OS has enabled XMM, YMM and ZMM state.
// Returns 1 when every required state bit is set, 0 otherwise.
REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI avx512StateSupport()
{
    // XCR0 bits 1 (SSE/XMM), 2 (AVX/YMM), 5 (opmask), 6 (ZMM_Hi256) and 7 (Hi16_ZMM).
    const DWORD requiredStateMask = 0xE6;

    DWORD xcr0Low;
    __asm(" xgetbv\n" \
        : "=a"(xcr0Low) /* low 32 bits of XCR0 come back in eax */ \
        : "c"(0) /* ecx = 0 selects XCR0 */ \
        : "edx" /* xgetbv also writes edx */
      );

    if ((xcr0Low & requiredStateMask) != requiredStateMask)
    {
        return 0;
    }

    return 1;
}

#endif // defined(HOST_X86) || defined(HOST_AMD64)

#if defined (HOST_ARM64)
Expand Down
25 changes: 25 additions & 0 deletions src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,31 @@ REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled()
return TRUE;
}

// Reports whether the OS advertises AVX-512 among its enabled XState features.
// GetEnabledXStateFeatures is resolved dynamically from kernel32; if the module or the
// export cannot be obtained, AVX-512 is treated as not enabled.
REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled()
{
    typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();

    HMODULE kernel32Module = LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
    if (kernel32Module == NULL)
    {
        return FALSE;
    }

    PGETENABLEDXSTATEFEATURES getEnabledXStateFeatures =
        (PGETENABLEDXSTATEFEATURES)GetProcAddress(kernel32Module, "GetEnabledXStateFeatures");
    if (getEnabledXStateFeatures == NULL)
    {
        return FALSE;
    }

    // Any bit of the AVX-512 state mask being set counts as enabled.
    const DWORD64 enabledFeatures = getEnabledXStateFeatures();
    return ((enabledFeatures & XSTATE_MASK_AVX512) != 0) ? TRUE : FALSE;
}

REDHAWK_PALEXPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(uint32_t firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler)
{
return AddVectoredExceptionHandler(firstHandler, vectoredHandler);
Expand Down
29 changes: 29 additions & 0 deletions src/coreclr/pal/src/arch/amd64/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,32 @@ extern "C" unsigned int XmmYmmStateSupport()
// Check OS has enabled both XMM and YMM state support
return ((eax & 0x06) == 0x06) ? 1 : 0;
}

/*++
Function:
Avx512StateSupport

Check if OS has enabled XMM, YMM and ZMM state support.

CPUID leaf 1 is queried first; XGETBV is executed only when both OSXSAVE
(ECX bit 27) and AVX (ECX bit 28) are reported. Otherwise eax stays zero
and the function returns 0.

Return value:
1 if XMM, YMM and ZMM are enabled, 0 otherwise
--*/
extern "C" unsigned int Avx512StateSupport()
{
    // XCR0 bits 1 (SSE/XMM), 2 (AVX/YMM), 5 (opmask), 6 (ZMM_Hi256) and 7 (Hi16_ZMM).
    const unsigned int requiredXcr0Bits = 0xE6;

    unsigned int eax;
    // A numeric local label ("1:") is used instead of a named one so that the
    // assembler does not see a duplicate symbol if the compiler ever emits this
    // asm statement more than once (inlining, cloning, LTO).
    __asm("  mov $1, %%eax\n" \
          "  cpuid\n" \
          "  xor %%eax, %%eax\n" \
          "  and $0x18000000, %%ecx\n" /* require OSXSAVE (bit 27) and AVX (bit 28) */ \
          "  cmp $0x18000000, %%ecx\n" \
          "  jne 1f\n" \
          "  xor %%ecx, %%ecx\n" /* ecx = 0 selects XCR0 */ \
          "  xgetbv\n" \
          "1:\n" \
        : "=a"(eax) /* output in eax */ \
        : /* no inputs */ \
        : "ebx", "ecx", "edx", "cc" /* registers and flags that are clobbered */
      );
    // Check OS has enabled XMM, YMM and ZMM state support
    return ((eax & requiredXcr0Bits) == requiredXcr0Bits) ? 1 : 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ public enum ReadyToRunInstructionSet
Rcpc=26,
Movbe=27,
X86Serialize=28,
Avx512F=29,
Avx512F_VL=30,
Avx512BW=31,
Avx512BW_VL=32,
Avx512CD=33,
Avx512CD_VL=34,
Avx512DQ=35,
Avx512DQ_VL=36,

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.X64_MOVBE_X64: return ReadyToRunInstructionSet.Movbe;
case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize;
case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F;
case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F;
case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL;
case InstructionSet.X64_AVX512F_VL_X64: return ReadyToRunInstructionSet.Avx512F_VL;
case InstructionSet.X64_AVX512BW: return ReadyToRunInstructionSet.Avx512BW;
case InstructionSet.X64_AVX512BW_X64: return ReadyToRunInstructionSet.Avx512BW;
case InstructionSet.X64_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL;
case InstructionSet.X64_AVX512BW_VL_X64: return ReadyToRunInstructionSet.Avx512BW_VL;
case InstructionSet.X64_AVX512CD: return ReadyToRunInstructionSet.Avx512CD;
case InstructionSet.X64_AVX512CD_X64: return ReadyToRunInstructionSet.Avx512CD;
case InstructionSet.X64_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL;
case InstructionSet.X64_AVX512CD_VL_X64: return ReadyToRunInstructionSet.Avx512CD_VL;
case InstructionSet.X64_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ;
case InstructionSet.X64_AVX512DQ_X64: return ReadyToRunInstructionSet.Avx512DQ;
case InstructionSet.X64_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL;
case InstructionSet.X64_AVX512DQ_VL_X64: return ReadyToRunInstructionSet.Avx512DQ_VL;

default: throw new Exception("Unknown instruction set");
}
Expand Down Expand Up @@ -142,6 +158,22 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.X86_MOVBE_X64: return null;
case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize;
case InstructionSet.X86_X86Serialize_X64: return null;
case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F;
case InstructionSet.X86_AVX512F_X64: return null;
case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL;
case InstructionSet.X86_AVX512F_VL_X64: return null;
case InstructionSet.X86_AVX512BW: return ReadyToRunInstructionSet.Avx512BW;
case InstructionSet.X86_AVX512BW_X64: return null;
case InstructionSet.X86_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL;
case InstructionSet.X86_AVX512BW_VL_X64: return null;
case InstructionSet.X86_AVX512CD: return ReadyToRunInstructionSet.Avx512CD;
case InstructionSet.X86_AVX512CD_X64: return null;
case InstructionSet.X86_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL;
case InstructionSet.X86_AVX512CD_VL_X64: return null;
case InstructionSet.X86_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ;
case InstructionSet.X86_AVX512DQ_X64: return null;
case InstructionSet.X86_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL;
case InstructionSet.X86_AVX512DQ_VL_X64: return null;

default: throw new Exception("Unknown instruction set");
}
Expand Down
Loading