
Commit 342bcdb

Add support for --instruction-set:native (#87865)
This allows compiling for the ISA extensions that the currently running CPU supports.
1 parent 2ee61bb commit 342bcdb

23 files changed, +1001 -843 lines changed

src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
 #include "RuntimeInstance.h"
 #include "CachedInterfaceDispatch.h"
 #include "shash.h"
+#include <minipal/cpufeatures.h>

 class AsmOffsets
 {

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 3 additions & 4 deletions
@@ -49,6 +49,8 @@ set(COMMON_RUNTIME_SOURCES
     ${GC_DIR}/handletablescan.cpp
     ${GC_DIR}/objecthandle.cpp
     ${GC_DIR}/softwarewritewatch.cpp
+
+    ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
 )

 set(SERVER_GC_SOURCES
@@ -115,10 +117,6 @@ if (WIN32)
     list(APPEND FULL_RUNTIME_SOURCES windows/CoffNativeCodeManager.cpp)

     set(ASM_SUFFIX asm)
-
-    if (CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_AMD64)
-        set(RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/GC.${ASM_SUFFIX})
-    endif()
 else()

     include_directories(unix)
@@ -249,6 +247,7 @@ else()
     endif()
     add_definitions(-DNO_UI_ASSERT)
     include(unix/configure.cmake)
+    include(${CLR_SRC_NATIVE_DIR}/minipal/configure.cmake)
     include_directories(${CMAKE_CURRENT_BINARY_DIR})
 endif()

src/coreclr/nativeaot/Runtime/MiscHelpers.cpp

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 #include "GCMemoryHelpers.inl"
 #include "yieldprocessornormalized.h"
 #include "RhConfig.h"
+#include <minipal/cpuid.h>

 COOP_PINVOKE_HELPER(void, RhDebugBreak, ())
 {

src/coreclr/nativeaot/Runtime/PalRedhawk.h

Lines changed: 0 additions & 26 deletions
@@ -20,7 +20,6 @@
 #include "CommonTypes.h"
 #include "CommonMacros.h"
 #include "gcenv.structs.h" // CRITICAL_SECTION
-#include "IntrinsicConstants.h"
 #include "PalRedhawkCommon.h"

 #ifndef PAL_REDHAWK_INCLUDED
@@ -776,31 +775,6 @@ REDHAWK_PALIMPORT char* PalCopyTCharAsChar(const TCHAR* toCopy);
 REDHAWK_PALIMPORT int32_t __cdecl _stricmp(const char *string1, const char *string2);
 #endif // TARGET_UNIX

-#if defined(HOST_X86) || defined(HOST_AMD64)
-
-#ifdef TARGET_UNIX
-// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
-// We define matching signatures for use on Unix platforms.
-//
-// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid
-
-REDHAWK_PALIMPORT void __cpuid(int cpuInfo[4], int function_id);
-REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
-#else
-#include <intrin.h>
-#endif
-
-REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport();
-REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport();
-REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled();
-REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled();
-
-#endif // defined(HOST_X86) || defined(HOST_AMD64)
-
-#if defined(HOST_ARM64)
-REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags);
-#endif //defined(HOST_ARM64)
-
 #include "PalRedhawkInline.h"

 #endif // !PAL_REDHAWK_INCLUDED
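
For context, the block removed above was the Redhawk PAL's own portability shim around MSVC's __cpuid/__cpuidex intrinsics plus the per-OS XSAVE probes; that responsibility now lives in the shared minipal headers. As a rough illustration only (not the minipal implementation), a Unix shim for the MSVC intrinsics is typically written with GCC/Clang's <cpuid.h>; the wrapper names below are hypothetical:

#include <cpuid.h>  // GCC/Clang header providing the __cpuid_count macro

// Hypothetical shim, for illustration. MSVC's __cpuid zeroes the sub-leaf (ECX);
// the removed PAL declaration warned that its Unix version did not, so an
// MSVC-compatible shim passes an explicit 0 sub-leaf.
static void cpuid_compat(int cpuInfo[4], int function_id)
{
    __cpuid_count(function_id, 0, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
}

static void cpuidex_compat(int cpuInfo[4], int function_id, int subFunction_id)
{
    __cpuid_count(function_id, subFunction_id, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
}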

src/coreclr/nativeaot/Runtime/amd64/GC.asm

Lines changed: 0 additions & 36 deletions
This file was deleted.

src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm

Lines changed: 0 additions & 1 deletion
@@ -288,7 +288,6 @@ RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@Ru
 EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC

 EXTERN g_fGcStressStarted : DWORD
-EXTERN g_fHasFastFxsave : BYTE

 ;;
 ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.

src/coreclr/nativeaot/Runtime/startup.cpp

Lines changed: 2 additions & 200 deletions
@@ -25,6 +25,7 @@
 #include "stressLog.h"
 #include "RestrictedCallouts.h"
 #include "yieldprocessornormalized.h"
+#include <minipal/cpufeatures.h>

 #ifdef FEATURE_PERFTRACING
 #include "EventPipeInterface.h"
@@ -48,9 +49,6 @@ static bool DetectCPUFeatures();

 extern RhConfig * g_pRhConfig;

-EXTERN_C bool g_fHasFastFxsave;
-bool g_fHasFastFxsave = false;
-
 CrstStatic g_ThunkPoolLock;

 #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
@@ -183,203 +181,7 @@ static bool InitDLL(HANDLE hPalInstance)
 bool DetectCPUFeatures()
 {
 #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
-
-#if defined(HOST_X86) || defined(HOST_AMD64)
-
-    int cpuidInfo[4];
-
-    const int CPUID_EAX = 0;
-    const int CPUID_EBX = 1;
-    const int CPUID_ECX = 2;
-    const int CPUID_EDX = 3;
-
-    __cpuid(cpuidInfo, 0x00000000);
-    uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);
-
-    if (maxCpuId >= 1)
-    {
-        __cpuid(cpuidInfo, 0x00000001);
-
-        const int requiredBaselineEdxFlags = (1 << 25)  // SSE
-                                           | (1 << 26); // SSE2
-
-        if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags)
-        {
-            g_cpuFeatures |= XArchIntrinsicConstants_VectorT128;
-
-            if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Aes;
-            }
-
-            if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq;
-            }
-
-            if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Sse3;
-
-                if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3
-                {
-                    g_cpuFeatures |= XArchIntrinsicConstants_Ssse3;
-
-                    if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1
-                    {
-                        g_cpuFeatures |= XArchIntrinsicConstants_Sse41;
-
-                        if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2
-                        {
-                            g_cpuFeatures |= XArchIntrinsicConstants_Sse42;
-
-                            if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE
-                            {
-                                g_cpuFeatures |= XArchIntrinsicConstants_Movbe;
-                            }
-
-                            if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT
-                            {
-                                g_cpuFeatures |= XArchIntrinsicConstants_Popcnt;
-                            }
-
-                            const int requiredAvxEcxFlags = (1 << 27)  // OSXSAVE
-                                                          | (1 << 28); // AVX
-
-                            if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags)
-                            {
-                                if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
-                                {
-                                    g_cpuFeatures |= XArchIntrinsicConstants_Avx;
-
-                                    if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA
-                                    {
-                                        g_cpuFeatures |= XArchIntrinsicConstants_Fma;
-                                    }
-
-                                    if (maxCpuId >= 0x07)
-                                    {
-                                        __cpuidex(cpuidInfo, 0x00000007, 0x00000000);
-
-                                        if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2
-                                        {
-                                            g_cpuFeatures |= XArchIntrinsicConstants_Avx2;
-                                            g_cpuFeatures |= XArchIntrinsicConstants_VectorT256;
-
-                                            if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111
-                                            {
-                                                if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F
-                                                {
-                                                    g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;
-                                                    g_cpuFeatures |= XArchIntrinsicConstants_VectorT512;
-
-                                                    bool isAVX512_VLSupported = false;
-                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL
-                                                    {
-                                                        g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl;
-                                                        isAVX512_VLSupported = true;
-                                                    }
-
-                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW
-                                                    {
-                                                        g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw;
-                                                        if (isAVX512_VLSupported) // AVX512BW_VL
-                                                        {
-                                                            g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl;
-                                                        }
-                                                    }
-
-                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD
-                                                    {
-                                                        g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd;
-                                                        if (isAVX512_VLSupported) // AVX512CD_VL
-                                                        {
-                                                            g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl;
-                                                        }
-                                                    }
-
-                                                    if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ
-                                                    {
-                                                        g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq;
-                                                        if (isAVX512_VLSupported) // AVX512DQ_VL
-                                                        {
-                                                            g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl;
-                                                        }
-                                                    }
-
-                                                    if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI
-                                                    {
-                                                        g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi;
-                                                        if (isAVX512_VLSupported) // AVX512VBMI_VL
-                                                        {
-                                                            g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl;
-                                                        }
-                                                    }
-                                                }
-                                            }
-
-                                            __cpuidex(cpuidInfo, 0x00000007, 0x00000001);
-
-                                            if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI
-                                            {
-                                                g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni;
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        if (maxCpuId >= 0x07)
-        {
-            __cpuidex(cpuidInfo, 0x00000007, 0x00000000);
-
-            if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Bmi1;
-            }
-
-            if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Bmi2;
-            }
-
-            if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0)
-            {
-                g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE
-            }
-        }
-    }
-
-    __cpuid(cpuidInfo, 0x80000000);
-    uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);
-
-    if (maxCpuIdEx >= 0x80000001)
-    {
-        __cpuid(cpuidInfo, 0x80000001);
-
-        if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT
-        {
-            g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt;
-        }
-
-#ifdef HOST_AMD64
-        // AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode
-        // if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers.
-        // fxsr_opt is bit 25 of CPUID_EDX
-        if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0)
-            g_fHasFastFxsave = true;
-#endif
-    }
-#endif // HOST_X86 || HOST_AMD64
-
-#if defined(HOST_ARM64)
-    PAL_GetCpuCapabilityFlags (&g_cpuFeatures);
-#endif
+    g_cpuFeatures = minipal_getcpufeatures();

     if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures)
     {
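
The net effect of this hunk: the hand-rolled CPUID walk above collapses into a single call into the shared native minipal, and startup then only has to verify that the host CPU exposes every feature the AOT-compiled image requires (the mask the compiler records when building with options such as --instruction-set:native). A minimal sketch of that check, assuming minipal_getcpufeatures() returns the feature bits as an int mask as used here; the helper name is illustrative:

#include <minipal/cpufeatures.h>

// Illustrative sketch of the startup-time verification pattern shown in the diff.
static bool VerifyRequiredCpuFeatures(int requiredCpuFeatures)
{
    int cpuFeatures = minipal_getcpufeatures();

    // Every feature bit baked into the image must be present on the running CPU.
    return (cpuFeatures & requiredCpuFeatures) == requiredCpuFeatures;
}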
