Skip to content

Commit 9528c15

Browse files
authored
ARM64-SVE: Add SVE registers to pal context (#103801)
* ARM64-SVE: Add SVE registers to pal context * fix debug sizes * Add SVE defines if missing from Linux host * More missing defines * More missing defines * Add cast * Move SVE registers after debug registers * Fix dbgtargetcontext * Remove SVE from debug context * Move ffr * Add SVE registers to asmconstants * Remove Z registers from context * backup/restore SVE in Context2.S Change-Id: I2f6bc39068d9fed3f45b548089b144884607d97b * Remove unused SVE128 struct * Add XStateFeaturesMask * restore instrsarm64sve.h changes * Restore SIZEOF__CONTEXT for windows * Fix AsmOffsets.cs for windows * Fix AsmOffsets.cs for windows * Restore missing ldr * Check size of SVE data returned from the kernel * 16 P registers * Copy context based on XSTATE_MASK_SVE * Move context handling inside XSTATE checks * Set CONTEXT_XSTATE * Remove __pad and fix sizes * Fix context sizes * Fix context sizes * Only read/write OS context SVE registers on 128bit
1 parent 6f1d8c5 commit 9528c15

File tree

10 files changed

+457
-49
lines changed

10 files changed

+457
-49
lines changed

src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,15 @@ class AsmOffsets
2525
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0;
2626
#endif // TARGET_UNIX
2727
#elif TARGET_ARM64
28+
#if TARGET_UNIX
29+
public const int SIZEOF__REGDISPLAY = 0x9e0;
30+
public const int OFFSETOF__REGDISPLAY__SP = 0x938;
31+
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940;
32+
#else // TARGET_UNIX
2833
public const int SIZEOF__REGDISPLAY = 0x940;
2934
public const int OFFSETOF__REGDISPLAY__SP = 0x898;
3035
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0;
36+
#endif // TARGET_UNIX
3137
#elif TARGET_ARM
3238
public const int SIZEOF__REGDISPLAY = 0x410;
3339
public const int OFFSETOF__REGDISPLAY__SP = 0x3ec;
@@ -71,9 +77,15 @@ class AsmOffsets
7177
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8;
7278
#endif // TARGET_UNIX
7379
#elif TARGET_ARM64
80+
#if TARGET_UNIX
81+
public const int SIZEOF__REGDISPLAY = 0x9d0;
82+
public const int OFFSETOF__REGDISPLAY__SP = 0x930;
83+
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938;
84+
#else // TARGET_UNIX
7485
public const int SIZEOF__REGDISPLAY = 0x930;
7586
public const int OFFSETOF__REGDISPLAY__SP = 0x890;
7687
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898;
88+
#endif // TARGET_UNIX
7789
#elif TARGET_ARM
7890
public const int SIZEOF__REGDISPLAY = 0x408;
7991
public const int OFFSETOF__REGDISPLAY__SP = 0x3e8;
@@ -111,9 +123,13 @@ class AsmOffsets
111123
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20;
112124
#else // TARGET_UNIX
113125
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0;
114-
#endif // TARGET_UNIx
126+
#endif // TARGET_UNIX
115127
#elif TARGET_ARM64
128+
#if TARGET_UNIX
129+
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0;
130+
#else // TARGET_UNIX
116131
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390;
132+
#endif // TARGET_UNIX
117133
#elif TARGET_ARM
118134
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0;
119135
#elif TARGET_X86

src/coreclr/debug/inc/dbgtargetcontext.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,12 @@ typedef DECLSPEC_ALIGN(16) struct {
470470

471471
} DT_CONTEXT;
472472

473+
474+
#if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS)
475+
// On non-Windows, non-cross builds T_CONTEXT carries the SVE state starting at
// XStateFeaturesMask; DT_CONTEXT must end exactly where that state begins.
static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, XStateFeaturesMask), "DT_CONTEXT must not include the SVE registers on ARM64");
476+
#else
473477
static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64");
478+
#endif
474479

475480
#elif defined(DTCONTEXT_IS_LOONGARCH64)
476481

src/coreclr/pal/inc/pal.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1844,6 +1844,12 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY {
18441844
#define CONTEXT_EXCEPTION_REQUEST 0x40000000L
18451845
#define CONTEXT_EXCEPTION_REPORTING 0x80000000L
18461846

1847+
#define CONTEXT_XSTATE (CONTEXT_ARM64 | 0x40L)
1848+
1849+
#define XSTATE_SVE (0)
1850+
1851+
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE))
1852+
18471853
//
18481854
// This flag is set by the unwinder if it has unwound to a call
18491855
// site, and cleared whenever it unwinds through a trap frame.
@@ -1944,7 +1950,18 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
19441950
/* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS];
19451951
/* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS];
19461952
/* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS];
1947-
/* +0x390 */
1953+
1954+
/* +0x390 */ DWORD64 XStateFeaturesMask;
1955+
1956+
//
1957+
// Sve Registers
1958+
//
1959+
// TODO-SVE: Support Vector register sizes >128bit
1960+
// For 128bit, Z and V registers fully overlap, so there is no need to load/store both.
1961+
/* +0x398 */ DWORD Vl;
1962+
/* +0x39c */ DWORD Ffr;
1963+
/* +0x3a0 */ DWORD P[16];
1964+
/* +0x3e0 */
19481965

19491966
} CONTEXT, *PCONTEXT, *LPCONTEXT;
19501967

src/coreclr/pal/src/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ if(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET
9090
add_definitions(-DXSTATE_SUPPORTED)
9191
endif(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)
9292

93+
if(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)
94+
# Currently the _xstate is not available on Alpine Linux
95+
add_definitions(-DXSTATE_SUPPORTED)
96+
endif(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)
97+
9398
if(CLR_CMAKE_TARGET_LINUX_MUSL)
9499
# Setting RLIMIT_NOFILE breaks debugging of coreclr on Alpine Linux for some reason
95100
add_definitions(-DDONT_SET_RLIMIT_NOFILE)

src/coreclr/pal/src/arch/arm64/asmconstants.h

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818

1919
#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
2020

21+
#define CONTEXT_XSTATE_BIT (6)
22+
#define CONTEXT_XSTATE (1 << CONTEXT_XSTATE_BIT)
23+
24+
#define XSTATE_SVE_BIT (0)
25+
26+
// Mask with only the SVE feature bit set. Built from XSTATE_SVE_BIT, the bit
// index this header defines (XSTATE_SVE is not defined here).
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE_BIT))
27+
2128

2229
#define CONTEXT_ContextFlags 0
2330
#define CONTEXT_Cpsr CONTEXT_ContextFlags+4
@@ -54,6 +61,7 @@
5461
#define CONTEXT_Lr CONTEXT_Fp+8
5562
#define CONTEXT_Sp CONTEXT_Lr+8
5663
#define CONTEXT_Pc CONTEXT_Sp+8
64+
5765
#define CONTEXT_NEON_OFFSET CONTEXT_Pc+8
5866
#define CONTEXT_V0 0
5967
#define CONTEXT_V1 CONTEXT_V0+16
@@ -89,7 +97,42 @@
8997
#define CONTEXT_V31 CONTEXT_V30+16
9098
#define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16
9199
#define CONTEXT_Fpcr 0
92-
#define CONTEXT_Fpsr CONTEXT_Fpcr+8
93-
#define CONTEXT_Size ((CONTEXT_NEON_OFFSET + CONTEXT_Fpsr + 8 + 0xf) & ~0xf)
100+
#define CONTEXT_Fpsr CONTEXT_Fpcr+4
101+
#define CONTEXT_NEON_SIZE CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+4
102+
103+
#define CONTEXT_DEBUG_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_NEON_SIZE
104+
#define CONTEXT_DEBUG_SIZE 120 // (8*4)+(8*8)+(2*4)+(2*8)
105+
106+
#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_DEBUG_OFFSET+CONTEXT_DEBUG_SIZE
107+
108+
// TODO-SVE: Support Vector register sizes >128bit
109+
110+
#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8
111+
#define CONTEXT_VL_OFFSET 0
112+
113+
// SVE register offsets are multiples of the vector length
114+
#define CONTEXT_SVE_REGS_OFFSET CONTEXT_VL_OFFSET+4
115+
#define CONTEXT_FFR_VL 0
116+
#define CONTEXT_P0_VL CONTEXT_FFR_VL+1
117+
#define CONTEXT_P1_VL CONTEXT_P0_VL+1
118+
#define CONTEXT_P2_VL CONTEXT_P1_VL+1
119+
#define CONTEXT_P3_VL CONTEXT_P2_VL+1
120+
#define CONTEXT_P4_VL CONTEXT_P3_VL+1
121+
#define CONTEXT_P5_VL CONTEXT_P4_VL+1
122+
#define CONTEXT_P6_VL CONTEXT_P5_VL+1
123+
#define CONTEXT_P7_VL CONTEXT_P6_VL+1
124+
#define CONTEXT_P8_VL CONTEXT_P7_VL+1
125+
#define CONTEXT_P9_VL CONTEXT_P8_VL+1
126+
#define CONTEXT_P10_VL CONTEXT_P9_VL+1
127+
#define CONTEXT_P11_VL CONTEXT_P10_VL+1
128+
#define CONTEXT_P12_VL CONTEXT_P11_VL+1
129+
#define CONTEXT_P13_VL CONTEXT_P12_VL+1
130+
#define CONTEXT_P14_VL CONTEXT_P13_VL+1
131+
#define CONTEXT_P15_VL CONTEXT_P14_VL+1
132+
133+
#define CONTEXT_SVE_REGS_SIZE ((CONTEXT_P15_VL+1) * 4)
134+
#define CONTEXT_SVE_SIZE CONTEXT_SVE_REGS_SIZE + 8
135+
136+
#define CONTEXT_Size CONTEXT_SVE_OFFSET + CONTEXT_SVE_SIZE
94137

95138
#endif

src/coreclr/pal/src/arch/arm64/context2.S

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33
//
4-
// Implementation of _CONTEXT_CaptureContext for the ARM platform.
4+
// Implementation of _CONTEXT_CaptureContext for the ARM64 platform.
55
// This function is processor dependent. It is used by exception handling,
66
// and is always apply to the current thread.
77
//
@@ -12,6 +12,7 @@
1212
// Incoming:
1313
// x0: Context*
1414
//
15+
.arch_extension sve
1516
LEAF_ENTRY CONTEXT_CaptureContext, _TEXT
1617
PROLOG_STACK_ALLOC 32
1718
.cfi_adjust_cfa_offset 32
@@ -69,7 +70,6 @@ LOCAL_LABEL(Done_CONTEXT_CONTROL):
6970
stp x26, x27, [x0, CONTEXT_X26]
7071
str x28, [x0, CONTEXT_X28]
7172

72-
7373
LOCAL_LABEL(Done_CONTEXT_INTEGER):
7474
ldr w1, [x0, CONTEXT_ContextFlags]
7575
// clang's assembler doesn't seem to support the mov Wx, imm32 yet
@@ -104,6 +104,41 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
104104
sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET
105105

106106
LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT):
107+
ldr w1, [x0, CONTEXT_ContextFlags]
108+
// clang's assembler doesn't seem to support the mov Wx, imm32 yet
109+
movz w2, #0x40, lsl #16
110+
movk w2, #0x40
111+
mov w3, w2
112+
and w2, w1, w2
113+
cmp w2, w3
114+
b.ne LOCAL_LABEL(Done_CONTEXT_SVE)
115+
116+
ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
117+
tbz x1, #XSTATE_SVE_BIT, LOCAL_LABEL(Done_CONTEXT_SVE)
118+
119+
add x0, x0, CONTEXT_SVE_OFFSET
120+
str p0, [x0, CONTEXT_P0_VL, MUL VL]
121+
str p1, [x0, CONTEXT_P1_VL, MUL VL]
122+
str p2, [x0, CONTEXT_P2_VL, MUL VL]
123+
str p3, [x0, CONTEXT_P3_VL, MUL VL]
124+
str p4, [x0, CONTEXT_P4_VL, MUL VL]
125+
str p5, [x0, CONTEXT_P5_VL, MUL VL]
126+
str p6, [x0, CONTEXT_P6_VL, MUL VL]
127+
str p7, [x0, CONTEXT_P7_VL, MUL VL]
128+
str p8, [x0, CONTEXT_P8_VL, MUL VL]
129+
str p9, [x0, CONTEXT_P9_VL, MUL VL]
130+
str p10, [x0, CONTEXT_P10_VL, MUL VL]
131+
str p11, [x0, CONTEXT_P11_VL, MUL VL]
132+
str p12, [x0, CONTEXT_P12_VL, MUL VL]
133+
str p13, [x0, CONTEXT_P13_VL, MUL VL]
134+
str p14, [x0, CONTEXT_P14_VL, MUL VL]
135+
str p15, [x0, CONTEXT_P15_VL, MUL VL]
136+
rdffr p0.b
137+
str p0, [x0, CONTEXT_FFR_VL, MUL VL]
138+
// rdffr clobbered p0 above; reload the original p0 from the slot it was saved
// to (CONTEXT_P0_VL). Slot 0 (CONTEXT_FFR_VL) now holds FFR, not p0.
ldr p0, [x0, CONTEXT_P0_VL, MUL VL]
139+
sub x0, x0, CONTEXT_SVE_OFFSET
140+
141+
LOCAL_LABEL(Done_CONTEXT_SVE):
107142

108143
EPILOG_STACK_FREE 32
109144
ret
@@ -124,6 +159,7 @@ LEAF_ENTRY RtlCaptureContext, _TEXT
124159
orr w1, w1, #0x4
125160
orr w1, w1, #0x8
126161
str w1, [x0, CONTEXT_ContextFlags]
162+
str xzr, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
127163
ldr x1, [sp]
128164
EPILOG_STACK_FREE 16
129165
b C_FUNC(CONTEXT_CaptureContext)
@@ -133,6 +169,7 @@ LEAF_END RtlCaptureContext, _TEXT
133169
// x0: Context*
134170
// x1: Exception*
135171
//
172+
.arch_extension sve
136173
LEAF_ENTRY RtlRestoreContext, _TEXT
137174

138175
#ifdef HAS_ADDRESS_SANITIZER
@@ -154,6 +191,34 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
154191
// since we potentially clobber x0 below, we'll bank it in x16
155192
mov x16, x0
156193

194+
ldr w17, [x16, CONTEXT_ContextFlags]
195+
tbz w17, #CONTEXT_XSTATE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)
196+
197+
ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET]
198+
tbz w17, #XSTATE_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)
199+
200+
add x16, x16, CONTEXT_SVE_OFFSET
201+
ldr p0, [x16, CONTEXT_FFR_VL, MUL VL]
202+
wrffr p0.b
203+
ldr p0, [x16, CONTEXT_P0_VL, MUL VL]
204+
ldr p1, [x16, CONTEXT_P1_VL, MUL VL]
205+
ldr p2, [x16, CONTEXT_P2_VL, MUL VL]
206+
ldr p3, [x16, CONTEXT_P3_VL, MUL VL]
207+
ldr p4, [x16, CONTEXT_P4_VL, MUL VL]
208+
ldr p5, [x16, CONTEXT_P5_VL, MUL VL]
209+
ldr p6, [x16, CONTEXT_P6_VL, MUL VL]
210+
ldr p7, [x16, CONTEXT_P7_VL, MUL VL]
211+
ldr p8, [x16, CONTEXT_P8_VL, MUL VL]
212+
ldr p9, [x16, CONTEXT_P9_VL, MUL VL]
213+
ldr p10, [x16, CONTEXT_P10_VL, MUL VL]
214+
ldr p11, [x16, CONTEXT_P11_VL, MUL VL]
215+
ldr p12, [x16, CONTEXT_P12_VL, MUL VL]
216+
ldr p13, [x16, CONTEXT_P13_VL, MUL VL]
217+
ldr p14, [x16, CONTEXT_P14_VL, MUL VL]
218+
ldr p15, [x16, CONTEXT_P15_VL, MUL VL]
219+
sub x16, x16, CONTEXT_SVE_OFFSET
220+
221+
LOCAL_LABEL(No_Restore_CONTEXT_SVE):
157222
ldr w17, [x16, CONTEXT_ContextFlags]
158223
tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)
159224

@@ -230,3 +295,12 @@ LEAF_ENTRY RestoreCompleteContext, _TEXT
230295
LEAF_END RestoreCompleteContext, _TEXT
231296

232297
#endif // __APPLE__
298+
299+
// Incoming:
300+
// None
301+
//
302+
.arch_extension sve
303+
LEAF_ENTRY CONTEXT_GetSveLengthFromOS, _TEXT
304+
// rdvl with a multiplier of 1 writes the current SVE vector register length,
// in bytes, to x0.
rdvl x0, 1
305+
// Return to the caller; the length is left in x0.
ret lr
306+
LEAF_END CONTEXT_GetSveLengthFromOS, _TEXT

src/coreclr/pal/src/exception/signal.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -863,7 +863,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex
863863

864864
ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;
865865

866-
#if defined(HOST_AMD64)
866+
#if defined(HOST_AMD64) || defined(HOST_ARM64)
867867
contextFlags |= CONTEXT_XSTATE;
868868
#endif
869869

@@ -1053,7 +1053,7 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext
10531053

10541054
ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;
10551055

1056-
#if defined(HOST_AMD64)
1056+
#if defined(HOST_AMD64) || defined(HOST_ARM64)
10571057
contextFlags |= CONTEXT_XSTATE;
10581058
#endif
10591059

0 commit comments

Comments
 (0)