Skip to content

release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) #100546

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@ struct FEATURES {

extern struct FEATURES __aarch64_cpu_features;

struct SME_STATE {
long PSTATE;
long TPIDR2_EL0;
};

extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;

extern bool __aarch64_has_sme_and_tpidr2_el0;

#if __GNUC__ >= 9
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
Expand All @@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {

__init_cpu_features();
}

__attribute__((target("sve"))) long
__arm_get_current_vg(void) __arm_streaming_compatible {
struct SME_STATE State = __arm_sme_state();
unsigned long long features =
__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
bool HasSVE = features & (1ULL << FEAT_SVE);

if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
return 0;

if (HasSVE || (State.PSTATE & 1)) {
long vl;
__asm__ __volatile__("cntd %0" : "=r"(vl));
return vl;
}

return 0;
}
44 changes: 44 additions & 0 deletions compiler-rt/lib/builtins/aarch64/sme-abi.S
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#else
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif

.arch armv9-a+sme
Expand Down Expand Up @@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
.variant_pcs __arm_get_current_vg
BTI_C

stp x29, x30, [sp, #-16]!
.cfi_def_cfa_offset 16
mov x29, sp
.cfi_def_cfa w29, 16
.cfi_offset w30, -8
.cfi_offset w29, -16
adrp x17, CPU_FEATS_SYMBOL
ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
tbnz w17, #30, 0f
adrp x16, TPIDR2_SYMBOL
ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
cbz w16, 1f
0:
mov x18, x1
bl __arm_sme_state
mov x1, x18
and x17, x17, #0x40000000
bfxil x17, x0, #0, #1
cbz x17, 1f
cntd x0
.cfi_def_cfa wsp, 16
ldp x29, x30, [sp], #16
.cfi_def_cfa_offset 0
.cfi_restore w30
.cfi_restore w29
ret
1:
mov x0, xzr
.cfi_def_cfa wsp, 16
ldp x29, x30, [sp], #16
.cfi_def_cfa_offset 0
.cfi_restore w30
.cfi_restore w29
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)

NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
Expand Down
Loading