Skip to content

Amzn2 5.10.144 SVE state trap patch #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4 changes: 3 additions & 1 deletion arch/arm64/include/asm/fpsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_kvm_prepare(void);

extern void fpsimd_bind_task_to_cpu(void);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl);
void *sve_state, unsigned int sve_vl,
enum fp_type *type, enum fp_type to_save);

extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
Expand Down
12 changes: 12 additions & 0 deletions arch/arm64/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,19 @@ struct vcpu_reset_state {

struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;

/*
* Guest floating point state
*
* The architecture has two main floating point extensions,
* the original FPSIMD and SVE. These have overlapping
* register views, with the FPSIMD V registers occupying the
* low 128 bits of the SVE Z registers. When the core
* floating point code saves the register state of a task it
* records which view it saved in fp_type.
*/
void *sve_state;
enum fp_type fp_type;
unsigned int sve_max_vl;

/* Stage 2 paging state used by the hardware on next switch */
Expand Down
13 changes: 13 additions & 0 deletions arch/arm64/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ struct debug_info {
#endif
};

/* Identifiers for the vector register types (SVE and SME). */
enum vec_type {
	ARM64_VEC_SVE,		/* first entry, so its value is 0 */
	ARM64_VEC_SME,
	ARM64_VEC_MAX,		/* keep last: equals the number of types above */
};

/*
 * Floating point register format: either the format recorded for a
 * saved context, or the format a save operation is asked to produce.
 */
enum fp_type {
	/* Save based on current task state. */
	FP_STATE_CURRENT = 0,
	/* FPSIMD register format. */
	FP_STATE_FPSIMD = 1,
	/* SVE register format. */
	FP_STATE_SVE = 2,
};

struct cpu_context {
unsigned long x19;
unsigned long x20;
Expand Down Expand Up @@ -141,6 +153,7 @@ struct thread_struct {
struct user_fpsimd_state fpsimd_state;
} uw;

enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
unsigned int sve_vl; /* SVE vector length */
Expand Down
188 changes: 149 additions & 39 deletions arch/arm64/kernel/fpsimd.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
void *sve_state;
unsigned int sve_vl;
enum fp_type *fp_type;
enum fp_type to_save;
};

static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
Expand Down Expand Up @@ -241,14 +243,6 @@ static void sve_free(struct task_struct *task)
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
*
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] of the
* corresponding Zn), P0-P15 and FFR are encoded in
* task->thread.sve_state, formatted appropriately for vector
* length task->thread.sve_vl.
*
* task->thread.sve_state must point to a valid buffer at least
* sve_state_size(task) bytes in size.
*
* During any syscall, the kernel may optionally clear TIF_SVE and
* discard the vector state except for the FPSIMD subset.
*
Expand All @@ -258,15 +252,39 @@ static void sve_free(struct task_struct *task)
* do_sve_acc() to be called, which does some preparation and then
* sets TIF_SVE.
*
* When stored, FPSIMD registers V0-V31 are encoded in
* During any syscall, the kernel may optionally clear TIF_SVE and
* discard the vector state except for the FPSIMD subset.
*
* The data will be stored in one of two formats:
*
* * FPSIMD only - FP_STATE_FPSIMD:
*
* When only the FPSIMD state is stored, task->thread.fp_type is set to
* FP_STATE_FPSIMD and the FPSIMD registers V0-V31 are encoded in
* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
* logically zero but not stored anywhere; P0-P15 and FFR are not
* stored and have unspecified values from userspace's point of
* view. For hygiene purposes, the kernel zeroes them on next use,
* but userspace is discouraged from relying on this.
*
* task->thread.sve_state does not need to be non-NULL, valid or any
* particular size: it must not be dereferenced.
* particular size: it must not be dereferenced and any data stored
* there should be considered stale and not referenced.
*
* * SVE state - FP_STATE_SVE:
*
* When the full SVE state is stored, task->thread.fp_type is set to
* FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] of the
* corresponding Zn), P0-P15 and FFR are encoded in
* task->thread.sve_state, formatted appropriately for vector
* length task->thread.sve_vl or, if SVCR.SM is set,
* task->thread.sme_vl. The storage for the vector registers in
* task->thread.uw.fpsimd_state should be ignored.
*
* task->thread.sve_state must point to a valid buffer at least
* sve_state_size(task) bytes in size. The data stored in
* task->thread.uw.fpsimd_state.vregs should be considered stale
* and not referenced.
*
* * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
* irrespective of whether TIF_SVE is clear or set, since these are
Expand All @@ -282,47 +300,104 @@ static void sve_free(struct task_struct *task)
*/
static void task_fpsimd_load(void)
{
bool restore_sve_regs = false;
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());

if (system_supports_sve() && test_thread_flag(TIF_SVE))
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
else
fpsimd_load_state(&current->thread.uw.fpsimd_state);
/* Check if we should restore SVE first */
if (system_supports_sve()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
if (test_and_clear_thread_flag(TIF_SVE))
sve_user_disable();
break;
case FP_STATE_SVE:
if (!WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
sve_user_enable();
restore_sve_regs = true;
break;
default:
/*
* This indicates either a bug in
* fpsimd_save() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
* memory allocated for FPSIMD registers so
* try that and hope for the best.
*/
WARN_ON_ONCE(1);
clear_thread_flag(TIF_SVE);
break;
}
}

if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
} else {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(&current->thread.uw.fpsimd_state);
}
}

/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
* date with respect to the CPU registers.
* date with respect to the CPU registers. Note carefully that the
* current context is the context last bound to the CPU, as recorded
* in last. If KVM is involved this may be the guest VM context rather
* than the host thread for the VM pointed to by current. This means
* that we must always reference the state storage via last rather
* than via current. If we are saving KVM state then KVM will have
* ensured that the type of registers to save is set in last->to_save.
*/
static void fpsimd_save(void)
{
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */

bool save_sve_regs = false;
unsigned int vl;
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */

WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());

if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
* There's no way to recover, so kill it:
*/
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
return;
}

sve_save_state((char *)last->sve_state +
sve_ffr_offset(last->sve_vl),
&last->st->fpsr);
} else
fpsimd_save_state(last->st);
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;

/*
* If a task is in a syscall the ABI allows us to only
* preserve the state shared with FPSIMD so don't bother
* saving the full SVE state in that case.
*/
if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
!in_syscall(current_pt_regs())) ||
last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
vl = last->sve_vl;
}

if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
/* Get the configured VL from RDVL, will account for SM */
if (WARN_ON(sve_get_vl() != vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
* There's no way to recover, so kill it:
*/
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
return;
}

sve_save_state((char *)last->sve_state +
sve_ffr_offset(last->sve_vl),
&last->st->fpsr);
*last->fp_type = FP_STATE_SVE;
} else {
fpsimd_save_state(last->st);
*last->fp_type = FP_STATE_FPSIMD;
}
}

Expand Down Expand Up @@ -550,7 +625,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
*/
void sve_sync_to_fpsimd(struct task_struct *task)
{
if (test_tsk_thread_flag(task, TIF_SVE))
if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}

Expand Down Expand Up @@ -627,8 +702,10 @@ int sve_set_vector_length(struct task_struct *task,
}

fpsimd_flush_task_state(task);
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) {
sve_to_fpsimd(task);
task->thread.fp_type = FP_STATE_FPSIMD;
}

if (task == current)
put_cpu_fpsimd_context();
Expand Down Expand Up @@ -952,6 +1029,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
fpsimd_flush_task_state(current);

fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */

Expand Down Expand Up @@ -1067,6 +1145,8 @@ void fpsimd_flush_thread(void)
current->thread.sve_vl_onexec = 0;
}

current->thread.fp_type = FP_STATE_FPSIMD;

put_cpu_fpsimd_context();
}

Expand Down Expand Up @@ -1096,6 +1176,31 @@ void fpsimd_signal_preserve_current_state(void)
sve_to_fpsimd(current);
}

/*
 * Called by KVM when entering the guest.
 *
 * If current still has TIF_SVE set, fold its SVE state down to the
 * FPSIMD-only format and record that in current->thread.fp_type.
 * Does nothing on systems without SVE.
 */
void fpsimd_kvm_prepare(void)
{
	if (system_supports_sve()) {
		/*
		 * KVM does not save host SVE state: the guest can only be
		 * entered from a syscall, and during a syscall the ABI lets
		 * the kernel discard everything except the FPSIMD subset of
		 * the vector state. SVE may have been left enabled for
		 * performance reasons, in which case the task state is
		 * converted to FPSIMD only here.
		 */
		get_cpu_fpsimd_context();

		if (test_and_clear_thread_flag(TIF_SVE)) {
			sve_to_fpsimd(current);
			current->thread.fp_type = FP_STATE_FPSIMD;
		}

		put_cpu_fpsimd_context();
	}
}

/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
Expand All @@ -1110,6 +1215,8 @@ void fpsimd_bind_task_to_cpu(void)
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
last->sve_vl = current->thread.sve_vl;
last->fp_type = &current->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();

if (system_supports_sve()) {
Expand All @@ -1124,7 +1231,8 @@ void fpsimd_bind_task_to_cpu(void)
}

void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
unsigned int sve_vl)
unsigned int sve_vl,
enum fp_type *type, enum fp_type to_save)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
Expand All @@ -1135,6 +1243,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
last->st = st;
last->sve_state = sve_state;
last->sve_vl = sve_vl;
last->fp_type = type;
last->to_save = to_save;
}

/*
Expand Down
2 changes: 2 additions & 0 deletions arch/arm64/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);

dst->thread.fp_type = FP_STATE_FPSIMD;

/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);

Expand Down
8 changes: 8 additions & 0 deletions arch/arm64/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ static int sve_set(struct task_struct *target,
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
SVE_PT_FPSIMD_OFFSET);
clear_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_FPSIMD;
goto out;
}

Expand All @@ -847,6 +848,12 @@ static int sve_set(struct task_struct *target,
}

sve_alloc(target);
if (!target->thread.sve_state) {
ret = -ENOMEM;
clear_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_FPSIMD;
goto out;
}

/*
* Ensure target->thread.sve_state is up to date with target's
Expand All @@ -855,6 +862,7 @@ static int sve_set(struct task_struct *target,
*/
fpsimd_sync_to_sve(target);
set_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_SVE;

BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
Expand Down
Loading