Skip to content

Commit 3d1bf78

Browse files
committed
Merge branch 'for-next/sve' into for-next/core
Optimise SVE switching for CPUs with 128-bit implementations.

* for-next/sve:
  arm64/sve: Skip flushing Z registers with 128 bit vectors
  arm64/sve: Use the sve_flush macros in sve_load_from_fpsimd_state()
  arm64/sve: Split _sve_flush macro into separate Z and predicate flushes
2 parents a4a4914 + ad4711f commit 3d1bf78

File tree

4 files changed

+23
-11
lines changed

4 files changed

+23
-11
lines changed

arch/arm64/include/asm/fpsimd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
6969
extern void sve_save_state(void *state, u32 *pfpsr);
7070
extern void sve_load_state(void const *state, u32 const *pfpsr,
7171
unsigned long vq_minus_1);
72-
extern void sve_flush_live(void);
72+
extern void sve_flush_live(unsigned long vq_minus_1);
7373
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
7474
unsigned long vq_minus_1);
7575
extern unsigned int sve_get_vl(void);

arch/arm64/include/asm/fpsimdmacros.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,10 @@
213213
mov v\nz\().16b, v\nz\().16b
214214
.endm
215215

216-
.macro sve_flush
216+
.macro sve_flush_z
217217
_for n, 0, 31, _sve_flush_z \n
218+
.endm
219+
.macro sve_flush_p_ffr
218220
_for n, 0, 15, _sve_pfalse \n
219221
_sve_wrffr 0
220222
.endm

arch/arm64/kernel/entry-fpsimd.S

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,24 @@ SYM_FUNC_END(sve_set_vq)
6363
* and the rest zeroed. All the other SVE registers will be zeroed.
6464
*/
6565
SYM_FUNC_START(sve_load_from_fpsimd_state)
66-
sve_load_vq x1, x2, x3
67-
fpsimd_restore x0, 8
68-
_for n, 0, 15, _sve_pfalse \n
69-
_sve_wrffr 0
70-
ret
66+
sve_load_vq x1, x2, x3
67+
fpsimd_restore x0, 8
68+
sve_flush_p_ffr
69+
ret
7170
SYM_FUNC_END(sve_load_from_fpsimd_state)
7271

73-
/* Zero all SVE registers but the first 128-bits of each vector */
72+
/*
73+
* Zero all SVE registers but the first 128-bits of each vector
74+
*
75+
* VQ must already be configured by caller, any further updates of VQ
76+
* will need to ensure that the register state remains valid.
77+
*
78+
* x0 = VQ - 1
79+
*/
7480
SYM_FUNC_START(sve_flush_live)
75-
sve_flush
81+
cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
82+
sve_flush_z
83+
1: sve_flush_p_ffr
7684
ret
7785
SYM_FUNC_END(sve_flush_live)
7886

arch/arm64/kernel/fpsimd.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
957957
* disabling the trap, otherwise update our in-memory copy.
958958
*/
959959
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
960-
sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
961-
sve_flush_live();
960+
unsigned long vq_minus_one =
961+
sve_vq_from_vl(current->thread.sve_vl) - 1;
962+
sve_set_vq(vq_minus_one);
963+
sve_flush_live(vq_minus_one);
962964
fpsimd_bind_task_to_cpu();
963965
} else {
964966
fpsimd_to_sve(current);

0 commit comments

Comments
 (0)