Skip to content

Commit

Permalink
s390/kernel: lazy restore fpu registers
Browse files Browse the repository at this point in the history
Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.

The kernel does not use floating-point or vector registers and, therefore,
saving and restoring the FPU register contents are performed for handling
signals or switching processes only.  To prepare for using vector
instructions and vector registers within the kernel, enhance the save
behavior and implement a lazy restore at return to user space from a
system call or interrupt.

To implement the lazy restore, save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe.  When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
save_fpu_regs() must be called first.  The CIF_FPU flag is also set at
process switch.  At return to user space, the FPU state is restored.  In
particular, the FPU state includes the floating-point or vector register
contents, as well as vector-enablement and floating-point control.  The
FPU state restore and the clearing of CIF_FPU are also performed in an
atomic fashion.

For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instruction is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE.  If set, the guest
registers must be reloaded again by re-entering the outer SIE loop.  This
is the same behavior as if the SIE critical section is interrupted.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
  • Loading branch information
hbrueckner authored and Martin Schwidefsky committed Jul 22, 2015
1 parent bd55033 commit 9977e88
Show file tree
Hide file tree
Showing 15 changed files with 482 additions and 203 deletions.
2 changes: 2 additions & 0 deletions arch/s390/include/asm/ctl_reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
__ctl_load(reg, cr, cr);
}

void __ctl_set_vx(void);

void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit);

Expand Down
110 changes: 10 additions & 100 deletions arch/s390/include/asm/fpu-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,14 @@ struct fpu {
};
};

/*
 * Only the prototype is visible here.  Per the commit description, this
 * saves the FPU register contents and sets CIF_FPU so that the registers
 * are restored lazily at return to user space.
 */
void save_fpu_regs(struct fpu *fpu);

/* Evaluates to 1 if FPU_USE_VX is set in the flags of @fpu, otherwise 0 */
#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
/* Same test, applied to the thread.fpu state embedded in task @tsk */
#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))

/* VX array structure for address operand constraints in inline assemblies */
struct vx_array { __vector128 _[__NUM_VXRS]; };

static inline int test_fp_ctl(u32 fpc)
{
u32 orig_fpc;
Expand All @@ -48,76 +53,6 @@ static inline int test_fp_ctl(u32 fpc)
return rc;
}

/*
 * Store the floating-point control register into *fpc.
 *
 * @fpc: destination word; passed to the stfpc instruction as a
 *       read-write ("+Q") memory operand.
 */
static inline void save_fp_ctl(u32 *fpc)
{
asm volatile(
" stfpc %0\n"
: "+Q" (*fpc));
}

/*
 * Load the floating-point control register from *fpc.
 *
 * @fpc: source word handed to the lfpc instruction.
 *
 * rc is pre-loaded with -EINVAL through the matching "0" input constraint
 * and is overwritten with 0 by the "la" that follows lfpc, so 0 is returned
 * whenever execution continues past lfpc.
 *
 * NOTE(review): labels 0 and 1 are defined but no exception-table entry
 * referencing them is visible in this fragment, so the -EINVAL path looks
 * unreachable as shown -- confirm against the full tree.
 */
static inline int restore_fp_ctl(u32 *fpc)
{
int rc;

asm volatile(
" lfpc %1\n"
"0: la %0,0\n"
"1:\n"
: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
return rc;
}

/*
 * Store floating-point registers 0-15 into fprs[0..15], one std
 * instruction per register; register N is written to fprs[N].
 *
 * @fprs: destination array; entries 0 through 15 are written.
 *
 * Registers 0, 2, 4 and 6 are stored before 1, 3, 5 and 7; the reason for
 * that ordering is not visible in this fragment.
 */
static inline void save_fp_regs(freg_t *fprs)
{
asm volatile("std 0,%0" : "=Q" (fprs[0]));
asm volatile("std 2,%0" : "=Q" (fprs[2]));
asm volatile("std 4,%0" : "=Q" (fprs[4]));
asm volatile("std 6,%0" : "=Q" (fprs[6]));
asm volatile("std 1,%0" : "=Q" (fprs[1]));
asm volatile("std 3,%0" : "=Q" (fprs[3]));
asm volatile("std 5,%0" : "=Q" (fprs[5]));
asm volatile("std 7,%0" : "=Q" (fprs[7]));
asm volatile("std 8,%0" : "=Q" (fprs[8]));
asm volatile("std 9,%0" : "=Q" (fprs[9]));
asm volatile("std 10,%0" : "=Q" (fprs[10]));
asm volatile("std 11,%0" : "=Q" (fprs[11]));
asm volatile("std 12,%0" : "=Q" (fprs[12]));
asm volatile("std 13,%0" : "=Q" (fprs[13]));
asm volatile("std 14,%0" : "=Q" (fprs[14]));
asm volatile("std 15,%0" : "=Q" (fprs[15]));
}

/*
 * Load floating-point registers 0-15 from fprs[0..15], one ld instruction
 * per register; register N is loaded from fprs[N].
 *
 * @fprs: source array; entries 0 through 15 are read.
 *
 * Registers 0, 2, 4 and 6 are loaded before 1, 3, 5 and 7; the reason for
 * that ordering is not visible in this fragment.
 */
static inline void restore_fp_regs(freg_t *fprs)
{
asm volatile("ld 0,%0" : : "Q" (fprs[0]));
asm volatile("ld 2,%0" : : "Q" (fprs[2]));
asm volatile("ld 4,%0" : : "Q" (fprs[4]));
asm volatile("ld 6,%0" : : "Q" (fprs[6]));
asm volatile("ld 1,%0" : : "Q" (fprs[1]));
asm volatile("ld 3,%0" : : "Q" (fprs[3]));
asm volatile("ld 5,%0" : : "Q" (fprs[5]));
asm volatile("ld 7,%0" : : "Q" (fprs[7]));
asm volatile("ld 8,%0" : : "Q" (fprs[8]));
asm volatile("ld 9,%0" : : "Q" (fprs[9]));
asm volatile("ld 10,%0" : : "Q" (fprs[10]));
asm volatile("ld 11,%0" : : "Q" (fprs[11]));
asm volatile("ld 12,%0" : : "Q" (fprs[12]));
asm volatile("ld 13,%0" : : "Q" (fprs[13]));
asm volatile("ld 14,%0" : : "Q" (fprs[14]));
asm volatile("ld 15,%0" : : "Q" (fprs[15]));
}

/*
 * Store vector registers 0-31 into the array at @vxrs.
 *
 * @vxrs: destination; written as __NUM_VXRS consecutive __vector128
 *        entries (registers 0-15 at offset 0, 16-31 at offset 256).
 *
 * The address of the array is loaded into general register 1, which is
 * listed as clobbered.  The two vstm instructions are emitted as raw
 * .word encodings (presumably for assemblers lacking the vector
 * mnemonics -- not stated in this fragment).
 */
static inline void save_vx_regs(__vector128 *vxrs)
{
/* Array-sized type so the "=Q" output operand covers every element */
typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;

asm volatile(
" la 1,%0\n"
" .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
: "=Q" (*(addrtype *) vxrs) : : "1");
}

static inline void save_vx_regs_safe(__vector128 *vxrs)
{
unsigned long cr0, flags;
Expand All @@ -126,20 +61,13 @@ static inline void save_vx_regs_safe(__vector128 *vxrs)
__ctl_store(cr0, 0, 0);
__ctl_set_bit(0, 17);
__ctl_set_bit(0, 18);
save_vx_regs(vxrs);
__ctl_load(cr0, 0, 0);
arch_local_irq_restore(flags);
}

static inline void restore_vx_regs(__vector128 *vxrs)
{
typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;

asm volatile(
" la 1,%0\n"
" .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
: : "Q" (*(addrtype *) vxrs) : "1");
" .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
: "=Q" (*(struct vx_array *) vxrs) : : "1");
__ctl_load(cr0, 0, 0);
arch_local_irq_restore(flags);
}

static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
Expand Down Expand Up @@ -177,24 +105,6 @@ static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
sizeof(fpregs->fprs));
}

/*
 * Save the FPU state of the CPU into @fpu: first the floating-point
 * control register, then either the vector registers (when @fpu is
 * flagged as using the vector extension) or the floating-point registers.
 */
static inline void save_fpu_regs(struct fpu *fpu)
{
	save_fp_ctl(&fpu->fpc);

	/* Without the vector extension only the 16 FP registers are saved */
	if (!is_vx_fpu(fpu)) {
		save_fp_regs(fpu->fprs);
		return;
	}

	save_vx_regs(fpu->vxrs);
}

/*
 * Load the FPU state in @fpu back into the CPU: first the floating-point
 * control register, then either the vector registers (when @fpu is
 * flagged as using the vector extension) or the floating-point registers.
 * The status returned by restore_fp_ctl() is not evaluated.
 */
static inline void restore_fpu_regs(struct fpu *fpu)
{
	(void) restore_fp_ctl(&fpu->fpc);

	/* Without the vector extension only the 16 FP registers are loaded */
	if (!is_vx_fpu(fpu)) {
		restore_fp_regs(fpu->fprs);
		return;
	}

	restore_vx_regs(fpu->vxrs);
}

#endif

#endif /* _ASM_S390_FPU_INTERNAL_H */
6 changes: 3 additions & 3 deletions arch/s390/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu-internal.h>
#include <asm/isc.h>

#define KVM_MAX_VCPUS 64
Expand Down Expand Up @@ -498,10 +499,9 @@ struct kvm_guestdbg_info_arch {

struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
s390_fp_regs host_fpregs;
unsigned int host_acrs[NUM_ACRS];
s390_fp_regs guest_fpregs;
struct kvm_s390_vregs *host_vregs;
struct fpu host_fpregs;
struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
Expand Down
2 changes: 2 additions & 0 deletions arch/s390/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
/* CPU information flag bit numbers */
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* FPU/vector registers must be restored (lazy restore) */

/* Bit masks derived from the bit numbers above */
#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
#define _CIF_ASCE (1<<CIF_ASCE)
#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
#define _CIF_FPU (1<<CIF_FPU)

#ifndef __ASSEMBLY__

Expand Down
2 changes: 1 addition & 1 deletion arch/s390/include/asm/switch_to.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static inline void restore_access_regs(unsigned int *acrs)
} \
if (next->mm) { \
update_cr_regs(next); \
restore_fpu_regs(&next->thread.fpu); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
} \
Expand Down
5 changes: 5 additions & 0 deletions arch/s390/kernel/asm-offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ int main(void)
DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
BLANK();
DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu));
DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
BLANK();
DEFINE(__FPU_fpc, offsetof(struct fpu, fpc));
DEFINE(__FPU_flags, offsetof(struct fpu, flags));
DEFINE(__FPU_regs, offsetof(struct fpu, regs));
BLANK();
DEFINE(__TI_task, offsetof(struct thread_info, task));
DEFINE(__TI_flags, offsetof(struct thread_info, flags));
DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
Expand Down
3 changes: 2 additions & 1 deletion arch/s390/kernel/compat_signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ static void store_sigregs(void)
/*
 * Reload register state of the current task from current->thread: the
 * access registers from thread.acrs and the FPU registers from thread.fpu.
 *
 * NOTE(review): this text comes from a diff rendering without +/- markers;
 * the restore_fpu_regs() call appears to be the line this commit removes --
 * confirm against the tree.
 */
static void load_sigregs(void)
{
restore_access_regs(current->thread.acrs);
restore_fpu_regs(&current->thread.fpu);
}

static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
Expand Down Expand Up @@ -287,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
set_current_blocked(&set);
save_fpu_regs(&current->thread.fpu);
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
Expand All @@ -309,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
save_fpu_regs(&current->thread.fpu);
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
Expand Down
Loading

0 comments on commit 9977e88

Please sign in to comment.