Skip to content

Commit

Permalink
aarch64: handle exceptions on dedicated stack
Browse files Browse the repository at this point in the history
This patch changes exception handling mechanism to use dedicated
exception stack instead of the default stack provided for kernel and
application threads. This is critical to support Golang apps which
are known to use tiny stacks in coroutines, and the exception handler of
the svc instruction cannot use a single byte of the application stack in
such a case. Having a separate exception stack has other benefits for debugging
and will allow future implementation of "lazy" stacks. This also makes
aarch64 port similar to x64 where we use dedicated stacks as well.

To support dedicated stacks, we take advantage of the fact that at every
exception level but EL0 there are two stack registers available -
SP_ELx and SP_EL0. OSv runs at the exception level EL1 and in boot.S
selects SP_EL1 to be used by default. The SP effectively is an alias to
one of the two stack registers and can be changed by setting the system
register SPSel (stack selector).

This patch changes all exception handlers (both synchronous and
asynchronous (interrupts)) in entry.S to switch to the new exception
stack before pushing a frame by setting the SPSel to #0 which makes
SP point to SP_EL0. We have to switch to SP_EL0 even in the case of the
nested exception when we are on SP_EL0 as per ARM specification the SP
is always reset to SP_ELx (in our case SP_EL1) after taking an
exception. The typical case of nested exception is handling of a page
fault where we enable exceptions downstream in the page fault handler
(arch/aarch64/mmu.cc) and it may be interrupted by an asynchronous
exception like a timer one. To that end we also add the exception
handlers for curr_el_sp0 which the system invokes when code is running
with SP pointing to SP_EL0.

Finally, we also change the context switch code in sched.S to make
it save not only the default stack register but also explicitly save
SP_EL0, SP_EL1 and SPSel for the old thread and then restore those
from arch_thread_state for new thread. This makes context switch
slightly more expensive and has been measured to add around 5% of
overhead.

This patch effectively enhances OSv to allow running Golang apps on
AArch64.

Fixes #1155

Signed-off-by: Waldemar Kozaczuk <jwkozaczuk@gmail.com>
  • Loading branch information
wkozaczuk committed May 4, 2022
1 parent 4a004ed commit a6fbc36
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 29 deletions.
1 change: 1 addition & 0 deletions arch/aarch64/arch-cpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct arch_cpu {
};

struct arch_thread {
char exception_stack[4096*4] __attribute__((aligned(16)));
};

struct arch_fpu {
Expand Down
12 changes: 8 additions & 4 deletions arch/aarch64/arch-switch.hh
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ void thread::switch_to_first()
remote_thread_local_var(percpu_base) = _detached_state->_cpu->percpu_base;

asm volatile("\n"
"ldp x29, x0, %2 \n"
"ldp x22, x21, %3 \n"
"ldp x29, x0, %3 \n"
"ldp x22, x21, %4 \n"
"mov sp, x22 \n"
"ldr x22, %5 \n"
"msr sp_el0, x22 \n"
"blr x21 \n"
: // No output operands - this is to designate the input operands as earlyclobbers
"=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp)
: "Ump"(this->_state.fp), "Ump"(this->_state.sp)
"=&Ump"(this->_state.fp), "=&Ump"(this->_state.sp), "=&Ump"(this->_state.exception_sp)
: "Ump"(this->_state.fp), "Ump"(this->_state.sp), "Ump"(this->_state.exception_sp)
: "x0", "x19", "x20", "x21", "x22", "x23", "x24",
"x25", "x26", "x27", "x28", "x30", "memory");
}
Expand All @@ -59,6 +61,8 @@ void thread::init_stack()
_state.thread = this;
_state.sp = stacktop;
_state.pc = reinterpret_cast<void*>(thread_main);
_state.exception_sp = _arch.exception_stack + sizeof(_arch.exception_stack);
_state.stack_selector = 1; //Select SP_ELx
}

void thread::setup_tcb()
Expand Down
3 changes: 3 additions & 0 deletions arch/aarch64/arch-thread-state.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ struct thread_state {
void* sp;
void* pc;
void* tcb;

void* exception_sp; //SP_EL0
u64 stack_selector; //1 - selects SP_ELx (default), 0 - selects SP_EL0 (exceptions)
};

#endif /* ARCH_THREAD_STATE_HH_ */
73 changes: 49 additions & 24 deletions arch/aarch64/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,16 @@ exception_vectors:
vector_entry lower_el_aarch32 fiq
vector_entry lower_el_aarch32 serror

/* keep in sync with the struct in exceptions.hh */
.macro push_state_to_exception_frame
sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
/* keep in sync with the struct in exceptions.hh
the switch argument (1 or 0) indicates if we would be switching from
SP_ELx -> SP_EL0 (1) or staying on the same stack - SP_EL0 -> SP_EL0 (0) */
.macro push_state_to_exception_frame switch
// Regardless which stack (pointed by SP_ELx or SP_EL0) was in use when
// exception was taken, the stack is always reset to SP_ELx before exception
// handler is executed. To make sure the exception handler uses the exception
// stack pointed by SP_EL0 we need to set SPSEL to #0.
msr spsel, #0 // switch to exception stack by selecting SP_EL0
sub sp, sp, #48 // make space for align2, align1+ESR, PSTATE, PC, SP
.cfi_adjust_cfa_offset 48
push_pair x28, x29
push_pair x26, x27
Expand All @@ -76,7 +83,13 @@ exception_vectors:
push_pair x4, x5
push_pair x2, x3
push_pair x0, x1
.if \switch == 1
msr spsel, #1 // switch to regular stack (SP_ELx) for brief moment to read it
mov x1, sp // fetch SP of regular stack (spsel 1)
msr spsel, #0 // switch back to exception stack
.else
add x1, sp, #288 // x1 := old SP (48 + 16 * 15 = 288)
.endif
mrs x2, elr_el1
mrs x3, spsr_el1
stp x30, x1, [sp, #240] // store lr, old SP
Expand All @@ -102,6 +115,10 @@ exception_vectors:
pop_pair x24, x25
pop_pair x26, x27
pop_pair x28, x29
// please note we do not need to explicitly switch the stack when returning
// from exception by resetting the stack selector register, as it will
// happen automatically based on the value of spsr_el1 which we restored above
// (the spsr_el1 holds PSTATE and EL and SP selector)
ldr x30, [sp], #48
.cfi_adjust_cfa_offset -48
.endm /* pop_state_to_exception_frame */
Expand Down Expand Up @@ -143,7 +160,7 @@ entry_\level\()_\type:
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
push_state_to_exception_frame
push_state_to_exception_frame 1
mrs x1, esr_el1
str w1, [sp, #272] // Store Exception Syndrom Register in the frame
mov x0, sp // Save exception_frame to x0
Expand All @@ -165,8 +182,6 @@ entry_\level\()_\type:
.equ EX_TYPE_FIQ, 0x2
.equ EX_TYPE_SERROR, 0x3

entry_unexpected_exception curr_el_sp0, sync, #CURR_EL_SP0, #EX_TYPE_SYNC
entry_unexpected_exception curr_el_sp0, irq, #CURR_EL_SP0, #EX_TYPE_IRQ
entry_unexpected_exception curr_el_sp0, fiq, #CURR_EL_SP0, #EX_TYPE_FIQ
entry_unexpected_exception curr_el_sp0, serror, #CURR_EL_SP0, #EX_TYPE_SERROR

Expand All @@ -183,38 +198,39 @@ entry_unexpected_exception lower_el_aarch32, irq, #LOWER_EL_AARCH32, #EX_TYPE_IR
entry_unexpected_exception lower_el_aarch32, fiq, #LOWER_EL_AARCH32, #EX_TYPE_FIQ
entry_unexpected_exception lower_el_aarch32, serror, #LOWER_EL_AARCH32, #EX_TYPE_SERROR

.global entry_curr_el_spx_sync
.hidden entry_curr_el_spx_sync
.type entry_curr_el_spx_sync, @function
entry_curr_el_spx_sync:
.macro entry_curr_el_sync stack, switch
.global entry_curr_el_sp\stack\()_sync
.hidden entry_curr_el_sp\stack\()_sync
.type entry_curr_el_sp\stack\()_sync, @function
entry_curr_el_sp\stack\()_sync:
.cfi_startproc simple
.cfi_signal_frame
.cfi_def_cfa sp, 0
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
push_state_to_exception_frame
push_state_to_exception_frame \switch
mrs x1, esr_el1
str w1, [sp, #272] // Store Exception Syndrom Register in the frame
ubfm x2, x1, #ESR_EC_BEG, #ESR_EC_END // Exception Class -> X2
ubfm x3, x1, #ESR_FLT_BEG, #ESR_FLT_END // FLT -> X3
cmp x2, #ESR_EC_SVC64
b.eq handle_system_call
b.eq handle_system_call_sp\stack
cmp x2, #ESR_EC_DATA_ABORT
b.eq handle_mem_abort
b.eq handle_mem_abort_sp\stack
cmp x2, #ESR_EC_INSN_ABORT
b.ne unexpected_sync_exception
handle_mem_abort:
cbz x3, unexpected_sync_exception
b.ne unexpected_sync_exception_sp\stack
handle_mem_abort_sp\stack:
cbz x3, unexpected_sync_exception_sp\stack
cmp x3, #3
b.hi unexpected_sync_exception
b.hi unexpected_sync_exception_sp\stack

mov x0, sp // save exception_frame to x0
bl page_fault
pop_state_from_exception_frame
eret
.cfi_endproc
handle_system_call:
handle_system_call_sp\stack:
.cfi_startproc
//see https://man7.org/linux/man-pages/man2/syscall.2.html for details
//about calling convention for arm64
Expand All @@ -237,7 +253,7 @@ handle_system_call:
pop_state_from_exception_frame
eret
.cfi_endproc
unexpected_sync_exception:
unexpected_sync_exception_sp\stack:
.cfi_startproc
mov x0, sp // save exception_frame to x0
mov x1, #CURR_EL_SPX
Expand All @@ -246,23 +262,32 @@ unexpected_sync_exception:
pop_state_from_exception_frame
bl abort
.cfi_endproc
.endm

entry_curr_el_sync 0, 0 // the synchronous exception handler used when the SP_EL0 is active
entry_curr_el_sync x, 1 // the synchronous exception handler used when the SP_ELx is active

.global entry_curr_el_spx_irq
.hidden entry_curr_el_spx_irq
.type entry_curr_el_spx_irq, @function
entry_curr_el_spx_irq:
.macro entry_curr_el_irq stack, switch
.global entry_curr_el_sp\stack\()_irq
.hidden entry_curr_el_sp\stack\()_irq
.type entry_curr_el_sp\stack\()_irq, @function
entry_curr_el_sp\stack\()_irq:
.cfi_startproc simple
.cfi_signal_frame
.cfi_def_cfa sp, 0
.cfi_offset x30, -32 // Point to the elr register located at the -32 offset
// of the exception frame to help gdb link to the
// address when interrupt was raised
push_state_to_exception_frame
push_state_to_exception_frame \switch
mov x0, sp
bl interrupt // extern "C"
pop_state_from_exception_frame
eret
.cfi_endproc
.endm

entry_curr_el_irq 0, 0 // the asynchronous exception handler used when the SP_EL0 is active
entry_curr_el_irq x, 1 // the asynchronous exception handler used when the SP_ELx is active

.global call_signal_handler_thunk
.hidden call_signal_handler_thunk
Expand Down
18 changes: 17 additions & 1 deletion arch/aarch64/sched.S
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,31 @@ reschedule_from_interrupt:
isb

str x29, [x0, #0] //Save frame pointer of the old thread

mrs x2, spsel //Fetch old thread stack selector
msr spsel, #1 //Select SP_ELx
mov x3, sp //Fetch old thread stack pointer

adr x4, 1f //Fetch old thread instruction point
stp x3, x4, [x0, #16] //Save old thread sp and pc

msr spsel, #0 //Select SP_EL0
mov x3, sp //Fetch old thread exception stack pointer
stp x3, x2, [x0, #40] //Save old thread exception stack pointer and stack selector

ldp x29, x0, [x1, #0] //Set frame pointer of the new thread and this (x0) of the new thread
//Please note that the pc may point to thread_main_c(thread*) which is
//why we have to set x0 (1st argument) to new thread object
ldp x3, x4, [x1, #16] //Fetch new thread sp and pc
mov sp, x3 //Set new thread stack pointer

msr spsel, #1 //Select SP_ELx
mov sp, x3 //Restore new thread stack pointer

ldp x3, x2, [x1, #40] //Load new thread exception stack pointer and stack selector
msr spsel, #0 //Select SP_EL0
mov sp, x3 //Restore new thread exception stack pointer
msr spsel, x2 //Restore new thread stack selector (1-SP_ELx,0-SP_EL0)

blr x4 //Jump to the new thread pc

1:
Expand Down

0 comments on commit a6fbc36

Please sign in to comment.