Skip to content

Commit

Permalink
Initial Windows on ARM (AArch64) Support (#249)
Browse files Browse the repository at this point in the history
* Conditionally include ntdll.dll

* Use text relocation instead of GOT

* Use FlushInstructionCache instead of clear_cache for arm64

* Load address in two stages (adrp, add)

* objc_msgSend.aarch64.S add comments

* Add seh directives

* Move .seh_proc into slow lookup section

* Comment out cfi directives

* Substitute raw .seh directives with macros

* Add documentation of SEH annotations

* Detect CPU Architecture with preprocessor

* Cleanup CMakeLists.txt

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>

* Remove line in objc_msgSend.aarch64.S

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>

* Change Test CMakeList to use ARCHITECTURE var

* Use existing clear cache macro

* Change _WIN64 to _WIN32 and reorder labels

* Remove macro and replace _WIN64 with _WIN32

* Remove argument from non-win32 macro

---------

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>
  • Loading branch information
hmelder and davidchisnall authored Dec 2, 2023
1 parent d0d28b8 commit ab23f14
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 40 deletions.
12 changes: 12 additions & 0 deletions CMake/detect_arch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// detect_arch.c
//
// Architecture probe. This file is never meant to compile successfully: every
// branch stops preprocessing with an #error whose text is the name of the
// target architecture, and the build system recovers that name from the
// compiler's diagnostic output. The names emitted here (aarch64, arm, i386,
// x86_64, unknown) must stay in sync with the names the build system looks for.
//
// Each test accepts both the GNU-style macro and its MSVC-style (_M_*)
// equivalent, so compilers that only define the latter are still classified.
#if defined(__aarch64__) || defined(_M_ARM64)
#error aarch64
#elif defined(__arm__) || defined(_M_ARM)
#error arm
#elif defined(__i386__) || defined(_M_IX86)
#error i386
#elif defined(__x86_64__) || defined(_M_X64)
#error x86_64
#else
#error unknown
#endif
21 changes: 19 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ if (MSVC)
set(objc_LINK_FLAGS "/DEBUG /INCREMENTAL:NO ${objc_LINK_FLAGS}")
endif()

# Detect the target architecture by asking the compiler instead of relying on
# CMake's own processor variables. CMake/detect_arch.c deliberately fails to
# compile: it stops at an #error directive whose message is the architecture
# name, and that name is recovered from the captured compiler output.
#
# Paths are taken relative to this CMakeLists.txt rather than the top of the
# whole build, so detection keeps working when this project is pulled in with
# add_subdirectory().
try_compile(
  COMPILE_SUCCESS
  "${CMAKE_CURRENT_BINARY_DIR}"
  "${CMAKE_CURRENT_SOURCE_DIR}/CMake/detect_arch.c"
  OUTPUT_VARIABLE COMPILE_OUTPUT
)

if(NOT COMPILE_SUCCESS)
  # Prefer the name that directly follows the compiler's error marker
  # (GCC: "error: #error aarch64", Clang: "error: aarch64",
  # MSVC: "#error:  aarch64"). The captured output also echoes the compiler
  # command line, so an unanchored search could pick up an architecture-like
  # substring from a directory name or a target triple instead.
  string(REGEX MATCH
    "error: *(#error +)?(aarch64|arm|i386|x86_64|unknown)"
    OBJC_ARCH_DIAGNOSTIC
    "${COMPILE_OUTPUT}")
  if(NOT "${OBJC_ARCH_DIAGNOSTIC}" STREQUAL "")
    set(ARCHITECTURE "${CMAKE_MATCH_2}")
  else()
    # Unrecognised diagnostic format: fall back to the first architecture
    # name found anywhere in the output.
    string(REGEX MATCH "(aarch64|arm|i386|x86_64|unknown)" ARCHITECTURE "${COMPILE_OUTPUT}")
  endif()
  unset(OBJC_ARCH_DIAGNOSTIC)
endif()

# Quoted so that an empty detection result still yields a well-formed call.
set(ARCHITECTURE "${ARCHITECTURE}" CACHE STRING "Architecture Type")
message(STATUS "Architecture: ${ARCHITECTURE}")

# Build configuration
add_compile_definitions(GNUSTEP __OBJC_RUNTIME_INTERNAL__=1)

Expand Down Expand Up @@ -215,6 +230,10 @@ target_sources(objc PRIVATE ${libobjc_CXX_SRCS})

include(FindThreads)
target_link_libraries(objc Threads::Threads)
# RtlRaiseException is provided by ntdll.dll, so the extra link dependency is
# added for Windows targets only.
if(WIN32)
  target_link_libraries(objc ntdll.dll)
endif()


set_target_properties(objc PROPERTIES
Expand Down Expand Up @@ -342,8 +361,6 @@ configure_file(
# Convenience target: runs the cmake_uninstall.cmake script located in this
# directory's build tree. VERBATIM keeps argument escaping identical across
# generators and platforms; the script path is quoted as a single argument.
add_custom_target(uninstall
  COMMAND "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
  VERBATIM)



if (TESTS)
enable_testing()
add_subdirectory(Test)
Expand Down
6 changes: 3 additions & 3 deletions Test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ if (ENABLE_ALL_OBJC_ARC_TESTS)
endif()

# UnexpectedException test currently fails on ARM and needs to be fixed
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
list(APPEND TESTS UnexpectedException.m)
endif ()
# ARCHITECTURE is the architecture name detected by the top-level
# CMakeLists.txt from the compiler itself; the test is registered only when
# that name does not start with "arm" or "aarch64".
if(NOT ARCHITECTURE MATCHES "^(arm|aarch64)")
list(APPEND TESTS UnexpectedException.m)
endif()

# List of single-file tests that won't work with the legacy ABI and so
# shouldn't be run in legacy mode.
Expand Down
18 changes: 13 additions & 5 deletions block_to_imp.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#else
#include "safewindows.h"
#endif
#include "objc/runtime.h"
#include "objc/blocks_runtime.h"
Expand All @@ -22,11 +24,17 @@
// Compilers that do not provide __has_builtin get a fallback that reports
// every builtin as unavailable, so the feature checks that follow still
// preprocess cleanly.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if __has_builtin(__builtin___clear_cache)
# define clear_cache __builtin___clear_cache

#if defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
/**
 * Windows-on-ARM replacement for __clear_cache: asks the OS to flush the
 * instruction cache of the current process for the byte range [start, end).
 *
 * The length is computed on char pointers because arithmetic on void
 * pointers is a compiler extension rather than standard C.
 */
static inline void __clear_cache(void* start, void* end) {
	FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
}
#define clear_cache __clear_cache
#elif __has_builtin(__builtin___clear_cache)
#define clear_cache __builtin___clear_cache
#else
void __clear_cache(void* start, void* end);
# define clear_cache __clear_cache
void __clear_cache(void* start, void* end);
#define clear_cache __clear_cache
#endif


Expand All @@ -36,7 +44,6 @@ void __clear_cache(void* start, void* end);
#endif

#ifdef _WIN32
#include "safewindows.h"
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP && _WIN32_WINNT >= 0x0A00
// Prefer the *FromApp versions when we're being built in a Windows Store App context on
// Windows >= 10. *FromApp require the application to be manifested for "codeGeneration".
Expand Down Expand Up @@ -178,6 +185,7 @@ static struct trampoline_set *alloc_trampolines(char *start, char *end)
metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL;
mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC);
clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]);

return metadata;
}

Expand Down
188 changes: 158 additions & 30 deletions objc_msgSend.aarch64.S
Original file line number Diff line number Diff line change
@@ -1,15 +1,88 @@
#define ARGUMENT_SPILL_SIZE (8*10 + 8*16)
.macro MSGSEND receiver, sel
.cfi_startproc
cbz \receiver, 4f // Skip everything if the receiver is nil

/* Windows ARM64 Exception Handling
*
* Structured Exception Handling (SEH) on Windows ARM64 differs from the x64
* implementation. Functions consist of a single prologue and zero or more
* epilogues. Instead of using offsets for the .seh* directives to manipulate the
* stack frame, each directive corresponds to a single instruction.
*
* This presents a challenge for our objc_msgSend function, which only modifies
* the stack when a slow lookup is needed (see label "5").
*
* To address this, we move the directive marking the start of a function deep
* into the msgSend body to prevent marking every instruction as ".seh_nop."
*
* For Windows:
* - EH_START(x): Start of function (no effect on Windows)
* - EH_END(x): End of function (no effect on Windows)
* - EH_START_AT_OFFSET(x): Mark Start of function (Delayed)
* - EH_END_AT_OFFSET(x): Mark End of function (Delayed)
* - EH_END_PROLOGUE: End of function prologue
* - EH_START_EPILOGUE: Start of function epilogue
* - EH_END_EPILOGUE: End of function epilogue
* - EH_SAVE_FP_LR(x): Save Frame Pointer and Link Register
* - EH_STACK_ALLOC(x): Stack allocation (inside prologue)
* - EH_ADD_FP(x): Add to Frame Pointer
* - EH_NOP: Mark instruction with no unwinding relevance
*
* On non-Windows platforms, EH_START and EH_END expand to the CFI start/end directives; all other macros expand to nothing and can be used without causing issues.
*/

// Exception-handling annotation macros. On Windows they map onto the
// assembler's .seh_* directives; elsewhere only EH_START / EH_END emit
// anything (the CFI start/end directives).
#ifdef _WIN32
// The SEH function range is opened later (see EH_START_AT_OFFSET), so the
// markers at the real function entry and exit expand to nothing here.
# define EH_START
# define EH_END

// Open / close the SEH function range. Note that the symbol name
// objc_msgSend is hard-coded in both directives.
# define EH_START_AT_OFFSET .seh_proc objc_msgSend
# define EH_END_AT_OFFSET .seh_endproc objc_msgSend

// Prologue / epilogue boundary markers.
# define EH_END_PROLOGUE .seh_endprologue
# define EH_START_EPILOGUE .seh_startepilogue
# define EH_END_EPILOGUE .seh_endepilogue

// Annotations that take an operand; x is passed to the directive unchanged.
# define EH_SAVE_FP_LR(x) .seh_save_fplr x
# define EH_STACK_ALLOC(x) .seh_stackalloc x
# define EH_ADD_FP(x) .seh_add_fp x

// Annotation for an instruction that has no unwinding relevance.
# define EH_NOP .seh_nop
#else
// Marks the real start and end of the function
# define EH_START .cfi_startproc
# define EH_END .cfi_endproc

// The following directives are either not
// needed or not available with CFI
# define EH_START_AT_OFFSET
# define EH_END_AT_OFFSET
# define EH_END_PROLOGUE
# define EH_START_EPILOGUE
# define EH_END_EPILOGUE
# define EH_SAVE_FP_LR(x)
# define EH_STACK_ALLOC(x)
# define EH_ADD_FP(x)
# define EH_NOP
#endif

.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
CDECL(objc_msgSend_stret):
EH_START

cbz x0, 4f // Skip everything if the receiver is nil
// Jump to 6: if this is a small object
ubfx x9, \receiver, #0, #SMALLOBJ_BITS
ubfx x9, x0, #0, #SMALLOBJ_BITS
cbnz x9, 6f

ldr x9, [\receiver] // Load class to x9 if not a small int
ldr x9, [x0] // Load class to x9 if not a small int
1:
ldr x9, [x9, #DTABLE_OFFSET] // Dtable -> x9
ldr w10, [\sel] // selector->index -> x10
ldr w10, [x1] // selector->index -> x10
ldr w11, [x9, #SHIFT_OFFSET] // dtable->shift -> x11

cmp x11, #8 // If this is a small dtable, jump to the
Expand Down Expand Up @@ -41,59 +114,114 @@
mov v0.d[1], x0
br lr
5: // Slow lookup
EH_START_AT_OFFSET

// Save anything that will be clobbered by
// the call
// the call.
// Note that we pre-index (see "!"), meaning
// that we adjust the sp before storing the pair
// of registers.
stp x0, x1, [sp, #-(ARGUMENT_SPILL_SIZE)]!
stp x2, x3, [sp, #16] // The order is arbitrary, except that
stp x4, x5, [sp, #32] // fp and lr must be spilled together and
stp x6, x7, [sp, #48] // it's convenient if \receiver is spilled at sp
EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE))

stp x2, x3, [sp, #16]
EH_NOP // The following instructions can be ignored by SEH
stp x4, x5, [sp, #32]
EH_NOP
stp x6, x7, [sp, #48]
EH_NOP
stp q0, q1, [sp, #64]
EH_NOP
stp q2, q3, [sp, #96]
EH_NOP
stp q4, q5, [sp, #128]
EH_NOP
stp q6, q7, [sp, #160]
stp fp, lr, [sp, #192]
add fp, sp, 192
stp \receiver, x8, [sp, #-16]!
EH_NOP
stp fp, lr, [sp, #192] // The order is arbitrary, except that
EH_SAVE_FP_LR(192) // fp and lr must be spilled together

add fp, sp, 192 // Adjust frame pointer
EH_ADD_FP(192)
stp x0, x8, [sp, #-16]! // it's convenient if x0 is spilled at sp
EH_STACK_ALLOC(16) // stp performed pre-indexing by sp-16

EH_END_PROLOGUE

#ifndef _WIN32
.cfi_def_cfa fp, 16
.cfi_offset fp, -16
.cfi_offset lr, -8
#endif
// We now have all argument registers, the link
// register and the receiver spilled on the
// stack, with sp containing
// the address of the receiver

mov x0, sp // &self, _cmd in arguments
mov x1, \sel
bl CDECL(slowMsgLookup) // This is the only place where the CFI directives
mov x1, x1
bl CDECL(slowMsgLookup) // This is the only place where the EH directives
// have to be accurate...
mov x9, x0 // IMP -> x9

EH_START_EPILOGUE
// Reload the spilled argument registers. The prologue stored them relative
// to sp (x0/x1 at #0, x2/x3 at #16, x4/x5 at #32, x6/x7 at #48, q0..q7 from
// #64) and then pushed a further 16 bytes for x0/x8, so every reload offset
// is the corresponding store offset plus 16.
ldp x0, x1, [sp, #16]
EH_NOP
ldp x2, x3, [sp, #32]
EH_NOP
ldp x4, x5, [sp, #48] // Stored at #32; #64 is the x6/x7 slot
EH_NOP
ldp x6, x7, [sp, #64]
EH_NOP
ldp q0, q1, [sp, #80]
EH_NOP
ldp q2, q3, [sp, #112]
EH_NOP
ldp q4, q5, [sp, #144]
EH_NOP
ldp q6, q7, [sp, #176]
EH_NOP
ldp fp, lr, [sp, #208]
ldp \receiver, x8, [sp], #(ARGUMENT_SPILL_SIZE + 16)
EH_SAVE_FP_LR(208)

// Post-increment sp += ARGUMENT_SPILL_SIZE +16
ldp x0, x8, [sp], #(ARGUMENT_SPILL_SIZE + 16)
EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE + 16))

EH_END_EPILOGUE
EH_END_AT_OFFSET

br x9
6:
adrp x10, :got:SmallObjectClasses
ldr x10, [x10, :got_lo12:SmallObjectClasses]
// Load 63:12 of SmallObjectClasses address
// We use the CDECL macro as Windows prefixes
// cdecl conforming symbols with "_".
adrp x10, CDECL(SmallObjectClasses) // The macro handles this transparently.

// Add lower 12-bits of SmallObjectClasses address to x10
add x10, x10, :lo12:CDECL(SmallObjectClasses)
ldr x9, [x10, x9, lsl #3]

b 1b
.cfi_endproc
.endm
EH_END

.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
CDECL(objc_msgSend_stret):
MSGSEND x0, x1
// Windows only: emit symbol records for the three entry points and embed
// linker directives that export them.
#ifdef _WIN32
.text
// One symbol record per entry point.
// NOTE(review): storage class 2 and type 32 are presumably the COFF
// encodings for "external" and "function" - confirm against the PE/COFF
// specification.
.def objc_msgSend;
.scl 2;
.type 32;
.endef
.def objc_msgSend_fpret;
.scl 2;
.type 32;
.endef
.def objc_msgSend_stret;
.scl 2;
.type 32;
.endef

// Linker directives stored in the object file: each /EXPORT entry names one
// of the entry points above. The leading space in every string separates
// consecutive directives, since .ascii adds no terminator between them.
.section .drectve,"yn"
.ascii " /EXPORT:objc_msgSend"
.ascii " /EXPORT:objc_msgSend_fpret"
.ascii " /EXPORT:objc_msgSend_stret"
#endif

0 comments on commit ab23f14

Please sign in to comment.