Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,21 @@ endif()
# we do this manually by adding flags.
set(ATOMIC_LINKAGE_FIX FALSE)
if(MSVC)
# Detect the MSVC target architecture and derive three values from it:
#   MSVC_FLAG        - GCC-style architecture macro passed to the compiler
#   MSVC_TCG_PATH    - /I include option for the matching qemu/tcg backend
#   MSVC_TARGET_ARCH - architecture name tested by later checks in this file
# The ARM64 test must come first: ARM64 is also a 64-bit target, so the
# pointer-size test alone would classify it as x86_64.
# NOTE(review): CMAKE_SYSTEM_PROCESSOR equals the host processor unless the
# build is configured for cross-compiling; confirm it reports ARM64 for every
# supported way of targeting ARM64 (e.g. a Visual Studio generator with
# -A ARM64 on an x64 host).
if(CMAKE_SYSTEM_PROCESSOR MATCHES "ARM64|aarch64")
    set(MSVC_FLAG -D__aarch64__)
    set(MSVC_TCG_PATH "/I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/aarch64")
    set(MSVC_TARGET_ARCH "aarch64")
elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(MSVC_FLAG -D__x86_64__)
    set(MSVC_TCG_PATH "/I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/i386")
    set(MSVC_TARGET_ARCH "x86_64")
elseif(CMAKE_SIZEOF_VOID_P EQUAL 4)
    set(MSVC_FLAG -D__i386__)
    set(MSVC_TCG_PATH "/I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/i386")
    set(MSVC_TARGET_ARCH "i386")
else()
    message(FATAL_ERROR "Unsupported architecture!")
endif()

add_compile_options(
Expand All @@ -135,7 +144,7 @@ if(MSVC)
-D_CRT_SECURE_NO_WARNINGS
-DWIN32_LEAN_AND_MEAN
${MSVC_FLAG}
/I${CMAKE_CURRENT_SOURCE_DIR}/qemu/tcg/i386
${MSVC_TCG_PATH}
)

# Disable some warnings
Expand Down Expand Up @@ -1214,7 +1223,16 @@ if(MSVC)
qemu/util/oslib-win32.c
qemu/util/qemu-thread-win32.c
)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
if(MSVC_TARGET_ARCH STREQUAL "aarch64")
# ARM64 uses armasm64 for assembly: the setjmp wrapper is assembled into the
# build tree by a custom command and the resulting object is appended to
# UNICORN_COMMON_SRCS as a pre-built external object.
set(setjmp_arm64_src "${CMAKE_CURRENT_SOURCE_DIR}/qemu/util/setjmp-wrapper-win32-arm64.asm")
set(setjmp_arm64_obj "${CMAKE_CURRENT_BINARY_DIR}/setjmp-wrapper-win32-arm64.obj")
add_custom_command(
    OUTPUT "${setjmp_arm64_obj}"
    COMMAND armasm64 -o "${setjmp_arm64_obj}" "${setjmp_arm64_src}"
    DEPENDS "${setjmp_arm64_src}"
    COMMENT "Building ARM64 setjmp wrapper"
    # VERBATIM keeps argument escaping identical across generators.
    VERBATIM
)
# GENERATED: the file does not exist at configure time.
# EXTERNAL_OBJECT: link the file as-is instead of compiling it.
set_source_files_properties("${setjmp_arm64_obj}" PROPERTIES GENERATED TRUE EXTERNAL_OBJECT TRUE)
list(APPEND UNICORN_COMMON_SRCS "${setjmp_arm64_obj}")
elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
if(MSVC_VERSION LESS 1600 AND MSVC_IDE)
add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build/setjmp-wrapper-win32.dir/setjmp-wrapper-win32.obj"
COMMAND ml64 /c /nologo /Fo"${CMAKE_CURRENT_SOURCE_DIR}/build/setjmp-wrapper-win32.dir/setjmp-wrapper-win32.obj" /W3 /errorReport:prompt /Ta"${CMAKE_CURRENT_SOURCE_DIR}/qemu/util/setjmp-wrapper-win32.asm"
Expand Down Expand Up @@ -1467,6 +1485,13 @@ if (UNICORN_LEGACY_STATIC_ARCHIVE)
set_target_properties(unicorn PROPERTIES OUTPUT_NAME "unicorn-static")
bundle_static_library(unicorn unicorn_archive unicorn)
endif()
# Merge ARM64 setjmp wrapper into the archive (bundle_static_library doesn't include external objects).
# After unicorn_archive is built, lib re-creates the archive in place from the
# existing archive plus the wrapper object.
if(MSVC AND "${MSVC_TARGET_ARCH}" STREQUAL "aarch64")
    add_custom_command(TARGET unicorn_archive POST_BUILD
        # /OUT: and its path form one fully quoted argument so that VERBATIM
        # escapes the path (including any spaces) as a single unit.
        COMMAND lib /NOLOGO "$<TARGET_FILE:unicorn_archive>" "${CMAKE_CURRENT_BINARY_DIR}/setjmp-wrapper-win32-arm64.obj" "/OUT:$<TARGET_FILE:unicorn_archive>"
        COMMENT "Merging ARM64 setjmp wrapper into unicorn archive"
        VERBATIM
    )
endif()
endif()

if(UNICORN_FUZZ)
Expand Down
6 changes: 5 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ fn build_with_cmake() {
} else {
println!("cargo:rustc-link-lib=static=unicorn");
}
if !compiler.is_like_msvc() {
let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
if target_os != "windows" && !compiler.is_like_msvc() {
println!("cargo:rustc-link-lib=pthread");
println!("cargo:rustc-link-lib=m");
}
Expand Down Expand Up @@ -379,8 +380,11 @@ fn main() {
} else {
println!("cargo:rustc-link-arg=-Wl,-allow-multiple-definition");
println!("cargo:rustc-link-lib=static=unicorn");
let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
if target_os != "windows" && !cc::Build::new().get_compiler().is_like_msvc() {
println!("cargo:rustc-link-lib=pthread");
println!("cargo:rustc-link-lib=m");
}
}
}
Err(_) => {
Expand Down
94 changes: 93 additions & 1 deletion qemu/accel/tcg/translate-all.c
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,99 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
uc->seh_closure = closure;
data = closure + CLOSURE_SIZE /2;

#ifdef _WIN64
#if defined(_WIN64) && defined(_M_ARM64)
/*
* ARM64 Windows trampoline.
* On ARM64 Windows, the calling convention passes first arg in x0, second in x1.
* We need to pass the uc pointer as the second argument to the handler.
*
* ARM64 instructions are 4 bytes each.
* We build a simple trampoline that:
* 1. Saves x1 and lr to data area
* 2. Loads uc pointer into x1
* 3. Loads handler address and calls it (using blr)
* 4. Restores x1 and lr
* 5. Returns
*/
{
/*
 * Emit a 13-word (52-byte) ARM64 trampoline at `closure`. The trampoline
 * loads the uc pointer into x1, calls `handler` through x9, then restores
 * x1 and lr and returns. Its working storage lives at `data`:
 * uc pointer, handler pointer, and two save slots for x1 and lr.
 *
 * NOTE(review): the x1/lr save slots are fixed locations in `data`, shared
 * by every call made through this closure; a nested or concurrent call
 * would overwrite them — confirm the handler cannot be re-entered.
 */
uint32_t *code = (uint32_t *)closure;
uint64_t data_addr = (uint64_t)data;
uint64_t handler_addr = (uint64_t)handler;
uint64_t uc_addr = (uint64_t)uc;

/* Store uc and handler pointers in data area */
memcpy(data + 0x00, &uc_addr, 8); /* data[0x00]: uc pointer */
memcpy(data + 0x08, &handler_addr, 8); /* data[0x08]: handler pointer */
/* data[0x10]: saved x1 */
/* data[0x18]: saved lr */

/*
* Generate ARM64 code:
* We use x9 as scratch register (caller-saved, safe to clobber)
*
* Layout:
* code[0]: ldr x9, [pc, #offset] ; load &data from literal pool
* code[1]: str x1, [x9, #0x10] ; save x1 to data[0x10]
* code[2]: str lr, [x9, #0x18] ; save lr to data[0x18]
* code[3]: ldr x1, [x9, #0x00] ; load uc ptr into x1
* code[4]: ldr x9, [x9, #0x08] ; load handler ptr
* code[5]: blr x9 ; call handler (clobbers lr)
* code[6]: ldr x9, [pc, #offset] ; reload &data
* code[7]: ldr x1, [x9, #0x10] ; restore x1
* code[8]: ldr lr, [x9, #0x18] ; restore lr
* code[9]: ret ; return via restored lr
* code[10]: nop ; padding before the literal pool
* code[11-12]: data_addr ; 64-bit literal pool
*/

int literal_offset;

/* code[0]: ldr x9, [pc, #offset] - load data pointer from literal pool */
/* LDR (literal) encoding: 0x58000000 | (imm19 << 5) | Rt */
/* imm19 is the offset in words, hence the division by 4 below */
literal_offset = (11 - 0) * 4; /* offset from code[0] to code[11] = 44 bytes */
code[0] = 0x58000009 | ((literal_offset / 4) << 5);

/* code[1]: str x1, [x9, #0x10] - save x1 */
/* STR (unsigned offset): 0xF9000000 | (imm12 << 10) | (Rn << 5) | Rt */
/* imm12 = offset/8, Rn=9, Rt=1 */
code[1] = 0xF9000121 | ((0x10 / 8) << 10);

/* code[2]: str lr, [x9, #0x18] - save lr (x30) */
/* Rn=9, Rt=30 (lr) */
code[2] = 0xF900013E | ((0x18 / 8) << 10);

/* code[3]: ldr x1, [x9, #0x00] - load uc pointer into x1 */
code[3] = 0xF9400121;

/* code[4]: ldr x9, [x9, #0x08] - load handler pointer into x9 */
code[4] = 0xF9400129 | ((0x08 / 8) << 10);

/* code[5]: blr x9 - call the handler */
code[5] = 0xD63F0120;

/* code[6]: ldr x9, [pc, #offset] - reload data pointer */
/* x9 is reloaded because it is a scratch register the handler may clobber */
literal_offset = (11 - 6) * 4; /* offset from code[6] to code[11] = 20 bytes */
code[6] = 0x58000009 | ((literal_offset / 4) << 5);

/* code[7]: ldr x1, [x9, #0x10] - restore x1 */
code[7] = 0xF9400121 | ((0x10 / 8) << 10);

/* code[8]: ldr lr, [x9, #0x18] - restore lr (x30) */
code[8] = 0xF940013E | ((0x18 / 8) << 10);

/* code[9]: ret - return via lr */
code[9] = 0xD65F03C0;

/*
 * code[10]: nop - padding word before the literal pool.
 * NOTE(review): with this nop the literal starts at byte offset 44 from
 * closure (code[11]), which is not a multiple of 8; if closure is 8-byte
 * aligned the 64-bit literal is only 4-byte aligned. Confirm whether
 * 8-byte alignment was intended (it would need offset 40 or 48).
 */
code[10] = 0xD503201F;

/* code[11-12]: Literal pool - data address (64-bit) */
memcpy(&code[11], &data_addr, 8);

/* Flush instruction cache for the generated code */
/* 13 * 4 bytes covers code[0] through code[12], literal pool included */
FlushInstructionCache(GetCurrentProcess(), closure, 13 * 4);
}
#elif defined(_WIN64)
ptr = closure;
*ptr = 0x48; // REX.w
ptr += 1;
Expand Down
4 changes: 2 additions & 2 deletions qemu/include/qemu/atomic128.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
}

# define HAVE_ATOMIC128 1
#elif defined(__aarch64__)
/* We can do better than cmpxchg for AArch64. */
#elif defined(__aarch64__) && !defined(_MSC_VER)
/* We can do better than cmpxchg for AArch64 (GCC/Clang only - uses inline asm). */
static inline Int128 atomic16_read(Int128 *ptr)
{
uint64_t l, h;
Expand Down
7 changes: 7 additions & 0 deletions qemu/include/sysemu/os-win32.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ extern int _setjmp_wrapper(jmp_buf);
#undef setjmp
#define setjmp(env) _setjmp_wrapper(env)

#if defined(_M_ARM64)
/*
 * ARM64 only: redirect longjmp as well. _longjmp_wrapper is defined
 * outside this header and declared noreturn; after the #undef/#define
 * below, every longjmp(env, val) that follows this header expands to a
 * call to _longjmp_wrapper(env, val).
 */
// On ARM64, we also need a custom longjmp to avoid unwinding issues
extern __declspec(noreturn) void _longjmp_wrapper(jmp_buf, int);
#undef longjmp
#define longjmp(env, val) _longjmp_wrapper(env, val)
#endif

#else // MingW

#undef setjmp
Expand Down
8 changes: 0 additions & 8 deletions qemu/include/tcg/tcg-opc.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,7 @@ DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))

#ifdef _MSC_VER
DEF(ctpop_i32, 1, 1, 0, 0)
#else
DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
#endif

DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
Expand Down Expand Up @@ -199,11 +195,7 @@ DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))

#ifdef _MSC_VER
DEF(ctpop_i64, 1, 1, 0, IMPL64)
#else
DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
#endif

DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
Expand Down
7 changes: 3 additions & 4 deletions qemu/target/i386/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1282,10 +1282,10 @@ void host_cpuid(uint32_t function, uint32_t count,
{
uint32_t vec[4];

#ifdef _MSC_VER
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
/* MSVC on x86/x64 */
__cpuidex((int*)vec, function, count);
#else
#ifdef __x86_64__
#elif defined(__x86_64__)
asm volatile("cpuid"
: "=a"(vec[0]), "=b"(vec[1]),
"=c"(vec[2]), "=d"(vec[3])
Expand All @@ -1303,7 +1303,6 @@ void host_cpuid(uint32_t function, uint32_t count,
#else
abort();
#endif
#endif // _MSC_VER

if (eax)
*eax = vec[0];
Expand Down
32 changes: 22 additions & 10 deletions qemu/tcg/aarch64/tcg-target.inc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1080,9 +1080,9 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
opc = I3405_MOVZ;
}
s0 = ctz64(t0) & (63 & -16);
t1 = t0 & ~(0xffffUL << s0);
t1 = t0 & ~(0xffffULL << s0);
s1 = ctz64(t1) & (63 & -16);
t2 = t1 & ~(0xffffUL << s1);
t2 = t1 & ~(0xffffULL << s1);
if (t2 == 0) {
tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
if (t1 != 0) {
Expand Down Expand Up @@ -1503,14 +1503,22 @@ static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,

/*
 * Emit one DMB memory-barrier instruction for the ordering requested in a0.
 *
 * s:  TCG context the instruction is emitted into.
 * a0: barrier argument; only the TCG_MO_ALL bits are examined.
 *
 * Store-store ordering gets DMB ISHST and the load-first orderings get
 * DMB ISHLD. Every other combination falls back to a full DMB ISH.
 */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Use switch instead of array range initializers for MSVC compatibility */
    uint32_t sync_val;

    switch (a0 & TCG_MO_ALL) {
    case TCG_MO_ST_ST:
        sync_val = DMB_ISH | DMB_ST;
        break;
    case TCG_MO_LD_LD:
    case TCG_MO_LD_ST:
    case TCG_MO_LD_ST | TCG_MO_LD_LD:
        sync_val = DMB_ISH | DMB_LD;
        break;
    default:
        /* Any other combination: full load + store barrier. */
        sync_val = DMB_ISH | DMB_LD | DMB_ST;
        break;
    }
    /* Exactly one barrier instruction is emitted per call. */
    tcg_out32(s, sync_val);
}

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
Expand Down Expand Up @@ -2895,6 +2903,9 @@ typedef struct {
uint8_t fde_reg_ofs[24];
} DebugFrame;

#if !defined(__ELF__)
/* Host machine without ELF. */
#else
#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
Expand Down Expand Up @@ -2933,3 +2944,4 @@ void tcg_register_jit(TCGContext *s, void *buf, size_t buf_size)
{
tcg_register_jit_int(s, buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif /* __ELF__ */
2 changes: 1 addition & 1 deletion qemu/util/cacheinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static void sys_cache_info(int *isize, int *dsize)
* Architecture (+ OS) specific detection mechanisms.
*/

#if defined(__aarch64__)
#if defined(__aarch64__) && !defined(_MSC_VER)

static void arch_cache_info(int *isize, int *dsize)
{
Expand Down
Loading