diff --git a/configure b/configure index a71901b87c..fcdbe8030b 100755 --- a/configure +++ b/configure @@ -54,6 +54,7 @@ Supported targets: x86_64-linux (x86 64 bits, Linux) x86_64-bsd (x86 64 bits, BSD) x86_64-macosx (x86 64 bits, MacOS X) + x86_64-cygwin (x86 64 bits, Cygwin environment under Windows) rv32-linux (RISC-V 32 bits, Linux) rv64-linux (RISC-V 64 bits, Linux) aarch64-linux (AArch64, i.e. ARMv8 in 64-bit mode, Linux) @@ -387,6 +388,18 @@ if test "$arch" = "x86" -a "$bitsize" = "64"; then libmath="" system="macosx" ;; + cygwin) + abi="standard" + casm="${toolprefix}gcc" + casm_options="-m64 -c" + cc="${toolprefix}gcc -m64" + clinker="${toolprefix}gcc" + clinker_options="-m64" + cprepro="${toolprefix}gcc" + cprepro_options="-std=c99 -m64 -U__GNUC__ '-D__attribute__(x)=' -E" + libmath="-lm" + system="cygwin" + ;; *) echo "Error: invalid eabi/system '$target' for architecture X86_64." 1>&2 echo "$usage" 1>&2 diff --git a/runtime/x86_64/i64_dtou.S b/runtime/x86_64/i64_dtou.S index cc822d6725..7f12ae8810 100644 --- a/runtime/x86_64/i64_dtou.S +++ b/runtime/x86_64/i64_dtou.S @@ -39,13 +39,13 @@ // Conversion float -> unsigned long FUNCTION(__compcert_i64_dtou) - ucomisd .LC1(%rip), %xmm0 + ucomisd .LC1(%rip), FP_ARG_1 jnb 1f - cvttsd2siq %xmm0, %rax + cvttsd2siq FP_ARG_1, INT_RES ret -1: subsd .LC1(%rip), %xmm0 - cvttsd2siq %xmm0, %rax - addq .LC2(%rip), %rax +1: subsd .LC1(%rip), FP_ARG_1 + cvttsd2siq FP_ARG_1, INT_RES + addq .LC2(%rip), INT_RES ret .p2align 3 diff --git a/runtime/x86_64/i64_utod.S b/runtime/x86_64/i64_utod.S index 62e6e484ea..4d4870fc4a 100644 --- a/runtime/x86_64/i64_utod.S +++ b/runtime/x86_64/i64_utod.S @@ -39,18 +39,18 @@ // Conversion unsigned long -> double-precision float FUNCTION(__compcert_i64_utod) - testq %rdi, %rdi + testq INT_ARG_1, INT_ARG_1 js 1f - pxor %xmm0, %xmm0 // if < 2^63, - cvtsi2sdq %rdi, %xmm0 // convert as if signed + pxor FP_RES, FP_RES // if < 2^63, + cvtsi2sdq INT_ARG_1, FP_RES // convert as if signed ret 1: // if >= 2^63, use round-to-odd trick - movq %rdi, %rax + movq INT_ARG_1, %rax shrq %rax - andq $1, %rdi - orq %rdi, %rax // (arg >> 1) | (arg & 1) - pxor %xmm0, %xmm0 - cvtsi2sdq %rax, %xmm0 // convert as if signed - addsd %xmm0, %xmm0 // multiply result by 2.0 + andq $1, INT_ARG_1 + orq INT_ARG_1, %rax // (arg >> 1) | (arg & 1) + pxor FP_RES, FP_RES + cvtsi2sdq %rax, FP_RES // convert as if signed + addsd FP_RES, FP_RES // multiply result by 2.0 ret ENDFUNCTION(__compcert_i64_utod) diff --git a/runtime/x86_64/i64_utof.S b/runtime/x86_64/i64_utof.S index 63a3392097..0e878121a2 100644 --- a/runtime/x86_64/i64_utof.S +++ b/runtime/x86_64/i64_utof.S @@ -39,18 +39,18 @@ // Conversion unsigned long -> single-precision float FUNCTION(__compcert_i64_utof) - testq %rdi, %rdi + testq INT_ARG_1, INT_ARG_1 js 1f - pxor %xmm0, %xmm0 // if < 2^63, - cvtsi2ssq %rdi, %xmm0 // convert as if signed + pxor FP_RES, FP_RES // if < 2^63, + cvtsi2ssq INT_ARG_1, FP_RES // convert as if signed ret 1: // if >= 2^63, use round-to-odd trick - movq %rdi, %rax + movq INT_ARG_1, %rax shrq %rax - andq $1, %rdi - orq %rdi, %rax // (arg >> 1) | (arg & 1) - pxor %xmm0, %xmm0 - cvtsi2ssq %rax, %xmm0 // convert as if signed - addss %xmm0, %xmm0 // multiply result by 2.0 + andq $1, INT_ARG_1 + orq INT_ARG_1, %rax // (arg >> 1) | (arg & 1) + pxor FP_RES, FP_RES + cvtsi2ssq %rax, FP_RES // convert as if signed + addss FP_RES, FP_RES // multiply result by 2.0 ret ENDFUNCTION(__compcert_i64_utof) diff --git a/runtime/x86_64/sysdeps.h b/runtime/x86_64/sysdeps.h index e9d456afd5..aacef8f041 100644 --- a/runtime/x86_64/sysdeps.h +++ b/runtime/x86_64/sysdeps.h @@ -63,13 +63,25 @@ _##f: #if defined(SYS_cygwin) -#define GLOB(x) _##x +#define GLOB(x) x #define FUNCTION(f) \ .text; \ - .globl _##f; \ + .globl f; \ .align 16; \ -_##f: +f: #define ENDFUNCTION(f) #endif + +// Names for argument and result registers + +#if defined(SYS_cygwin) +#define INT_ARG_1 %rcx +#else +#define INT_ARG_1 %rdi +#endif +#define FP_ARG_1 %xmm0 +#define INT_RES %rax +#define FP_RES %xmm0 + diff --git a/runtime/x86_64/vararg.S b/runtime/x86_64/vararg.S index 9c0d787be6..c5225b34c9 100644 --- a/runtime/x86_64/vararg.S +++ b/runtime/x86_64/vararg.S @@ -34,6 +34,12 @@ // Helper functions for variadic functions . x86_64 version. +#include "sysdeps.h" + +// ELF ABI + +#if defined(SYS_linux) || defined(SYS_bsd) || defined(SYS_macosx) + // typedef struct { // unsigned int gp_offset; // unsigned int fp_offset; @@ -60,8 +66,6 @@ // unsigned long long __compcert_va_int64(va_list ap); // double __compcert_va_float64(va_list ap); -#include "sysdeps.h" - FUNCTION(__compcert_va_int32) movl 0(%rdi), %edx // edx = gp_offset cmpl $48, %edx @@ -146,3 +150,58 @@ FUNCTION(__compcert_va_saveregs) movaps %xmm7, 160(%r10) 1: ret ENDFUNCTION(__compcert_va_saveregs) + +#endif + +// Windows ABI + +#if defined(SYS_cygwin) + +// typedef void * va_list; +// unsigned int __compcert_va_int32(va_list * ap); +// unsigned long long __compcert_va_int64(va_list * ap); +// double __compcert_va_float64(va_list * ap); + +FUNCTION(__compcert_va_int32) // %rcx = pointer to argument pointer + movq 0(%rcx), %rdx // %rdx = current argument pointer + movl 0(%rdx), %eax // load the int32 value there + addq $8, %rdx // increment argument pointer by 8 + movq %rdx, 0(%rcx) + ret +ENDFUNCTION(__compcert_va_int32) + +FUNCTION(__compcert_va_int64) // %rcx = pointer to argument pointer + movq 0(%rcx), %rdx // %rdx = current argument pointer + movq 0(%rdx), %rax // load the int64 value there + addq $8, %rdx // increment argument pointer by 8 + movq %rdx, 0(%rcx) + ret +ENDFUNCTION(__compcert_va_int64) + +FUNCTION(__compcert_va_float64) // %rcx = pointer to argument pointer + movq 0(%rcx), %rdx // %rdx = current argument pointer + movsd 0(%rdx), %xmm0 // load the float64 value there + addq $8, %rdx // increment argument pointer by 8 + movq %rdx, 0(%rcx) + ret +ENDFUNCTION(__compcert_va_float64) + +FUNCTION(__compcert_va_composite) + jmp GLOB(__compcert_va_int64) // by-ref convention, FIXME +ENDFUNCTION(__compcert_va_composite) + +// Save arguments passed in register in the stack at beginning of vararg +// function. The caller of the vararg function reserved 32 bytes of stack +// just for this purpose. +// FP arguments are passed both in FP registers and integer registers, +// so it's enough to save the integer registers used for parameter passing. + +FUNCTION(__compcert_va_saveregs) + movq %rcx, 16(%rsp) + movq %rdx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + ret +ENDFUNCTION(__compcert_va_saveregs) + +#endif diff --git a/x86/Asm.v b/x86/Asm.v index 58e28c4007..33f1f2adb1 100644 --- a/x86/Asm.v +++ b/x86/Asm.v @@ -279,6 +279,7 @@ Inductive instruction: Type := | Pmaxsd (rd: freg) (r2: freg) | Pminsd (rd: freg) (r2: freg) | Pmovb_rm (rd: ireg) (a: addrmode) + | Pmovq_rf (rd: ireg) (r1: freg) | Pmovsq_mr (a: addrmode) (rs: freg) | Pmovsq_rm (rd: freg) (a: addrmode) | Pmovsb @@ -998,6 +999,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pmaxsd _ _ | Pminsd _ _ | Pmovb_rm _ _ + | Pmovq_rf _ _ | Pmovsq_rm _ _ | Pmovsq_mr _ _ | Pmovsb diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml index caa9775ae0..73efc3c513 100644 --- a/x86/Asmexpand.ml +++ b/x86/Asmexpand.ml @@ -44,7 +44,7 @@ let stack_alignment () = 16 let _Plea (r, addr) = if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr) -(* SP adjustment to allocate or free a stack frame *) +(* SP adjustment to allocate or free a stack frame. *) let align n a = if n >= 0 then (n + a - 1) land (-a) else n land (-a) @@ -56,7 +56,7 @@ let sp_adjustment_32 sz = (* The top 4 bytes have already been allocated by the "call" instruction. *) sz - 4 -let sp_adjustment_64 sz = +let sp_adjustment_elf64 sz = let sz = Z.to_int sz in if is_current_function_variadic() then begin (* If variadic, add room for register save area, which must be 16-aligned *) @@ -73,6 +73,13 @@ let sp_adjustment_64 sz = (sz - 8, -1) end +let sp_adjustment_win64 sz = + let sz = Z.to_int sz in + (* Preserve proper alignment of the stack *) + let sz = align sz 16 in + (* The top 8 bytes have already been allocated by the "call" instruction. *) + sz - 8 + (* Built-ins. They come in two flavors: - annotation statements: take their arguments in registers or stack locations; generate no code; @@ -256,7 +263,7 @@ let expand_builtin_va_start_32 r = emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs))); emit (Pmovl_mr (linear_addr r _0z, RAX)) -let expand_builtin_va_start_64 r = +let expand_builtin_va_start_elf64 r = if not (is_current_function_variadic ()) then invalid_arg "Fatal error: va_start used in non-vararg function"; let (ir, fr, ofs) = @@ -287,6 +294,17 @@ let expand_builtin_va_start_64 r = emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area))); emit (Pmovq_mr (linear_addr r _16z, RAX)) +let expand_builtin_va_start_win64 r = + if not (is_current_function_variadic ()) then + invalid_arg "Fatal error: va_start used in non-vararg function"; + let num_args = + List.length (get_current_function_args()) in + let ofs = + Int64.(add !current_function_stacksize + (mul 8L (of_int num_args))) in + emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 ofs))); + emit (Pmovq_mr (linear_addr r _0z, RAX)) + (* FMA operations *) (* vfmadd r1, r2, r3 performs r1 := ri * rj + rk @@ -463,8 +481,8 @@ let expand_builtin_inline name args res = (* Vararg stuff *) | "__builtin_va_start", [BA(IR a)], _ -> assert (a = RDX); - if Archi.ptr64 - then expand_builtin_va_start_64 a + if Archi.win64 then expand_builtin_va_start_win64 a + else if Archi.ptr64 then expand_builtin_va_start_elf64 a else expand_builtin_va_start_32 a (* Synchronization *) | "__builtin_membar", [], _ -> @@ -476,24 +494,66 @@ let expand_builtin_inline name args res = | _ -> raise (Error ("unrecognized builtin " ^ name)) -(* Calls to variadic functions for x86-64: register AL must contain +(* Calls to variadic functions for x86-64 ELF: register AL must contain the number of XMM registers used for parameter passing. To be on - the safe side. do the same if the called function is + the safe side, do the same if the called function is unprototyped. *) -let set_al sg = - if Archi.ptr64 && (sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin +let fixup_funcall_elf64 sg = + if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr))) end +(* Calls to variadic functions for x86-64 Windows: + FP arguments passed in FP registers must also be passed in integer + registers. +*) + +let rec copy_fregs_to_iregs args fr ir = + match (ir, fr, args) with + | (i1 :: ir, f1 :: fr, (Tfloat | Tsingle) :: args) -> + emit (Pmovq_rf (i1, f1)); + copy_fregs_to_iregs args fr ir + | (i1 :: ir, f1 :: fr, _ :: args) -> + copy_fregs_to_iregs args fr ir + | _ -> + () + +let fixup_funcall_win64 sg = + if sg.sig_cc.cc_vararg then + copy_fregs_to_iregs sg.sig_args [XMM0; XMM1; XMM2; XMM3] [RCX; RDX; R8; R9] + +let fixup_funcall sg = + if Archi.ptr64 + then if Archi.win64 + then fixup_funcall_win64 sg + else fixup_funcall_elf64 sg + else () + (* Expansion of instructions *) let expand_instruction instr = match instr with | Pallocframe (sz, ofs_ra, ofs_link) -> - if Archi.ptr64 then begin - let (sz, save_regs) = sp_adjustment_64 sz in + if Archi.win64 then begin + let sz = sp_adjustment_win64 sz in + if is_current_function_variadic() then + (* Save parameters passed in registers in reserved stack area *) + emit (Pcall_s (intern_string "__compcert_va_saveregs", + {sig_args = []; sig_res = Tvoid; sig_cc = cc_default})); + (* Allocate frame *) + let sz' = Z.of_uint sz in + emit (Psubl_ri (RSP, sz')); + emit (Pcfi_adjust sz'); + (* Stack chaining *) + let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in + let addr2 = linear_addr RSP ofs_link in + emit (Pleaq (RAX,addr1)); + emit (Pmovq_mr (addr2, RAX)); + current_function_stacksize := Int64.of_int (sz + 8) + end else if Archi.ptr64 then begin + let (sz, save_regs) = sp_adjustment_elf64 sz in (* Allocate frame *) let sz' = Z.of_uint sz in emit (Psubq_ri (RSP, sz')); @@ -525,15 +585,18 @@ let expand_instruction instr = PrintAsmaux.current_function_stacksize := Int32.of_int sz end | Pfreeframe(sz, ofs_ra, ofs_link) -> - if Archi.ptr64 then begin - let (sz, _) = sp_adjustment_64 sz in + if Archi.win64 then begin + let sz = sp_adjustment_win64 sz in + emit (Paddq_ri (RSP, Z.of_uint sz)) + end else if Archi.ptr64 then begin + let (sz, _) = sp_adjustment_elf64 sz in emit (Paddq_ri (RSP, Z.of_uint sz)) end else begin let sz = sp_adjustment_32 sz in emit (Paddl_ri (RSP, Z.of_uint sz)) end | Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) -> - set_al sg; + fixup_funcall sg; emit instr | Pbuiltin (ef,args, res) -> begin diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml index 6820c08993..a16f3ef7ff 100644 --- a/x86/CBuiltins.ml +++ b/x86/CBuiltins.ml @@ -19,8 +19,12 @@ open C let (va_list_type, va_list_scalar, size_va_list) = if Archi.ptr64 then - (* Actually a struct passed by reference; equivalent to 3 64-bit words *) - (TArray(TInt(IULong, []), Some 3L, []), false, 3*8) + if Archi.win64 then + (* Just a pointer *) + (TPtr(TVoid [], []), true, 8) + else + (* Actually a struct passed by reference; equivalent to 3 64-bit words *) + (TArray(TInt(IULong, []), Some 3L, []), false, 3*8) else (* Just a pointer *) (TPtr(TVoid [], []), true, 4) diff --git a/x86/ConstpropOp.vp b/x86/ConstpropOp.vp index ada8d54a9a..dd4b839aae 100644 --- a/x86/ConstpropOp.vp +++ b/x86/ConstpropOp.vp @@ -17,11 +17,10 @@ Require Import Coqlib Compopts. Require Import AST Integers Floats. Require Import Op Registers. Require Import ValueDomain ValueAOp. +Require SelectOp. (** * Converting known values to constants *) -Parameter symbol_is_external: ident -> bool. (**r See [SelectOp] *) - Definition Olea_ptr (a: addressing) := if Archi.ptr64 then Oleal a else Olea a. Definition const_for_result (a: aval) : option operation := @@ -31,7 +30,7 @@ Definition const_for_result (a: aval) : option operation := | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None | Ptr(Gl id ofs) => - if symbol_is_external id then + if SelectOp.symbol_is_external id then if Ptrofs.eq ofs Ptrofs.zero then Some (Oindirectsymbol id) else None else Some (Olea_ptr (Aglobal id ofs)) diff --git a/x86/ConstpropOpproof.v b/x86/ConstpropOpproof.v index 6d2df9c1c9..82179fa4c8 100644 --- a/x86/ConstpropOpproof.v +++ b/x86/ConstpropOpproof.v @@ -107,7 +107,7 @@ Proof. - (* pointer *) destruct p; try discriminate; SimplVM. + (* global *) - destruct (symbol_is_external id). + destruct (SelectOp.symbol_is_external id). * revert H2; predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero; intros EQ; inv EQ. exists (Genv.symbol_address ge id Ptrofs.zero); auto. * inv H2. exists (Genv.symbol_address ge id ofs); split. diff --git a/x86/Conventions1.v b/x86/Conventions1.v index fdd9423976..645aae90b5 100644 --- a/x86/Conventions1.v +++ b/x86/Conventions1.v @@ -31,40 +31,44 @@ Definition is_callee_save (r: mreg) : bool := match r with | AX | CX | DX => false | BX | BP => true - | SI | DI => negb Archi.ptr64 (**r callee-save in 32 bits but not in 64 bits *) + | SI | DI => negb Archi.ptr64 || Archi.win64 (**r callee-save in ELF 64 bits *) | R8 | R9 | R10 | R11 => false | R12 | R13 | R14 | R15 => true | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 => false - | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => false + | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => Archi.win64 | FP0 => false end. Definition int_caller_save_regs := if Archi.ptr64 - then AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil + then if Archi.win64 + then AX :: CX :: DX :: R8 :: R9 :: R10 :: R11 :: nil + else AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil else AX :: CX :: DX :: nil. Definition float_caller_save_regs := if Archi.ptr64 - then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: - X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil + then if Archi.win64 + then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil + else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: + X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil. Definition int_callee_save_regs := if Archi.ptr64 - then BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil + then if Archi.win64 + then BX :: SI :: DI :: BP :: R12 :: R13 :: R14 :: R15 :: nil + else BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil else BX :: SI :: DI :: BP :: nil. -Definition float_callee_save_regs : list mreg := nil. +Definition float_callee_save_regs := + if Archi.ptr64 && Archi.win64 + then X6 :: X7 :: X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil + else nil. Definition destroyed_at_call := List.filter (fun r => negb (is_callee_save r)) all_mregs. -Definition dummy_int_reg := AX. (**r Used in [Regalloc]. *) -Definition dummy_float_reg := X0. (**r Used in [Regalloc]. *) - -Definition callee_save_type := mreg_type. - Definition is_float_reg (r: mreg) := match r with | AX | BX | CX | DX | SI | DI | BP @@ -73,6 +77,11 @@ Definition is_float_reg (r: mreg) := | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 | FP0 => true end. +Definition dummy_int_reg := AX. (**r Used in [Regalloc]. *) +Definition dummy_float_reg := X0. (**r Used in [Regalloc]. *) + +Definition callee_save_type := mreg_type. + (** * Function calling conventions *) (** The functions in this section determine the locations (machine registers @@ -181,7 +190,7 @@ Fixpoint loc_arguments_32 :: loc_arguments_32 tys (ofs + typesize ty) end. -(** In the x86-64 ABI: +(** In the x86-64 ELF ABI: - The first 6 integer arguments are passed in registers [DI], [SI], [DX], [CX], [R8], [R9]. - The first 8 floating-point arguments are passed in registers [X0] to [X7]. - Extra arguments are passed on the stack, in [Outgoing] slots. @@ -189,26 +198,62 @@ Fixpoint loc_arguments_32 of data is used in a slot. *) -Definition int_param_regs := DI :: SI :: DX :: CX :: R8 :: R9 :: nil. -Definition float_param_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil. +Definition int_param_regs_elf64 := DI :: SI :: DX :: CX :: R8 :: R9 :: nil. +Definition float_param_regs_elf64 := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil. -Fixpoint loc_arguments_64 +Fixpoint loc_arguments_elf64 (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) := match tyl with | nil => nil | (Tint | Tlong | Tany32 | Tany64) as ty :: tys => - match list_nth_z int_param_regs ir with + match list_nth_z int_param_regs_elf64 ir with | None => - One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2) + One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2) | Some ireg => - One (R ireg) :: loc_arguments_64 tys (ir + 1) fr ofs + One (R ireg) :: loc_arguments_elf64 tys (ir + 1) fr ofs end | (Tfloat | Tsingle) as ty :: tys => - match list_nth_z float_param_regs fr with + match list_nth_z float_param_regs_elf64 fr with | None => - One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2) + One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2) | Some freg => - One (R freg) :: loc_arguments_64 tys ir (fr + 1) ofs + One (R freg) :: loc_arguments_elf64 tys ir (fr + 1) ofs + end + end. + +(** In the x86-64 Win64 ABI: +- The first 4 arguments are passed in registers [RCX], [RDX], [R8], and [R9] + (for integer arguments) and [X0] to [X3] (for floating-point arguments). + Each argument "burns" both an integer register and a FP integer. +- The first 8 floating-point arguments are passed in registers [X0] to [X7]. +- Extra arguments are passed on the stack, in [Outgoing] slots. + Consecutive stack slots are separated by 8 bytes, even if only 4 bytes + of data is used in a slot. +- Four 8-byte words are always reserved at the bottom of the outgoing area + so that the callee can use them to save the registers containing the + first four arguments. This is handled in the Stacking phase. +*) + +Definition int_param_regs_win64 := CX :: DX :: R8 :: R9 :: nil. +Definition float_param_regs_win64 := X0 :: X1 :: X2 :: X3 :: nil. + +Fixpoint loc_arguments_win64 + (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) := + match tyl with + | nil => nil + | (Tint | Tlong | Tany32 | Tany64) as ty :: tys => + match list_nth_z int_param_regs_win64 r with + | None => + One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2) + | Some ireg => + One (R ireg) :: loc_arguments_win64 tys (r + 1) ofs + end + | (Tfloat | Tsingle) as ty :: tys => + match list_nth_z float_param_regs_win64 r with + | None => + One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2) + | Some freg => + One (R freg) :: loc_arguments_win64 tys (r + 1) ofs end end. @@ -217,7 +262,9 @@ Fixpoint loc_arguments_64 Definition loc_arguments (s: signature) : list (rpair loc) := if Archi.ptr64 - then loc_arguments_64 s.(sig_args) 0 0 0 + then if Archi.win64 + then loc_arguments_win64 s.(sig_args) 0 0 + else loc_arguments_elf64 s.(sig_args) 0 0 0 else loc_arguments_32 s.(sig_args) 0. (** Argument locations are either caller-save registers or [Outgoing] @@ -236,9 +283,16 @@ Definition loc_argument_32_charact (ofs: Z) (l: loc) : Prop := | _ => False end. -Definition loc_argument_64_charact (ofs: Z) (l: loc) : Prop := +Definition loc_argument_elf64_charact (ofs: Z) (l: loc) : Prop := match l with - | R r => In r int_param_regs \/ In r float_param_regs + | R r => In r int_param_regs_elf64 \/ In r float_param_regs_elf64 + | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs') + | _ => False + end. + +Definition loc_argument_win64_charact (ofs: Z) (l: loc) : Prop := + match l with + | R r => In r int_param_regs_win64 \/ In r float_param_regs_win64 | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs') | _ => False end. @@ -258,37 +312,75 @@ Proof. * destruct H; split; eapply X; eauto; omega. Qed. -Remark loc_arguments_64_charact: +Remark loc_arguments_elf64_charact: forall tyl ir fr ofs p, - In p (loc_arguments_64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_64_charact ofs) p. + In p (loc_arguments_elf64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_elf64_charact ofs) p. +Proof. + assert (X: forall ofs1 ofs2 l, loc_argument_elf64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_elf64_charact ofs1 l). + { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. } + assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_elf64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_elf64_charact ofs1) p). + { destruct p; simpl; intuition eauto. } + assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)). + { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. } +Opaque list_nth_z. + induction tyl; simpl loc_arguments_elf64; intros. + elim H. + assert (A: forall ty, In p + match list_nth_z int_param_regs_elf64 ir with + | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl (ir + 1) fr ofs + | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2) + end -> + forall_rpair (loc_argument_elf64_charact ofs) p). + { intros. destruct (list_nth_z int_param_regs_elf64 ir) as [r|] eqn:E; destruct H1. + subst. left. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. assumption. + eapply Y; eauto. omega. } + assert (B: forall ty, In p + match list_nth_z float_param_regs_elf64 fr with + | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl ir (fr + 1) ofs + | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2) + end -> + forall_rpair (loc_argument_elf64_charact ofs) p). + { intros. destruct (list_nth_z float_param_regs_elf64 fr) as [r|] eqn:E; destruct H1. + subst. right. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. assumption. + eapply Y; eauto. omega. } + destruct a; eauto. +Qed. + +Remark loc_arguments_win64_charact: + forall tyl r ofs p, + In p (loc_arguments_win64 tyl r ofs) -> (2 | ofs) -> forall_rpair (loc_argument_win64_charact ofs) p. Proof. - assert (X: forall ofs1 ofs2 l, loc_argument_64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_64_charact ofs1 l). + assert (X: forall ofs1 ofs2 l, loc_argument_win64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_win64_charact ofs1 l). { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. } - assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_64_charact ofs1) p). + assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_win64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_win64_charact ofs1) p). { destruct p; simpl; intuition eauto. } assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)). { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. } Opaque list_nth_z. - induction tyl; simpl loc_arguments_64; intros. + induction tyl; simpl loc_arguments_win64; intros. elim H. assert (A: forall ty, In p - match list_nth_z int_param_regs ir with - | Some ireg => One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2) + match list_nth_z int_param_regs_win64 r with + | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs + | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2) end -> - forall_rpair (loc_argument_64_charact ofs) p). - { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1. + forall_rpair (loc_argument_win64_charact ofs) p). + { intros. destruct (list_nth_z int_param_regs_win64 r) as [r'|] eqn:E; destruct H1. subst. left. eapply list_nth_z_in; eauto. eapply IHtyl; eauto. subst. split. omega. assumption. eapply Y; eauto. omega. } assert (B: forall ty, In p - match list_nth_z float_param_regs fr with - | Some ireg => One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2) + match list_nth_z float_param_regs_win64 r with + | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs + | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2) end -> - forall_rpair (loc_argument_64_charact ofs) p). - { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1. + forall_rpair (loc_argument_win64_charact ofs) p). + { intros. destruct (list_nth_z float_param_regs_win64 r) as [r'|] eqn:E; destruct H1. subst. right. eapply list_nth_z_in; eauto. eapply IHtyl; eauto. subst. split. omega. assumption. @@ -300,18 +392,30 @@ Lemma loc_arguments_acceptable: forall (s: signature) (p: rpair loc), In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p. Proof. - unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF. -- (* 64 bits *) - assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal). - assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal. - assert (X: forall l, loc_argument_64_charact 0 l -> loc_argument_acceptable l). - { unfold loc_argument_64_charact, loc_argument_acceptable. + unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF; [destruct Archi.win64 eqn:W64|]. +- (* WIN 64 bits *) + assert (A: forall r, In r int_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal). + assert (B: forall r, In r float_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; decide_goal). + assert (X: forall l, loc_argument_win64_charact 0 l -> loc_argument_acceptable l). + { unfold loc_argument_win64_charact, loc_argument_acceptable. destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto. intros [C D]. split; auto. apply Z.divide_trans with 2; auto. exists (2 / typealign ty); destruct ty; reflexivity. } - exploit loc_arguments_64_charact; eauto using Z.divide_0_r. + exploit loc_arguments_win64_charact; eauto using Z.divide_0_r. unfold forall_rpair; destruct p; intuition auto. +- (* ELF 64 bits *) + assert (A: forall r, In r int_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF, W64; decide_goal). + assert (B: forall r, In r float_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite W64; decide_goal). + assert (X: forall l, loc_argument_elf64_charact 0 l -> loc_argument_acceptable l). + { unfold loc_argument_elf64_charact, loc_argument_acceptable. + destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto. + intros [C D]. split; auto. apply Z.divide_trans with 2; auto. + exists (2 / typealign ty); destruct ty; reflexivity. + } + exploit loc_arguments_elf64_charact; eauto using Z.divide_0_r. + unfold forall_rpair; destruct p; intuition auto. + - (* 32 bits *) assert (X: forall l, loc_argument_32_charact 0 l -> loc_argument_acceptable l). { destruct l as [r | [] ofs ty]; simpl; intuition auto. rewrite H2; apply Z.divide_1_l. } @@ -324,7 +428,7 @@ Hint Resolve loc_arguments_acceptable: locs. Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. - unfold loc_arguments; destruct Archi.ptr64; reflexivity. + unfold loc_arguments; destruct Archi.ptr64; auto; destruct Archi.win64; auto. Qed. (** ** Normalization of function results *) diff --git a/x86/Stacklayout.v b/x86/Stacklayout.v index d375febfa1..4f68cf2655 100644 --- a/x86/Stacklayout.v +++ b/x86/Stacklayout.v @@ -15,11 +15,13 @@ Require Import Coqlib. Require Import AST Memory Separation. Require Import Bounds. +Require Archi. Local Open Scope sep_scope. (** The general shape of activation records is as follows, from bottom (lowest offsets) to top: +- For the Win64 ABI: 32 reserved bytes - Space for outgoing arguments to function calls. - Back link to parent frame - Saved values of integer callee-save registers used by the function. @@ -29,11 +31,11 @@ Local Open Scope sep_scope. - Return address. *) -Definition fe_ofs_arg := 0. +Definition fe_ofs_arg := if Archi.win64 then 32 else 0. Definition make_env (b: bounds) : frame_env := let w := if Archi.ptr64 then 8 else 4 in - let olink := align (4 * b.(bound_outgoing)) w in (* back link *) + let olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w in (* back link *) let ocs := olink + w in (* callee-saves *) let ol := align (size_callee_save_area b ocs) 8 in (* locals *) let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *) @@ -61,7 +63,7 @@ Proof. Local Opaque Z.add Z.mul sepconj range. intros; simpl. set (w := if Archi.ptr64 then 8 else 4). - set (olink := align (4 * b.(bound_outgoing)) w). + set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w). set (ocs := olink + w). set (ol := align (size_callee_save_area b ocs) 8). set (ostkdata := align (ol + 4 * b.(bound_local)) 8). @@ -69,8 +71,9 @@ Local Opaque Z.add Z.mul sepconj range. replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto). assert (0 < w) by (unfold w; destruct Archi.ptr64; omega). generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. + assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega). assert (0 <= 4 * b.(bound_outgoing)) by omega. - assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). + assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). assert (olink + w <= ocs) by (unfold ocs; omega). assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega). @@ -87,7 +90,7 @@ Local Opaque Z.add Z.mul sepconj range. rewrite sep_swap45. rewrite sep_swap34. (* Apply range_split and range_split2 repeatedly *) - unfold fe_ofs_arg. + apply range_drop_left with 0. omega. apply range_split_2. fold olink. omega. omega. apply range_split. omega. apply range_split_2. fold ol. omega. omega. @@ -105,15 +108,16 @@ Lemma frame_env_range: Proof. intros; simpl. set (w := if Archi.ptr64 then 8 else 4). - set (olink := align (4 * b.(bound_outgoing)) w). + set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w). set (ocs := olink + w). set (ol := align (size_callee_save_area b ocs) 8). set (ostkdata := align (ol + 4 * b.(bound_local)) 8). set (oretaddr := align (ostkdata + b.(bound_stack_data)) w). assert (0 < w) by (unfold w; destruct Archi.ptr64; omega). generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros. + assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega). assert (0 <= 4 * b.(bound_outgoing)) by omega. - assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). + assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega). assert (olink + w <= ocs) by (unfold ocs; omega). assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr). assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega). @@ -133,14 +137,14 @@ Lemma frame_env_aligned: Proof. intros; simpl. set (w := if Archi.ptr64 then 8 else 4). - set (olink := align (4 * b.(bound_outgoing)) w). + set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w). set (ocs := olink + w). set (ol := align (size_callee_save_area b ocs) 8). set (ostkdata := align (ol + 4 * b.(bound_local)) 8). set (oretaddr := align (ostkdata + b.(bound_stack_data)) w). assert (0 < w) by (unfold w; destruct Archi.ptr64; omega). replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto). - split. apply Z.divide_0_r. + split. exists (fe_ofs_arg / 8). unfold fe_ofs_arg; destruct Archi.win64; reflexivity. split. apply align_divides; omega. split. apply align_divides; omega. split. apply align_divides; omega. diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml index f0a5450696..481b09b9b2 100644 --- a/x86/TargetPrinter.ml +++ b/x86/TargetPrinter.ml @@ -239,8 +239,11 @@ module MacOS_System : SYSTEM = module Cygwin_System : SYSTEM = struct + let symbol_prefix = + if Archi.ptr64 then "" else "_" + let raw_symbol oc s = - fprintf oc "_%s" s + fprintf oc "%s%s" symbol_prefix s let symbol oc symb = raw_symbol oc (extern_atom symb) @@ -268,19 +271,39 @@ module Cygwin_System : SYSTEM = | Section_debug_str-> assert false (* Should not be used *) | Section_ais_annotation -> assert false (* Not supported for coff binaries *) - let stack_alignment = 8 (* minimum is 4, 8 is better for perfs *) + let stack_alignment = 8 + (* minimum is 4 for 32 bits, 8 for 64 bits; 8 is better for perfs *) let print_align oc n = fprintf oc " .balign %d\n" n + let indirect_symbols : StringSet.t ref = ref StringSet.empty + let print_mov_rs oc rd id = - fprintf oc " movl $%a, %a\n" symbol id ireg rd + if Archi.ptr64 then begin + let s = extern_atom id in + indirect_symbols := StringSet.add s !indirect_symbols; + fprintf oc " movq .refptr.%s(%%rip), %a\n" s ireg rd + end else begin + fprintf oc " movl $%a, %a\n" symbol id ireg rd + end let print_fun_info _ _ = () let print_var_info _ _ = () - let print_epilogue _ = () + let declare_indirect_symbol oc s = + fprintf oc " .section .rdata$.refptr.%s, \"dr\"\n" s; + fprintf oc " .globl .refptr.%s\n" s; + fprintf oc " .linkonce discard\n"; + fprintf oc ".refptr.%s:\n" s; + fprintf oc " .quad %s\n" s + + let print_epilogue oc = + if Archi.ptr64 then begin + StringSet.iter (declare_indirect_symbol oc) !indirect_symbols; + indirect_symbols := StringSet.empty + end let print_comm_decl oc name sz al = fprintf oc " .comm %a, %s, %d\n" @@ -288,7 +311,8 @@ module Cygwin_System : SYSTEM = let print_lcomm_decl oc name sz al = fprintf oc " .lcomm %a, %s, %d\n" - symbol name (Z.to_string sz) (log2 al) + symbol name (Z.to_string sz) + (if Archi.ptr64 then al else log2 al) end @@ -769,6 +793,8 @@ module Target(System: SYSTEM):TARGET = fprintf oc " minsd %a, %a\n" freg a1 freg res | Pmovb_rm (rd,a) -> fprintf oc " movb %a, %a\n" addressing a ireg8 rd + | Pmovq_rf (rd, r1) -> + fprintf oc " movq %a, %a\n" freg r1 ireg64 rd | Pmovsq_mr(a, rs) -> fprintf oc " movq %a, %a\n" freg rs addressing a | Pmovsq_rm(rd, a) -> diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v index a29553e8c4..20c6a52101 100644 --- a/x86/extractionMachdep.v +++ b/x86/extractionMachdep.v @@ -15,15 +15,19 @@ (* Additional extraction directives specific to the x86-64 port *) -Require SelectOp ConstpropOp. +Require Archi SelectOp. -(* SelectOp *) - -Extract Constant SelectOp.symbol_is_external => - "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id". +(* Archi *) -(* ConstpropOp *) +Extract Constant Archi.win64 => + "match Configuration.system with + | ""cygwin"" when ptr64 -> true + | _ -> false". -Extract Constant ConstpropOp.symbol_is_external => - "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id". +(* SelectOp *) +Extract Constant SelectOp.symbol_is_external => + "match Configuration.system with + | ""macosx"" -> C2C.atom_is_extern + | ""cygwin"" when Archi.ptr64 -> C2C.atom_is_extern + | _ -> (fun _ -> false)". diff --git a/x86_32/Archi.v b/x86_32/Archi.v index b7e8e58631..bbc04950b6 100644 --- a/x86_32/Archi.v +++ b/x86_32/Archi.v @@ -58,8 +58,12 @@ Definition fma_invalid_mul_is_nan := false. Definition float_of_single_preserves_sNaN := false. +(** Which ABI to use. *) +Parameter win64: bool. (* Always false in 32 bits *) + Global Opaque ptr64 big_endian splitlong default_nan_64 choose_nan_64 default_nan_32 choose_nan_32 fma_order fma_invalid_mul_is_nan float_of_single_preserves_sNaN. + diff --git a/x86_64/Archi.v b/x86_64/Archi.v index 6dc0044f76..0e027c0f22 100644 --- a/x86_64/Archi.v +++ b/x86_64/Archi.v @@ -58,6 +58,9 @@ Definition fma_invalid_mul_is_nan := false. Definition float_of_single_preserves_sNaN := false. +(** Which ABI to use. *) +Parameter win64: bool. + Global Opaque ptr64 big_endian splitlong default_nan_64 choose_nan_64 default_nan_32 choose_nan_32