Support Cygwin 64 bits

- Add support for the Win64 ABI to the x86_64 port
- Update vararg support to handle Win64 conventions
- Configure support for the x86_64-cygwin target (64-bit Cygwin)
AbsInt · Oct 5, 2020 · 26ddb90 · 26ddb90
1 parent e9c738e
commit 26ddb90
Show file tree

Hide file tree

Showing 17 changed files with 415 additions and 118 deletions.
diff --git a/configure b/configure
@@ -54,6 +54,7 @@ Supported targets:
   x86_64-linux         (x86 64 bits, Linux)
   x86_64-bsd           (x86 64 bits, BSD)
   x86_64-macosx        (x86 64 bits, MacOS X)
+  x86_64-cygwin        (x86 64 bits, Cygwin environment under Windows)
   rv32-linux           (RISC-V 32 bits, Linux)
   rv64-linux           (RISC-V 64 bits, Linux)
   aarch64-linux        (AArch64, i.e. ARMv8 in 64-bit mode, Linux)
@@ -387,6 +388,18 @@ if test "$arch" = "x86" -a "$bitsize" = "64"; then
         libmath=""
         system="macosx"
         ;;
+    cygwin)
+        abi="standard"
+        casm="${toolprefix}gcc"
+        casm_options="-m64 -c"
+        cc="${toolprefix}gcc -m64"
+        clinker="${toolprefix}gcc"
+        clinker_options="-m64"
+        cprepro="${toolprefix}gcc"
+        cprepro_options="-std=c99 -m64 -U__GNUC__ '-D__attribute__(x)=' -E"
+        libmath="-lm"
+        system="cygwin"
+        ;;
     *)
         echo "Error: invalid eabi/system '$target' for architecture X86_64." 1>&2
         echo "$usage" 1>&2

diff --git a/runtime/x86_64/i64_dtou.S b/runtime/x86_64/i64_dtou.S
@@ -39,13 +39,13 @@
 // Conversion float -> unsigned long
 
 FUNCTION(__compcert_i64_dtou)
-        ucomisd .LC1(%rip), %xmm0
+        ucomisd .LC1(%rip), FP_ARG_1
         jnb 1f
-        cvttsd2siq %xmm0, %rax
+        cvttsd2siq FP_ARG_1, INT_RES
         ret
-1:      subsd .LC1(%rip), %xmm0
-        cvttsd2siq %xmm0, %rax
-        addq .LC2(%rip), %rax
+1:      subsd .LC1(%rip), FP_ARG_1
+        cvttsd2siq FP_ARG_1, INT_RES
+        addq .LC2(%rip), INT_RES
         ret
 
         .p2align 3

diff --git a/runtime/x86_64/i64_utod.S b/runtime/x86_64/i64_utod.S
@@ -39,18 +39,18 @@
 // Conversion unsigned long -> double-precision float
 
 FUNCTION(__compcert_i64_utod)
-        testq   %rdi, %rdi
+        testq   INT_ARG_1, INT_ARG_1
         js      1f
-        pxor    %xmm0, %xmm0            // if < 2^63,
-        cvtsi2sdq %rdi, %xmm0           // convert as if signed
+        pxor    FP_RES, FP_RES          // if < 2^63,
+        cvtsi2sdq INT_ARG_1, FP_RES     // convert as if signed
         ret
 1:                                      // if >= 2^63, use round-to-odd trick
-        movq    %rdi, %rax
+        movq    INT_ARG_1, %rax
         shrq    %rax
-        andq    $1, %rdi
-        orq     %rdi, %rax              // (arg >> 1) | (arg & 1)
-        pxor    %xmm0, %xmm0
-        cvtsi2sdq %rax, %xmm0           // convert as if signed
-        addsd   %xmm0, %xmm0            // multiply result by 2.0
+        andq    $1, INT_ARG_1
+        orq     INT_ARG_1, %rax         // (arg >> 1) | (arg & 1)
+        pxor    FP_RES, FP_RES
+        cvtsi2sdq %rax, FP_RES          // convert as if signed
+        addsd   FP_RES, FP_RES          // multiply result by 2.0
         ret
 ENDFUNCTION(__compcert_i64_utod)       
diff --git a/runtime/x86_64/i64_utof.S b/runtime/x86_64/i64_utof.S
@@ -39,18 +39,18 @@
 // Conversion unsigned long -> single-precision float
 
 FUNCTION(__compcert_i64_utof)
-        testq   %rdi, %rdi
+        testq   INT_ARG_1, INT_ARG_1
         js      1f
-        pxor    %xmm0, %xmm0            // if < 2^63,
-        cvtsi2ssq %rdi, %xmm0           // convert as if signed
+        pxor    FP_RES, FP_RES          // if < 2^63,
+        cvtsi2ssq INT_ARG_1, FP_RES     // convert as if signed
         ret
 1:                                      // if >= 2^63, use round-to-odd trick
-        movq    %rdi, %rax
+        movq    INT_ARG_1, %rax
         shrq    %rax
-        andq    $1, %rdi
-        orq     %rdi, %rax              // (arg >> 1) | (arg & 1)
-        pxor    %xmm0, %xmm0
-        cvtsi2ssq %rax, %xmm0           // convert as if signed
-        addss   %xmm0, %xmm0            // multiply result by 2.0
+        andq    $1, INT_ARG_1
+        orq     INT_ARG_1, %rax         // (arg >> 1) | (arg & 1)
+        pxor    FP_RES, FP_RES
+        cvtsi2ssq %rax, FP_RES          // convert as if signed
+        addss   FP_RES, FP_RES          // multiply result by 2.0
         ret
 ENDFUNCTION(__compcert_i64_utof)       
diff --git a/runtime/x86_64/sysdeps.h b/runtime/x86_64/sysdeps.h
@@ -63,13 +63,25 @@ _##f:
 
 #if defined(SYS_cygwin)
 
-#define GLOB(x) _##x
+#define GLOB(x) x
 #define FUNCTION(f) \
 	.text; \
-	.globl _##f; \
+	.globl f; \
 	.align 16; \
-_##f:
+f:
 
 #define ENDFUNCTION(f)
 
 #endif
+
+// Register names for the first int/FP argument and for results; only INT_ARG_1 varies by system
+
+#if defined(SYS_cygwin)
+#define INT_ARG_1 %rcx
+#else
+#define INT_ARG_1 %rdi
+#endif
+#define FP_ARG_1 %xmm0
+#define INT_RES %rax
+#define FP_RES %xmm0
+
diff --git a/runtime/x86_64/vararg.S b/runtime/x86_64/vararg.S
@@ -34,6 +34,12 @@
 
 // Helper functions for variadic functions <stdarg.h>.  x86_64 version.
 
+#include "sysdeps.h"
+
+// ELF ABI
+
+#if defined(SYS_linux) || defined(SYS_bsd) || defined(SYS_macosx)
+
 // typedef struct {
 //    unsigned int gp_offset;
 //    unsigned int fp_offset;
@@ -60,8 +66,6 @@
 // unsigned long long __compcert_va_int64(va_list ap);
 // double __compcert_va_float64(va_list ap);
 
-#include "sysdeps.h"
-
 FUNCTION(__compcert_va_int32)
         movl    0(%rdi), %edx           // edx = gp_offset
         cmpl    $48, %edx
@@ -146,3 +150,58 @@ FUNCTION(__compcert_va_saveregs)
         movaps %xmm7, 160(%r10)
 1:      ret
 ENDFUNCTION(__compcert_va_saveregs)
+
+#endif
+
+// Windows ABI
+
+#if defined(SYS_cygwin)
+
+// typedef void * va_list;
+// unsigned int __compcert_va_int32(va_list * ap);
+// unsigned long long __compcert_va_int64(va_list * ap);
+// double __compcert_va_float64(va_list * ap);
+
+FUNCTION(__compcert_va_int32)      // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movl 0(%rdx), %eax         // load the int32 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)         // store the advanced pointer back
+        ret                        // result is in %eax
+ENDFUNCTION(__compcert_va_int32)
+
+FUNCTION(__compcert_va_int64)      // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movq 0(%rdx), %rax         // load the int64 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)         // store the advanced pointer back
+        ret                        // result is in %rax
+ENDFUNCTION(__compcert_va_int64)
+
+FUNCTION(__compcert_va_float64)    // %rcx = pointer to argument pointer
+        movq 0(%rcx), %rdx         // %rdx = current argument pointer
+        movsd 0(%rdx), %xmm0       // load the float64 value there
+        addq $8, %rdx              // increment argument pointer by 8
+        movq %rdx, 0(%rcx)         // store the advanced pointer back
+        ret                        // result is in %xmm0
+ENDFUNCTION(__compcert_va_float64)
+
+FUNCTION(__compcert_va_composite)
+        jmp     GLOB(__compcert_va_int64)     // tail call: reads one 8-byte slot (by-ref convention, FIXME)
+ENDFUNCTION(__compcert_va_composite)
+
+// Save arguments passed in register in the stack at beginning of vararg
+// function.  The caller of the vararg function reserved 32 bytes of stack
+// just for this purpose.
+// FP arguments are passed both in FP registers and integer registers,
+// so it's enough to save the integer registers used for parameter passing.
+
+FUNCTION(__compcert_va_saveregs)
+        movq %rcx, 16(%rsp)        // integer argument registers, in order;
+        movq %rdx, 24(%rsp)        // offsets start at 16, presumably to skip
+        movq %r8, 32(%rsp)         // this helper's return address and the
+        movq %r9, 40(%rsp)         // vararg function's own (TODO confirm)
+        ret
+ENDFUNCTION(__compcert_va_saveregs)
+
+#endif
diff --git a/x86/Asm.v b/x86/Asm.v
@@ -279,6 +279,7 @@ Inductive instruction: Type :=
   | Pmaxsd (rd: freg) (r2: freg)
   | Pminsd (rd: freg) (r2: freg)
   | Pmovb_rm (rd: ireg) (a: addrmode)
+  | Pmovq_rf (rd: ireg) (r1: freg)
   | Pmovsq_mr  (a: addrmode) (rs: freg)
   | Pmovsq_rm (rd: freg) (a: addrmode)
   | Pmovsb
@@ -998,6 +999,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pmaxsd _ _
   | Pminsd _ _
   | Pmovb_rm _ _
+  | Pmovq_rf _ _
   | Pmovsq_rm _ _
   | Pmovsq_mr _ _
   | Pmovsb

diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
@@ -44,7 +44,7 @@ let stack_alignment () = 16
 let _Plea (r, addr) =
   if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
 
-(* SP adjustment to allocate or free a stack frame *)
+(* SP adjustment to allocate or free a stack frame. *)
 
 let align n a =
   if n >= 0 then (n + a - 1) land (-a) else n land (-a)
@@ -56,7 +56,7 @@ let sp_adjustment_32 sz =
   (* The top 4 bytes have already been allocated by the "call" instruction. *)
   sz - 4
 
-let sp_adjustment_64 sz =
+let sp_adjustment_elf64 sz =
   let sz = Z.to_int sz in
   if is_current_function_variadic() then begin
     (* If variadic, add room for register save area, which must be 16-aligned *)
@@ -73,6 +73,13 @@ let sp_adjustment_64 sz =
     (sz - 8, -1)
   end
 
+let sp_adjustment_win64 sz =
+  (* Bring the requested frame size to the 16-byte stack alignment. *)
+  let aligned_size = align (Z.to_int sz) 16 in
+  (* The "call" instruction has already allocated the top 8 bytes, *)
+  (* so only the remainder has to be taken off the stack pointer. *)
+  aligned_size - 8
+
 (* Built-ins.  They come in two flavors:
    - annotation statements: take their arguments in registers or stack
    locations; generate no code;
@@ -256,7 +263,7 @@ let expand_builtin_va_start_32 r =
   emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
   emit (Pmovl_mr (linear_addr r _0z, RAX))
 
-let expand_builtin_va_start_64 r =
+let expand_builtin_va_start_elf64 r =
   if not (is_current_function_variadic ()) then
     invalid_arg "Fatal error: va_start used in non-vararg function";
   let (ir, fr, ofs) =
@@ -287,6 +294,17 @@ let expand_builtin_va_start_64 r =
   emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
   emit (Pmovq_mr (linear_addr r _16z, RAX))
 
+let expand_builtin_va_start_win64 r =
+  if not (is_current_function_variadic ()) then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  (* One 8-byte slot per entry of get_current_function_args is skipped. *)
+  let named_slots = List.length (get_current_function_args ()) in
+  let first_vararg =
+    Int64.add !current_function_stacksize
+      (Int64.mul 8L (Int64.of_int named_slots)) in
+  emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 first_vararg)));
+  emit (Pmovq_mr (linear_addr r _0z, RAX))
+
 (* FMA operations *)
 
 (*   vfmadd<i><j><k> r1, r2, r3   performs r1 := ri * rj + rk
@@ -463,8 +481,8 @@ let expand_builtin_inline name args res =
   (* Vararg stuff *)
   | "__builtin_va_start", [BA(IR a)], _ ->
      assert (a = RDX);
-     if Archi.ptr64
-     then expand_builtin_va_start_64 a
+     if Archi.win64 then expand_builtin_va_start_win64 a
+     else if Archi.ptr64 then expand_builtin_va_start_elf64 a
      else expand_builtin_va_start_32 a
   (* Synchronization *)
   | "__builtin_membar", [], _ ->
@@ -476,24 +494,66 @@ let expand_builtin_inline name args res =
   | _ ->
      raise (Error ("unrecognized builtin " ^ name))
 
-(* Calls to variadic functions for x86-64: register AL must contain
+(* Calls to variadic functions for x86-64 ELF: register AL must contain
    the number of XMM registers used for parameter passing.  To be on
-   the safe side.  do the same if the called function is
+   the safe side, do the same if the called function is
    unprototyped. *)
 
-let set_al sg =
-  if Archi.ptr64 && (sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin
+let fixup_funcall_elf64 sg =
+  if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin
     let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
     emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
   end
 
+(* Calls to variadic functions for x86-64 Windows:
+   FP arguments passed in FP registers must also be passed in integer
+   registers.
+*)
+
+let rec copy_fregs_to_iregs args fr ir =
+  match (ir, fr, args) with
+  | (ireg :: ir_rest, freg :: fr_rest, ty :: args_rest) ->
+      (* Only FP-typed arguments get their FP register mirrored. *)
+      (match ty with
+       | Tfloat | Tsingle -> emit (Pmovq_rf (ireg, freg))
+       | _ -> ());
+      copy_fregs_to_iregs args_rest fr_rest ir_rest
+  | _ -> ()
+
+let fixup_funcall_win64 sg =
+  let fp_regs = [XMM0; XMM1; XMM2; XMM3] and int_regs = [RCX; RDX; R8; R9] in
+  if sg.sig_cc.cc_vararg then copy_fregs_to_iregs sg.sig_args fp_regs int_regs
+
+(* Per-ABI call fixup; nothing to do when pointers are not 64 bits. *)
+let fixup_funcall sg =
+  match (Archi.ptr64, Archi.win64) with
+  | (false, _) -> ()
+  | (true, true) -> fixup_funcall_win64 sg
+  | (true, false) -> fixup_funcall_elf64 sg
+
 (* Expansion of instructions *)
 
 let expand_instruction instr =
   match instr with
   | Pallocframe (sz, ofs_ra, ofs_link) ->
-     if Archi.ptr64 then begin
-       let (sz, save_regs) = sp_adjustment_64 sz in
+     if Archi.win64 then begin
+       let sz = sp_adjustment_win64 sz in
+       if is_current_function_variadic() then
+         (* Save parameters passed in registers in reserved stack area *)
+         emit (Pcall_s (intern_string "__compcert_va_saveregs",
+                        {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}));
+       (* Allocate frame: 64-bit subtract on RSP, mirroring Paddq_ri on free *)
+       let sz' = Z.of_uint sz in
+       emit (Psubq_ri (RSP, sz'));
+       emit (Pcfi_adjust sz');
+       (* Stack chaining: store RSP + full frame size (sz + 8) at ofs_link *)
+       let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in
+       let addr2 = linear_addr RSP ofs_link in
+       emit (Pleaq (RAX,addr1));
+       emit (Pmovq_mr (addr2, RAX));
+       current_function_stacksize := Int64.of_int (sz + 8)
+     end else if Archi.ptr64 then begin
+       let (sz, save_regs) = sp_adjustment_elf64 sz in
        (* Allocate frame *)
        let sz' = Z.of_uint sz in
        emit (Psubq_ri (RSP, sz'));
@@ -525,15 +585,18 @@ let expand_instruction instr =
        PrintAsmaux.current_function_stacksize := Int32.of_int sz
      end
   | Pfreeframe(sz, ofs_ra, ofs_link) ->
-     if Archi.ptr64 then begin
-       let (sz, _) = sp_adjustment_64 sz in
+     if Archi.win64 then begin
+       let sz = sp_adjustment_win64 sz in
+       emit (Paddq_ri (RSP, Z.of_uint sz))
+     end else if Archi.ptr64 then begin
+       let (sz, _) = sp_adjustment_elf64 sz in
        emit (Paddq_ri (RSP, Z.of_uint sz))
      end else begin
        let sz = sp_adjustment_32 sz in
        emit (Paddl_ri (RSP, Z.of_uint sz))
      end
   | Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) ->
-     set_al sg;
+     fixup_funcall sg;
      emit instr
   | Pbuiltin (ef,args, res) ->
      begin