Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add intrinsics for rdtsc, rdpmc, crc32 (amd64) #20

Merged
merged 4 commits into from
May 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/amd64/CSE.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ method! class_of_operation op =
| Ioffset_loc(_, _) -> Op_store true
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load
| Ibswap _ | Isqrtf -> super#class_of_operation op
| Irdtsc | Irdpmc -> Op_other
| Icrc32q -> Op_pure
end
| _ -> super#class_of_operation op

Expand Down
16 changes: 16 additions & 0 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
(* POPCNT instruction is not available prior to Nehalem, released in 2008. *)
let popcnt_support = ref true

(* CRC32 requires SSE 4.2 support *)
let crc32_support = ref true

(* Machine-specific command-line options *)

let command_line_options =
Expand All @@ -27,6 +30,10 @@ let command_line_options =
" Use POPCNT instruction (not available prior to Nehalem)";
"-fno-popcnt", Arg.Clear popcnt_support,
" Do not use POPCNT instruction";
"-fcrc32", Arg.Set crc32_support,
" Use CRC32 instructions (requires SSE4.2 support)";
"-fno-crc32", Arg.Clear crc32_support,
" Do not emit CRC32 instructions";
]

(* Specific operations for the AMD64 processor *)
Expand Down Expand Up @@ -54,6 +61,9 @@ type specific_operation =
extension *)
| Izextend32 (* 32 to 64 bit conversion with zero
extension *)
| Irdtsc (* read timestamp *)
| Irdpmc (* read performance counter *)
| Icrc32q (* compute crc *)

and float_operation =
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
Expand Down Expand Up @@ -143,6 +153,12 @@ let print_specific_operation printreg op ppf arg =
fprintf ppf "sextend32 %a" printreg arg.(0)
| Izextend32 ->
fprintf ppf "zextend32 %a" printreg arg.(0)
| Irdtsc ->
fprintf ppf "rdtsc"
| Irdpmc ->
fprintf ppf "rdpmc %a" printreg arg.(0)
| Icrc32q ->
fprintf ppf "crc32 %a %a" printreg arg.(0) printreg arg.(1)

let win64 =
match Config.system with
Expand Down
20 changes: 20 additions & 0 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,26 @@ let emit_instr fallthrough i =
| Lop(Iintop Ipopcnt) ->
assert (!popcnt_support);
I.popcnt (arg i 0) (res i 0)
| Lop(Ispecific Irdtsc) ->
assert (reg64 i.res.(0) = RDX);
I.rdtsc ();
(* The instruction fills in the low 32 bits of the result registers. *)
(* Combine edx and eax into a single 64-bit result in rdx. *)
I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *)
(* On processors that support the Intel 64 architecture,
the high-order 32 bits of each of RAX and RDX are cleared. *)
I.or_ rax (res i 0) (* combine high and low into rdx *)
| Lop(Ispecific Irdpmc) ->
assert ((arg64 i 0 = RCX) && (reg64 i.res.(0) = RDX));
I.rdpmc ();
(* The instruction fills in the low 32 bits of the result registers. *)
(* Combine edx and eax into a single 64-bit result in rdx. *)
I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *)
I.mov eax eax; (* zero-extend eax *)
I.or_ rax (res i 0) (* combine high and low into rdx *)
| Lop (Ispecific Icrc32q) ->
assert (arg i 0 = res i 0);
I.crc32 (arg i 1) (res i 0)
| Lop (Iname_for_debugger _) -> ()
| Lop (Iprobe _) ->
let probe_label = new_label () in
Expand Down
1 change: 1 addition & 0 deletions backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ let destroyed_at_oper = function
[| loc_spacetime_node_hole |]
| Iswitch(_, _) -> [| rax; rdx |]
| Itrywith _ -> [| r11 |]
| Iop(Ispecific (Irdtsc | Irdpmc)) -> [| rax |]
| _ ->
if fp then
(* prevent any use of the frame pointer ! *)
Expand Down
23 changes: 22 additions & 1 deletion backend/amd64/reload.ml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ open Mach
Iintoffloat R S
Ispecific(Ilea) R R R
Ispecific(Ifloatarithmem) R R R
Ispecific(Icrc32q) R R S (and Res = Arg1)
Ispecific(Irdtsc) R (and Res = rdx)
Ispecific(Irdpmc) R R (and Res = rdx, Arg1 = rcx)

Conditional branches:
Iinttest S R
Expand Down Expand Up @@ -86,6 +89,16 @@ method! reload_operation op arg res =
if stackp arg.(0)
then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
else (arg, res)
| Ispecific (Irdtsc | Irdpmc) ->
(* Irdtsc: res(0) already forced in reg.
Irdpmc: res(0) and arg(0) already forced in regs. *)
(arg, res)
| Ispecific Icrc32q ->
(* First argument and result must be in the same register.
Second argument can be either in a register or on stack. *)
if stackp arg.(0)
then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
else (arg, res)
| Ifloatofint | Iintoffloat ->
(* Result must be in register, but argument can be on stack *)
(arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
Expand All @@ -97,7 +110,15 @@ method! reload_operation op arg res =
if !Clflags.pic_code || !Clflags.dlcode || Arch.win64
then super#reload_operation op arg res
else (arg, res)
| _ -> (* Other operations: all args and results in registers *)
| Iintop (Ipopcnt | Iclz _| Ictz _)
| Ispecific (Isqrtf | Isextend32 | Izextend32 | Ilea _
| Istore_int (_, _, _)
| Ioffset_loc (_, _) | Ifloatarithmem (_, _)
| Ibswap _| Ifloatsqrtf _)
| Imove|Ispill|Ireload|Inegf|Iabsf|Iconst_float _|Icall_ind _|Icall_imm _
| Itailcall_ind _|Itailcall_imm _|Iextcall _|Istackoffset _|Iload (_, _)
| Istore (_, _, _)|Ialloc _|Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _
-> (* Other operations: all args and results in registers *)
super#reload_operation op arg res

method! reload_test tst arg =
Expand Down
37 changes: 35 additions & 2 deletions backend/amd64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ let pseudoregs_for_operation op arg res =
([| rax |], [| rax |])
(* For imulq, first arg must be in rax, rax is clobbered, and result is in
rdx. *)
| Ispecific (Ibswap _) -> assert false
| Iintop(Imulh) ->
([| rax; arg.(1) |], [| rdx |])
| Ispecific(Ifloatarithmem(_,_)) ->
Expand All @@ -112,8 +113,30 @@ let pseudoregs_for_operation op arg res =
([| rax; rcx |], [| rax |])
| Iintop(Imod) ->
([| rax; rcx |], [| rdx |])
| Ispecific Irdtsc ->
(* For rdtsc instruction, the result is in edx (high) and eax (low).
Make it simple and force the result in rdx and rax clobbered. *)
([| |], [| rdx |])
| Ispecific Irdpmc ->
(* For rdpmc instruction, the argument must be in ecx
and the result is in edx (high) and eax (low).
Make it simple and force the argument in rcx, the result in rdx,
and rax clobbered *)
([| rcx |], [| rdx |])
| Ispecific Icrc32q ->
(* arg.(0) and res.(0) must be the same *)
([|res.(0); arg.(1)|], res)
(* Other instructions are regular *)
| _ -> raise Use_default
| Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound _)
| Iintop_imm ((Imulh|Idiv|Imod|Icomp _|Icheckbound _
|Ipopcnt|Iclz _|Ictz _), _)
| Ispecific (Isqrtf|Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
|Ioffset_loc (_, _)|Ifloatsqrtf _)
| Imove|Ispill|Ireload|Ifloatofint|Iintoffloat|Iconst_int _|Iconst_float _
| Iconst_symbol _|Icall_ind _|Icall_imm _|Itailcall_ind _|Itailcall_imm _
| Iextcall _|Istackoffset _|Iload (_, _)|Istore (_, _, _)|Ialloc _
| Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _
-> raise Use_default

(* If you update [inline_ops], you may need to update [is_simple_expr] and/or
[effects_of], below. *)
Expand Down Expand Up @@ -210,7 +233,17 @@ method! select_operation op args dbg =
(Ispecific Isqrtf, [arg])
| _ ->
assert false
end
end
| Cextcall { name; builtin = true; ret; label_after } ->
begin match name, ret with
| "caml_rdtsc_unboxed", [|Int|] -> Ispecific Irdtsc, args
| "caml_rdpmc_unboxed", [|Int|] -> Ispecific Irdpmc, args
| ("caml_int64_crc_unboxed", [|Int|]
| "caml_int_crc_untagged", [|Int|]) when !Arch.crc32_support ->
Ispecific Icrc32q, args
| _ ->
super#select_operation op args dbg
end
(* Recognize store instructions *)
| Cstore ((Word_int|Word_val as chunk), _init) ->
begin match args with
Expand Down
3 changes: 3 additions & 0 deletions backend/x86_ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ type instruction =
| CMP of arg * arg
| COMISD of arg * arg
| CQO
| CRC32 of arg * arg
| CVTSD2SI of arg * arg
| CVTSD2SS of arg * arg
| CVTSI2SD of arg * arg
Expand Down Expand Up @@ -172,6 +173,8 @@ type instruction =
| POP of arg
| POPCNT of arg * arg
| PUSH of arg
| RDTSC
| RDPMC
| RET
| ROUNDSD of rounding * arg * arg
| SAL of arg * arg
Expand Down
3 changes: 3 additions & 0 deletions backend/x86_dsl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ module I = struct
let cmp x y = emit (CMP (x, y))
let comisd x y = emit (COMISD (x, y))
let cqo () = emit CQO
let crc32 x y = emit (CRC32 (x, y))
let cvtsd2ss x y = emit (CVTSD2SS (x, y))
let cvtsi2sd x y = emit (CVTSI2SD (x, y))
let cvtss2sd x y = emit (CVTSS2SD (x, y))
Expand Down Expand Up @@ -190,6 +191,8 @@ module I = struct
let pop x = emit (POP x)
let popcnt x y = emit (POPCNT (x, y))
let push x = emit (PUSH x)
let rdtsc () = emit (RDTSC)
let rdpmc () = emit (RDPMC)
let ret () = emit RET
let sal x y = emit (SAL (x, y))
let sar x y = emit (SAR (x, y))
Expand Down
3 changes: 3 additions & 0 deletions backend/x86_dsl.mli
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ module I : sig
val cmp: arg -> arg -> unit
val comisd: arg -> arg -> unit
val cqo: unit -> unit
val crc32 : arg -> arg -> unit
val cvtsd2ss: arg -> arg -> unit
val cvtsi2sd: arg -> arg -> unit
val cvtss2sd: arg -> arg -> unit
Expand Down Expand Up @@ -183,6 +184,8 @@ module I : sig
val pop: arg -> unit
val popcnt : arg -> arg -> unit
val push: arg -> unit
val rdtsc: unit -> unit
val rdpmc: unit -> unit
val ret: unit -> unit
val sal: arg -> arg -> unit
val sar: arg -> arg -> unit
Expand Down
3 changes: 3 additions & 0 deletions backend/x86_gas.ml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ let print_instr b = function
| CMP (arg1, arg2) -> i2_s b "cmp" arg1 arg2
| COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2
| CQO -> i0 b "cqto"
| CRC32 (arg1, arg2) -> i2_s b "crc32" arg1 arg2
| CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2
| CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2
| CVTSI2SD (arg1, arg2) -> i2 b ("cvtsi2sd" ^ suf arg1) arg1 arg2
Expand Down Expand Up @@ -207,6 +208,8 @@ let print_instr b = function
| POP arg -> i1_s b "pop" arg
| POPCNT (arg1, arg2) -> i2_s b "popcnt" arg1 arg2
| PUSH arg -> i1_s b "push" arg
| RDTSC -> i0 b "rdtsc"
| RDPMC -> i0 b "rdpmc"
| RET -> i0 b "ret"
| ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2
| SAL (arg1, arg2) -> i2_s b "sal" arg1 arg2
Expand Down
3 changes: 3 additions & 0 deletions backend/x86_masm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ let print_instr b = function
| CMP (arg1, arg2) -> i2 b "cmp" arg1 arg2
| COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2
| CQO -> i0 b "cqo"
| CRC32 (arg1, arg2) -> i2 b "crc32q" arg1 arg2
| CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2
| CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2
| CVTSI2SD (arg1, arg2) -> i2 b "cvtsi2sd" arg1 arg2
Expand Down Expand Up @@ -199,6 +200,8 @@ let print_instr b = function
| POP arg -> i1 b "pop" arg
| POPCNT (arg1, arg2) -> i2 b "popcnt" arg1 arg2
| PUSH arg -> i1 b "push" arg
| RDTSC -> i0 b "rdtsc"
| RDPMC -> i0 b "rdpmc"
| RET -> i0 b "ret"
| ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2
| SAL (arg1, arg2) -> i2 b "sal" arg1 arg2
Expand Down