Skip to content

For testing #3833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 56 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
f7586bc
Propagate effects info to Cfg.external_call_operation (#3765)
mshinwell Mar 31, 2025
cb8c355
Mark Obj_dup as not having effects and handle in Cfg (#3766)
mshinwell Mar 31, 2025
9cecf95
github ci jobs with debug runtime for macos (#3744)
gretay-js Mar 31, 2025
4d1240f
github ci actions on arm64 linux (#3745)
gretay-js Mar 31, 2025
65c8bc8
Merge `_utils` module (backend) (#3767)
xclerc Mar 31, 2025
ddd6e08
Avoid uncaught `Not_found` arising from free vars in probes (#3776)
lukemaurer Apr 1, 2025
f7364cf
Remove .mlp File from the Arm Backend (#3771)
spiessimon Apr 1, 2025
8e1748e
Add cfg invariants pass (#657)
gretay-js Apr 1, 2025
da0a241
Remove Select_utils.common_selector (#3783)
mshinwell Apr 2, 2025
dcd0d26
Revert "Merge atomic counter inc/dec functions and use them consisten…
mshinwell Apr 2, 2025
51104de
Rename sub_cfg instance variable -> current_sub_cfg (#3788)
mshinwell Apr 2, 2025
22d7822
Remove backend/dune (#3786)
gretay-js Apr 2, 2025
f025167
Revert "Avoid checking pending actions in Condition.wait (#3741)" (#3…
stedolan Apr 2, 2025
5f3b444
Pass around "sub_cfg" in Cfg_selectgen (#3792)
mshinwell Apr 2, 2025
d914311
Refactor stack classes (#3784)
xclerc Apr 2, 2025
9e0a8fe
Delete uses of `Reg.anonymous` (#3793)
TheNumbat Apr 3, 2025
3fd4b3b
Remove the current_sub_cfg instance variable (#3795)
mshinwell Apr 3, 2025
1a9ec8f
Clean up regalloc jobs (CI) (#3787)
xclerc Apr 3, 2025
f3b0cfb
Use caml_array_blit for %arrayblit on Pgenarray and Paddrarray (#3760)
mshinwell Apr 3, 2025
488f30e
Introduce Or_never_returns in Cfg_selectgen (#3798)
mshinwell Apr 3, 2025
82d9705
Compiler Compare Script (#3779)
spiessimon Apr 3, 2025
6f69bbe
Add new interface for target-specific selection code (#3800)
mshinwell Apr 3, 2025
3f67408
Cfg_selectgen emit_expr tidyups etc (#3799)
mshinwell Apr 3, 2025
bb6b65a
Move code from Cfg_selectgen to Select_utils etc (#3801)
mshinwell Apr 3, 2025
cf91b80
Cfg_selectgen: move code around (#3802)
mshinwell Apr 3, 2025
ce76ff7
New version of Cfg_selectgen.emit_stores (#3803)
mshinwell Apr 4, 2025
c7d2ec1
Remove objects from CSE code (#3806)
mshinwell Apr 4, 2025
ca1b65f
Remove objects from instruction selection code (#3782)
mshinwell Apr 4, 2025
ce96f37
Extend ARM DSL and use it for the emission of more instructions (Part…
spiessimon Apr 4, 2025
986dc5f
Specific instructions cannot raise (#3811)
gretay-js Apr 4, 2025
ee49613
Use the same verbose parameter for all register allocators (#3769)
xclerc Apr 7, 2025
8c435c3
Tidying up in Cfg_selectgen (#3815)
mshinwell Apr 7, 2025
c233e29
Improve compiler comparison script (#3818)
spiessimon Apr 7, 2025
d6f1350
Remove all objects from ocamltest (#3821)
lukemaurer Apr 8, 2025
dde9349
Remove Cmm.kind_for_unboxing (#3817)
mshinwell Apr 8, 2025
cd16a28
Remove cvt_emit (#3816)
spiessimon Apr 8, 2025
ba8ed94
Tweak logging code of register allocators (#3822)
xclerc Apr 8, 2025
d67ee12
Simplify the logging logic (register allocators) (#3777)
xclerc Apr 8, 2025
e7ad7a7
Merge `Simple_operation` into `Operation` (#3805)
xclerc Apr 8, 2025
f2aaa29
Remove reference to `simple_operation.ml*` in `.ocamlformat-enable` (…
xclerc Apr 8, 2025
d877934
Fix Weak.set on arm64 (and other relaxed architectures) (#3819)
stedolan Apr 8, 2025
5d6a0f6
Move `Substitution` to a dedicated top-level module (#3700)
xclerc Apr 9, 2025
1772d2b
Small cleanup changes in emit.ml (#3827)
spiessimon Apr 9, 2025
ea238dc
Linscan: use doubly-linked lists for intervals (#3737)
xclerc Apr 9, 2025
252c53e
Linscan: keep only the doubly-linked lists (#3751)
xclerc Apr 9, 2025
d106170
Extend ARM DSL and use it for the emission of more instructions (Part…
spiessimon Apr 9, 2025
2c028b3
Refactor peephole pass (#3797)
gretay-js Apr 9, 2025
be12560
Avoid polymorphic comparison in `backend` (#3649)
xclerc Apr 9, 2025
4fc47a8
Format emit.ml (#3831)
gretay-js Apr 9, 2025
1f6e829
Fix a soundness bug in Simplif (#3832)
stedolan Apr 10, 2025
12ea397
runtime events: disable flaky test (#3830)
gretay-js Apr 10, 2025
5ef70c1
Cfg_simplify pass (#3768)
gretay-js Apr 10, 2025
09d0685
no need for sigwait check as posix signals suffice (#3256)
avsm Apr 10, 2025
b731c0c
Enable cfg invariants and dead trap handler elimination by default
gretay-js Apr 2, 2025
85d4e30
Fix bug in flag
gretay-js Apr 21, 2025
d756cef
Remove fatal_error
gretay-js Apr 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor stack classes (#3784)
  • Loading branch information
xclerc authored and gretay-js committed Apr 21, 2025
commit d914311bc02c05a7d94ed5daf7d87c274bf67768
6 changes: 5 additions & 1 deletion asmcomp/asmgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,11 @@ let register_allocator fd : register_allocator =
let available_regs ~stack_slots ~f x =
(* Skip DWARF variable range generation for complicated functions to avoid
high compilation speed penalties *)
let total_num_stack_slots = Array.fold_left ( + ) 0 (stack_slots x) in
let fun_num_stack_slots = stack_slots x in
let total_num_stack_slots =
Stack_class.Tbl.fold fun_num_stack_slots ~init:0
~f:(fun _stack_class num acc -> acc + num)
in
if total_num_stack_slots > !Dwarf_flags.dwarf_max_function_complexity
then x
else f x
Expand Down
5 changes: 5 additions & 0 deletions backend/.ocamlformat-enable
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ amd64/selection.ml
amd64/selection_utils.ml
amd64/simd*.ml
amd64/stack_check.ml
amd64/stack_class.ml
amd64/vectorize_specific.ml
arm64/cfg_selection.ml
arm64/selection.ml
arm64/selection_utils.ml
arm64/simd*.ml
arm64/stack_check.ml
arm64/stack_class.ml
arm64/vectorize_specific.ml
arm64_ast.ml
arm64_ast.mli
Expand Down Expand Up @@ -53,6 +55,9 @@ selection.mli
simple_operation.ml
simple_operation.mli
stack_check.mli
stack_class.mli
stack_class_utils.ml
stack_class_utils.mli
zero_alloc_checker.ml
zero_alloc_checker.mli
zero_alloc_info.ml
Expand Down
24 changes: 10 additions & 14 deletions backend/amd64/emit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ let fastcode_flag = ref true
(* Layout of the stack frame *)
let stack_offset = ref 0

let num_stack_slots = Array.make Proc.num_stack_slot_classes 0
let num_stack_slots = Stack_class.Tbl.make 0

let prologue_required = ref false

Expand Down Expand Up @@ -317,7 +317,7 @@ let reg = function
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
mem64 (x86_data_type_for_stack_slot ty) ofs R14
| { loc = Stack s; typ = ty } as r ->
let ofs = slot_offset s (stack_slot_class r.typ) in
let ofs = slot_offset s (Stack_class.of_machtype r.typ) in
mem64 (x86_data_type_for_stack_slot ty) ofs RSP
| { loc = Unknown } ->
assert false
Expand All @@ -340,7 +340,7 @@ let reg_low_32_name = Array.map (fun r -> Reg32 r) int_reg_name
let emit_subreg tbl typ r =
match r.loc with
| Reg.Reg r when r < 13 -> tbl.(r)
| Stack s -> mem64 typ (slot_offset s (stack_slot_class r.Reg.typ)) RSP
| Stack s -> mem64 typ (slot_offset s (Stack_class.of_machtype r.Reg.typ)) RSP
| _ -> assert false

let arg8 i n = emit_subreg reg_low_8_name BYTE i.arg.(n)
Expand Down Expand Up @@ -383,13 +383,13 @@ let record_frame_label live dbg =
assert (Proc.gc_regs_offset reg = r);
live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Val; loc = Stack s} as reg ->
live_offset := slot_offset s (stack_slot_class reg.typ) :: !live_offset
live_offset := slot_offset s (Stack_class.of_machtype reg.typ) :: !live_offset
| {typ = Valx2; loc = Reg r} as reg ->
let n = Proc.gc_regs_offset reg in
let encode n = ((n lsl 1) + 1) in
live_offset := encode n :: encode (n + 1) :: !live_offset
| {typ = Valx2; loc = Stack s} as reg ->
let n = slot_offset s (stack_slot_class reg.typ) in
let n = slot_offset s (Stack_class.of_machtype reg.typ) in
live_offset := n :: n + Arch.size_addr :: !live_offset
| {typ = Addr} as r ->
Misc.fatal_error ("bad GC root " ^ Reg.name r)
Expand Down Expand Up @@ -867,7 +867,7 @@ let tailrec_entry_point = ref None
type probe =
{
stack_offset: int;
num_stack_slots: int array;
num_stack_slots: int Stack_class.Tbl.t;
(* Record frame info held in the corresponding mutable variables. *)
probe_label: label;
(* Probe site, recorded in .note.stapsdt section
Expand Down Expand Up @@ -2045,7 +2045,7 @@ let emit_instr ~first ~fallthrough i =
{ probe_label;
probe_insn = i;
stack_offset = !stack_offset;
num_stack_slots = Array.copy num_stack_slots;
num_stack_slots = Stack_class.Tbl.copy num_stack_slots;
}
in
probes := probe :: !probes;
Expand Down Expand Up @@ -2224,9 +2224,7 @@ let fundecl fundecl =
local_realloc_sites := [];
clear_safety_checks ();
clear_stack_realloc ();
for i = 0 to Proc.num_stack_slot_classes - 1 do
num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
done;
Stack_class.Tbl.copy_values ~from:fundecl.fun_num_stack_slots ~to_:num_stack_slots;
prologue_required := fundecl.fun_prologue_required;
frame_required := fundecl.fun_frame_required;
all_functions := fundecl :: !all_functions;
Expand Down Expand Up @@ -2473,9 +2471,7 @@ let emit_probe_handler_wrapper p =
recall that the wrapper does however have its own frame.) *)
frame_required := true;
stack_offset := p.stack_offset;
for i = 0 to Proc.num_stack_slot_classes - 1 do
num_stack_slots.(i) <- p.num_stack_slots.(i);
done;
Stack_class.Tbl.copy_values ~from:p.num_stack_slots ~to_:num_stack_slots;
(* Account for the return address that is now pushed on the stack. *)
stack_offset := !stack_offset + 8;
(* Emit function entry code *)
Expand Down Expand Up @@ -2613,7 +2609,7 @@ let emit_probe_notes0 () =
let arg_name =
match arg.loc with
| Stack s ->
Printf.sprintf "%d(%%rsp)" (slot_offset s (stack_slot_class arg.Reg.typ))
Printf.sprintf "%d(%%rsp)" (slot_offset s (Stack_class.of_machtype arg.Reg.typ))
| Reg reg -> Proc.register_name arg.Reg.typ reg
| Unknown ->
Misc.fatal_errorf "Cannot create probe: illegal argument: %a"
Expand Down
34 changes: 6 additions & 28 deletions backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,6 @@ let register_class r =
| Val | Int | Addr -> 0
| Float | Float32 | Vec128 | Valx2 -> 1

let num_stack_slot_classes = 3

let stack_slot_class typ =
match (typ : machtype_component) with
| Val | Addr | Int -> 0
| Float | Float32 -> 1
| Vec128 | Valx2 -> 2

let types_are_compatible left right =
match left.typ, right.typ with
Expand All @@ -121,13 +114,6 @@ let types_are_compatible left right =
true
| (Int | Val | Addr | Float | Float32 | Vec128 | Valx2), _ -> false

let stack_class_tag c =
match c with
| 0 -> "i"
| 1 -> "f"
| 2 -> "x"
| c -> Misc.fatal_errorf "Unspecified stack slot class %d" c

let num_available_registers = [| 13; 16 |]

let first_available_register = [| 0; 100 |]
Expand Down Expand Up @@ -676,9 +662,9 @@ let trap_frame_size_in_bytes = 16

let frame_required ~fun_contains_calls ~fun_num_stack_slots =
fp || fun_contains_calls ||
fun_num_stack_slots.(0) > 0 ||
fun_num_stack_slots.(1) > 0 ||
fun_num_stack_slots.(2) > 0
Stack_class.Tbl.exists
fun_num_stack_slots
~f:(fun _stack_class num -> num > 0)

let prologue_required ~fun_contains_calls ~fun_num_stack_slots =
frame_required ~fun_contains_calls ~fun_num_stack_slots
Expand All @@ -691,9 +677,7 @@ let frame_size ~stack_offset ~contains_calls ~num_stack_slots =
let sz =
(stack_offset
+ 8
+ 8 * num_stack_slots.(0)
+ 8 * num_stack_slots.(1)
+ 16 * num_stack_slots.(2)
+ Stack_class.Tbl.total_size_in_bytes num_stack_slots
+ (if fp then 8 else 0))
in Misc.align sz 16
end else
Expand All @@ -716,14 +700,8 @@ let slot_offset loc ~stack_class ~stack_offset ~fun_contains_calls
+ n)
| Local n ->
Bytes_relative_to_stack_pointer (
stack_offset +
(* Preserves original ordering (int -> float) *)
match stack_class with
| 2 -> n * 16
| 0 -> fun_num_stack_slots.(2) * 16 + n * 8
| 1 -> fun_num_stack_slots.(2) * 16
+ fun_num_stack_slots.(0) * 8 + n * 8
| _ -> Misc.fatal_errorf "Unknown register class %d" stack_class)
stack_offset + Stack_class.Tbl.offset_in_bytes fun_num_stack_slots ~stack_class ~slot:n
)
| Outgoing n -> Bytes_relative_to_stack_pointer n
| Domainstate n ->
Bytes_relative_to_domainstate_pointer (
Expand Down
49 changes: 49 additions & 0 deletions backend/amd64/stack_class.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[@@@ocaml.warning "+a-30-40-41-42"]

module T = struct
  (* The stack slot classes of this target: 64-bit integer slots, 64-bit
     float slots and 128-bit vector slots. *)
  type t =
    | Int64
    | Float64
    | Vector128

  (* Every stack class, each listed exactly once. *)
  let all = [Int64; Float64; Vector128]

  (* Order of the classes within the frame, kept identical to the layout used
     before stack classes were introduced: vector slots first, then integer
     slots, then float slots. *)
  let frame_order = [| Vector128; Int64; Float64 |]

  (* Constructor-wise equality. Both matches enumerate every constructor (no
     wildcard), so adding a stack class forces this function to be revisited;
     polymorphic comparison is deliberately not used. *)
  let equal (left : t) (right : t) : bool =
    match left with
    | Int64 -> (
      match right with Int64 -> true | Float64 | Vector128 -> false)
    | Float64 -> (
      match right with Float64 -> true | Int64 | Vector128 -> false)
    | Vector128 -> (
      match right with Vector128 -> true | Int64 | Float64 -> false)

  (* A distinct small integer per class. *)
  let hash (stack_class : t) : int =
    match stack_class with Int64 -> 0 | Float64 -> 1 | Vector128 -> 2

  (* One-letter tag identifying the class. *)
  let tag (stack_class : t) : string =
    match stack_class with
    | Int64 -> "i"
    | Float64 -> "f"
    | Vector128 -> "x"

  (* Prints the lowercase name of the class on [ppf]. *)
  let print (ppf : Format.formatter) (stack_class : t) : unit =
    let name =
      match stack_class with
      | Int64 -> "int64"
      | Float64 -> "float64"
      | Vector128 -> "vector128"
    in
    Format.pp_print_string ppf name

  (* Size of a single slot of the given class, in bytes. *)
  let size_in_bytes (stack_class : t) : int =
    match stack_class with Int64 | Float64 -> 8 | Vector128 -> 16

  (* Stack class used for a register of the given machine type. Note that
     [Float32] shares the 64-bit float slots and [Valx2] shares the 128-bit
     vector slots. *)
  let of_machtype (typ : Cmm.machtype_component) : t =
    match typ with
    | Val | Int | Addr -> Int64
    | Float | Float32 -> Float64
    | Vec128 | Valx2 -> Vector128
end

(* Re-export the stack class type and its operations at the top level of this
   module, so that clients can write e.g. [Stack_class.of_machtype]. *)
include T

(* Tables indexed by stack class, obtained by applying the shared functor
   [Stack_class_utils.Make_tbl] to the target-specific definitions in [T].
   NOTE(review): the functor body is not visible here; presumably it relies on
   [T.frame_order] and [T.size_in_bytes] for offset/size computations --
   confirm against stack_class_utils.ml. *)
module Tbl = Stack_class_utils.Make_tbl (T)
10 changes: 4 additions & 6 deletions backend/arm64/emit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ let femit_wreg out = function

let stack_offset = ref 0

let num_stack_slots = Array.make Proc.num_stack_slot_classes 0
let num_stack_slots = Stack_class.Tbl.make 0

let prologue_required = ref false

Expand Down Expand Up @@ -139,7 +139,7 @@ let femit_stack out r =
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
Printf.fprintf out "[%a, #%a]" femit_reg reg_domain_state_ptr femit_int ofs
| Stack ((Local _ | Incoming _ | Outgoing _) as s) ->
let ofs = slot_offset s (stack_slot_class r.typ) in
let ofs = slot_offset s (Stack_class.of_machtype r.typ) in
Printf.fprintf out "[sp, #%a]" femit_int ofs
| Reg _ | Unknown -> fatal_error "Emit.emit_stack"

Expand Down Expand Up @@ -174,7 +174,7 @@ let record_frame_label live dbg =
| {typ = Val; loc = Reg r} ->
live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Val; loc = Stack s} as reg ->
live_offset := slot_offset s (stack_slot_class reg.typ) :: !live_offset
live_offset := slot_offset s (Stack_class.of_machtype reg.typ) :: !live_offset
| {typ = Addr} as r ->
Misc.fatal_error ("bad GC root " ^ Reg.name r)
| { typ = Valx2; } as r ->
Expand Down Expand Up @@ -1680,9 +1680,7 @@ let fundecl fundecl =
call_gc_sites := [];
local_realloc_sites := [];
clear_stack_realloc ();
for i = 0 to Proc.num_stack_slot_classes - 1 do
num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
done;
Stack_class.Tbl.copy_values ~from:fundecl.fun_num_stack_slots ~to_:num_stack_slots;
prologue_required := fundecl.fun_prologue_required;
contains_calls := fundecl.fun_contains_calls;
emit_named_text_section !function_name;
Expand Down
32 changes: 3 additions & 29 deletions backend/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,6 @@ let register_class_of_machtype_component typ =
let register_class r =
register_class_of_machtype_component r.typ

let num_stack_slot_classes = 3

let stack_slot_class typ =
match (typ : Cmm.machtype_component) with
| Val | Int | Addr -> 0
| Float | Float32 -> 1
| Vec128 -> 2
| Valx2 -> 2

let types_are_compatible left right =
match left.typ, right.typ with
| (Int | Val | Addr), (Int | Val | Addr)
Expand All @@ -101,13 +92,6 @@ let types_are_compatible left right =
| Valx2,Valx2 -> true
| (Int | Val | Addr | Float | Float32 | Vec128 | Valx2), _ -> false

let stack_class_tag c =
match c with
| 0 -> "i"
| 1 -> "f"
| 2 -> "x"
| c -> Misc.fatal_errorf "Unspecified stack slot class %d" c

let num_available_registers =
[| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)

Expand Down Expand Up @@ -448,9 +432,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
(* Layout of the stack *)

let initial_stack_offset ~num_stack_slots ~contains_calls =
(8 * num_stack_slots.(0))
+ (8 * num_stack_slots.(1))
+ (16 * num_stack_slots.(2))
Stack_class.Tbl.total_size_in_bytes num_stack_slots
+ if contains_calls then 8 else 0

let trap_frame_size_in_bytes = 16
Expand All @@ -462,9 +444,7 @@ let frame_size ~stack_offset ~contains_calls ~num_stack_slots =
Misc.align sz 16

let frame_required ~fun_contains_calls ~fun_num_stack_slots =
fun_contains_calls
|| fun_num_stack_slots.(0) > 0
|| fun_num_stack_slots.(1) > 0
fun_contains_calls || Stack_class.Tbl.exists fun_num_stack_slots ~f:(fun _stack_class num -> num > 0)

let prologue_required ~fun_contains_calls ~fun_num_stack_slots =
frame_required ~fun_contains_calls ~fun_num_stack_slots
Expand All @@ -487,13 +467,7 @@ let slot_offset (loc : Reg.stack_location) ~stack_class ~stack_offset
| Local n ->
let offset =
stack_offset +
(* Preserves original ordering: int below float. *)
(match stack_class with
| 2 -> n * 16
| 0 -> fun_num_stack_slots.(2) * 16 + n * 8
| 1 -> fun_num_stack_slots.(2) * 16 +
fun_num_stack_slots.(0) * 8 + n * 8
| _ -> Misc.fatal_errorf "Unknown stack class %d" stack_class)
Stack_class.Tbl.offset_in_bytes fun_num_stack_slots ~stack_class ~slot:n
in
Bytes_relative_to_stack_pointer offset
| Outgoing n ->
Expand Down
Loading