From 7287ddb380401e0b34eb697f0b74e57ab7c5e5cd Mon Sep 17 00:00:00 2001 From: Xavier Clerc Date: Fri, 1 Nov 2024 10:13:37 +0000 Subject: [PATCH] Available-regs analysis for CFG (#1748) --- asmcomp/asmgen.ml | 27 +- backend/cfg/cfg_available_regs.ml | 399 +++++++++++++++++++++++++ backend/cfg/cfg_available_regs.mli | 3 + backend/cfg/cfg_dataflow.ml | 7 +- backend/cfg/cfg_dataflow.mli | 1 + backend/cfg/eliminate_dead_code.ml | 2 +- backend/debug/reg_availability_set.ml | 7 + backend/debug/reg_availability_set.mli | 2 + dune | 1 + 9 files changed, 435 insertions(+), 14 deletions(-) create mode 100644 backend/cfg/cfg_available_regs.ml create mode 100644 backend/cfg/cfg_available_regs.mli diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index 17f101b3396..aa9d1417f6c 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -358,6 +358,14 @@ type selection_output = | Mach_fundecl of Mach.fundecl | Cfg_with_layout of Cfg_with_layout.t +let available_regs ~stack_slots ~f x = + (* Skip DWARF variable range generation for complicated functions to avoid + high compilation speed penalties *) + let total_num_stack_slots = Array.fold_left ( + ) 0 (stack_slots x) in + if total_num_stack_slots > !Dwarf_flags.dwarf_max_function_complexity + then x + else f x + let compile_fundecl ~ppf_dump ~funcnames fd_cmm = Proc.init (); Reg.reset (); @@ -493,6 +501,11 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm = Regalloc_ls.run | Upstream -> assert false) ++ Cfg_with_infos.cfg_with_layout + ++ Profile.record ~accumulate:true "cfg_available_regs" + (available_regs + ~stack_slots:(fun x -> + (Cfg_with_layout.cfg x).Cfg.fun_num_stack_slots) + ~f:Cfg_available_regs.run) ++ cfg_with_layout_profile ~accumulate:true "cfg_validate_description" (Regalloc_validate.run cfg_description) @@ -544,17 +557,9 @@ let compile_fundecl ~ppf_dump ~funcnames fd_cmm = ++ Profile.record ~accumulate:true "regalloc" (regalloc ~ppf_dump 1) ++ Profile.record ~accumulate:true "available_regs" - (fun (fundecl : Mach.fundecl) -> - (* Skip DWARF variable range generation for complicated - functions to avoid high compilation speed - penalties *) - let total_num_stack_slots = - Array.fold_left ( + ) 0 fundecl.fun_num_stack_slots - in - if total_num_stack_slots - > !Dwarf_flags.dwarf_max_function_complexity - then fundecl - else Available_regs.fundecl fundecl) + (available_regs + ~stack_slots:(fun x -> x.Mach.fun_num_stack_slots) + ~f:Available_regs.fundecl) ++ pass_dump_if ppf_dump Flambda_backend_flags.davail "Register availability analysis" ++ Profile.record ~accumulate:true "cfgize" diff --git a/backend/cfg/cfg_available_regs.ml b/backend/cfg/cfg_available_regs.ml new file mode 100644 index 00000000000..cdedd7f4bda --- /dev/null +++ b/backend/cfg/cfg_available_regs.ml @@ -0,0 +1,399 @@ +[@@@ocaml.warning "+a-30-40-41-42"] + +module DLL = Flambda_backend_utils.Doubly_linked_list +module R = Reg +module RAS = Reg_availability_set +module RD = Reg_with_debug_info +module V = Backend_var + +(* If permitted to do so by the command line flags, this pass will extend live + ranges for otherwise dead but available registers across allocations, polls + and calls when it is safe to do so. This allows the values of more variables + to be seen in the debugger, for example when the last use of some variable is + just before a call, and the debugger is standing in the callee. It may + however affect the semantics of e.g. finalizers. *) +let extend_live () = !Dwarf_flags.gdwarf_may_alter_codegen + +(* CR xclerc for xclerc: consider passing this value through the context. *) +let all_regs_that_might_be_named = ref Reg.Set.empty + +let check_invariants : + type a. + a Cfg.instruction -> + print_instr:(Format.formatter -> a Cfg.instruction -> unit) -> + avail_before:RAS.t -> + unit = + fun instr ~print_instr ~avail_before -> + match avail_before with + | Unreachable -> () + | Ok avail_before -> + (* Every register that is live across an instruction should also be + available before the instruction. *) + let live = R.Set.inter instr.live !all_regs_that_might_be_named in + if not (R.Set.subset live (RD.Set.forget_debug_info avail_before)) + then + Misc.fatal_errorf + "Named live registers not a subset of available registers: live={%a} \ + avail_before=%a missing={%a} insn=%a" + Printmach.regset live + (RAS.print ~print_reg:Printmach.reg) + (RAS.Ok avail_before) Printmach.regset + (R.Set.diff live (RD.Set.forget_debug_info avail_before)) + print_instr instr; + (* Every register that is an input to an instruction should be available. *) + let args = R.inter_set_array !all_regs_that_might_be_named instr.arg in + let avail_before_fdi = RD.Set.forget_debug_info avail_before in + if not (R.Set.subset args avail_before_fdi) + then + Misc.fatal_errorf + "Instruction has unavailable input register(s): avail_before=%a \ + avail_before_fdi={%a} inputs={%a} insn=%a" + (RAS.print ~print_reg:Printmach.reg) + (RAS.Ok avail_before) Printmach.regset avail_before_fdi Printmach.regset + args print_instr instr + +(* CR xclerc for xclerc: double check the whole `Domain` module. *) +module Domain = struct + (** CR gyorsh: + + - Why is `option` needed? `RAS` already has `Unreachable` and it looks like + it means the same as `None`. It would simplify the domain. + - `subset` doesn't take into account conflicting values removed by `inter`, + so we can end up in a weird situation that two distinct abstract values A + and B satisfy both `less_equal A B = true` and `less_equal B A = true`. + For example if both A and B contain the same reg with different debug info. + I'm not sure if it breaks soundness. Instead, they should be incomparable. + We can make `less_equal` consistent with `join` like this: + ``` + module Domain = struct + type t = RAS.t + let join = RAS.inter + let less_equal x y = RAS.equal (join y x) y + end + ``` + but probably want a more efficient implementation of `less_equal` that doesn't + allocate *) + type t = { avail_before : Reg_availability_set.t option } [@@unboxed] + + let bot = { avail_before = Some Unreachable } + + let join ({ avail_before = left_avail } as left) + ({ avail_before = right_avail } as right) : t = + match left_avail, right_avail with + | None, None -> left + | None, Some _ -> right + | Some _, None -> left + | Some left_ras, Some right_ras -> + { avail_before = Some (RAS.inter left_ras right_ras) } + + let less_equal { avail_before = left_avail } { avail_before = right_avail } : + bool = + match left_avail, right_avail with + | None, None -> true + | None, Some _ -> true + | Some _, None -> false + | Some left_ras, Some right_ras -> RAS.subset right_ras left_ras +end + +(* [Transfer] calculates, given the registers "available before" an instruction + [instr], the registers that are available both "across" and immediately after + [instr]. This is a forwards dataflow analysis. + + Registers not in [all_regs_that_might_be_named] are ignored, to improve + performance. + + "available before" can be thought of, at the assembly level, as the set of + registers available when the program counter is equal to the address of the + particular instruction under consideration (that is to say, immediately prior + to the instruction being executed). Inputs to that instruction are available + at this point even if the instruction will clobber them. Results from the + previous instruction are also available at this point. + + "available across" is the registers available during the execution of some + particular instruction. These are the registers "available before" minus + registers that may be clobbered or otherwise invalidated by the instruction. + (The notion of "available across" is only useful for [Op] instructions. + Recall that some of these may expand into multiple machine instructions + including clobbers, e.g. for [Alloc].) + + The [available_before] and [available_across] fields of each instruction are + updated by the transfer functions. *) +module Transfer = struct + type domain = Domain.t + + type context = unit + + type image = + { normal : domain; + exceptional : domain + } + + let unreachable = RAS.Unreachable + + let ok set = RAS.Ok set + + let[@inline] common : + type a. + avail_before:RD.Set.t -> + destroyed_at:(a -> Reg.t array) -> + is_interesting_constructor:(a -> bool) -> + is_end_region:(a -> bool) -> + a Cfg.instruction -> + RAS.t option * RAS.t = + fun ~avail_before ~destroyed_at ~is_interesting_constructor ~is_end_region + instr -> + (* We split the calculation of registers that become unavailable after a + call into two parts. First: anything that the target marks as destroyed + by the operation, combined with any registers that will be clobbered by + the operation writing out its results. *) + let made_unavailable_1 = + let regs_clobbered = Array.append (destroyed_at instr.desc) instr.res in + RD.Set.made_unavailable_by_clobber avail_before ~regs_clobbered + ~register_class:Proc.register_class ~stack_class:(fun r -> + Proc.stack_slot_class r.typ) + in + (* Second: the cases of (a) allocations, (b) other polling points, (c) OCaml + to OCaml function calls and (d) end-region operations. In these cases, + since the GC may run, registers always become unavailable unless: (a) + they are "live across" the instruction; and/or (b) they hold immediates + and are assigned to the stack. For the moment we assume that [Ispecific] + instructions do not run the GC. *) + (* CR-someday mshinwell: Consider factoring this out from here and + [Available_ranges.Make_ranges.end_pos_offset]. *) + let made_unavailable_2 = + match is_interesting_constructor instr.desc with + | true -> + RD.Set.filter + (fun reg -> + let holds_immediate = RD.holds_non_pointer reg in + let on_stack = RD.assigned_to_stack reg in + let live_across = Reg.Set.mem (RD.reg reg) instr.live in + let remains_available = + live_across || (holds_immediate && on_stack) + in + let reg_is_of_type_addr = + match (RD.reg reg).typ with + | Addr -> true + | Val | Int | Float | Vec128 | Float32 -> false + in + if remains_available + || (not (extend_live ())) + || is_end_region instr.desc + || (not (RD.assigned_to_stack reg)) + || RD.Set.mem reg made_unavailable_1 + || reg_is_of_type_addr + then not remains_available + else ( + instr.live <- Reg.Set.add (RD.reg reg) instr.live; + false)) + avail_before + | false -> RD.Set.empty + in + let made_unavailable = RD.Set.union made_unavailable_1 made_unavailable_2 in + let avail_across = RD.Set.diff avail_before made_unavailable in + let avail_after = + (* If a result register will never be named, we can forget about it for + the purposes of this analysis. *) + let res = Reg.inter_set_array !all_regs_that_might_be_named instr.res in + RD.Set.union (RD.Set.without_debug_info res) avail_across + in + Some (ok avail_across), ok avail_after + + let basic ({ avail_before } : domain) (instr : Cfg.basic Cfg.instruction) () : + domain = + assert (Option.is_some avail_before); + instr.available_before <- avail_before; + let avail_before = Option.get avail_before in + if !Dwarf_flags.ddebug_invariants + then check_invariants instr ~print_instr:Cfg.print_basic ~avail_before; + let avail_across, avail_after = + match avail_before with + | Unreachable -> None, unreachable + | Ok avail_before -> ( + match instr.desc with + | Op + (Name_for_debugger + { ident; which_parameter; provenance; is_assignment; regs }) -> + (* First forget about any existing debug info to do with [ident] if + the naming corresponds to an assignment operation. *) + let forgetting_ident : RD.Set.t = + if not is_assignment + then avail_before + else + RD.Set.map + (fun reg -> + match RD.debug_info reg with + | None -> reg + | Some debug_info -> + if V.same (RD.Debug_info.holds_value_of debug_info) ident + then RD.clear_debug_info reg + else reg) + avail_before + in + let avail_after = ref forgetting_ident in + let num_parts_of_value = Array.length regs in + (* Add debug info about [ident], but only for registers that are known + to be available. *) + for part_of_value = 0 to num_parts_of_value - 1 do + let reg = regs.(part_of_value) in + if RD.Set.mem_reg forgetting_ident reg + then + let regd = + RD.create ~reg ~holds_value_of:ident ~part_of_value + ~num_parts_of_value ~which_parameter ~provenance + in + avail_after + := RD.Set.add regd (RD.Set.filter_reg !avail_after reg) + done; + Some (ok avail_before), ok !avail_after + | Op (Move | Reload | Spill) -> + (* Moves are special: they enable us to propagate names. No-op moves + need to be handled specially---in this case, we may learn that a + given hard register holds the value of multiple pseudoregisters + (all of which have the same value). This makes us match up properly + with [Cfg_liveness]. *) + let move_to_same_location = + let move_to_same_location = ref true in + for i = 0 to Array.length instr.arg - 1 do + let arg = instr.arg.(i) in + let res = instr.res.(i) in + (* Note that the register classes must be the same, so we don't + need to check that. *) + if arg.loc <> res.loc then move_to_same_location := false + done; + !move_to_same_location + in + let made_unavailable = + if move_to_same_location + then RD.Set.empty + else + RD.Set.made_unavailable_by_clobber avail_before + ~regs_clobbered:instr.res ~register_class:Proc.register_class + ~stack_class:(fun r -> Proc.stack_slot_class r.typ) + in + let results = + Array.map2 + (fun arg_reg result_reg -> + match RD.Set.find_reg_exn avail_before arg_reg with + | exception Not_found -> + (* Note that [arg_reg] might not be in + [all_regs_that_might_be_named], meaning it wouldn't be + found in [avail_before]. In that case we shouldn't + propagate anything. *) + None + | arg_reg -> + if Option.is_some (RD.debug_info arg_reg) + then + Some + (RD.create_copying_debug_info ~reg:result_reg + ~debug_info_from:arg_reg) + else None) + instr.arg instr.res + in + let avail_across = RD.Set.diff avail_before made_unavailable in + let avail_after = + Array.fold_left + (fun avail_after reg_opt -> + match reg_opt with + | None -> avail_after + | Some reg -> RD.Set.add reg avail_after) + avail_across results + in + Some (ok avail_across), ok avail_after + | Op + ( Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ + | Const_vec128 _ | Stackoffset _ | Load _ | Store _ | Intop _ + | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ + | Reinterpret_cast _ | Static_cast _ | Probe_is_enabled _ | Opaque + | Begin_region | End_region | Specific _ | Dls_get | Poll | Alloc _ + ) + | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> + let is_op_end_region = function[@ocaml.warning "-4"] + | Cfg.(Op End_region) -> true + | _ -> false + in + common ~avail_before ~destroyed_at:Proc.destroyed_at_basic + ~is_interesting_constructor:is_op_end_region + ~is_end_region:is_op_end_region instr) + in + instr.available_across <- avail_across; + { avail_before = Some avail_after } + + let terminator ({ avail_before } : domain) + (term : Cfg.terminator Cfg.instruction) () : image = + assert (Option.is_some avail_before); + term.available_before <- avail_before; + let avail_before = Option.get avail_before in + if !Dwarf_flags.ddebug_invariants + then check_invariants term ~print_instr:Cfg.print_terminator ~avail_before; + let avail_across, avail_after = + match avail_before with + | Unreachable -> None, unreachable + | Ok avail_before -> ( + match term.desc with + | Never -> assert false + | Tailcall_self _ -> + (* CR xclerc for xclerc: TODO *) + None, unreachable + | Always _ | Parity_test _ | Truth_test _ | Float_test _ | Int_test _ + | Switch _ | Call _ | Prim _ | Specific_can_raise _ | Return | Raise _ + | Tailcall_func _ | Call_no_return _ -> + common ~avail_before ~destroyed_at:Proc.destroyed_at_terminator + ~is_interesting_constructor: + Cfg.( + function + | Never -> assert false + | Call _ | Prim { op = Probe _; label_after = _ } -> true + | Always _ | Parity_test _ | Truth_test _ | Float_test _ + | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _ + | Tailcall_func _ | Call_no_return _ | Specific_can_raise _ + | Prim { op = External _; label_after = _ } -> + false) + ~is_end_region:(fun _ -> false) + term) + in + term.available_across <- avail_across; + let avail_before_handler = + match avail_after with + | Unreachable -> unreachable + | Ok avail_at_raise -> + let without_exn_bucket = + RD.Set.filter_reg avail_at_raise Proc.loc_exn_bucket + in + let with_anonymous_exn_bucket = + RD.Set.add + (RD.create_without_debug_info ~reg:Proc.loc_exn_bucket) + without_exn_bucket + in + ok with_anonymous_exn_bucket + in + { normal = { avail_before = Some avail_after }; + exceptional = { avail_before = Some avail_before_handler } + } +end + +module Analysis = Cfg_dataflow.Forward (Domain) (Transfer) + +let compute_all_regs_that_might_be_named : Cfg.t -> Reg.Set.t = + fun cfg -> + Cfg.fold_blocks cfg ~init:Reg.Set.empty ~f:(fun _label block acc -> + DLL.fold_left block.body ~init:acc ~f:(fun acc instr -> + match[@ocaml.warning "-4"] instr.Cfg.desc with + | Cfg.(Op (Name_for_debugger { regs; _ })) -> + Reg.add_set_array acc regs + | _ -> acc)) + +let run : Cfg_with_layout.t -> Cfg_with_layout.t = + fun cfg_with_layout -> + if !Clflags.debug && not !Dwarf_flags.restrict_to_upstream_dwarf + then ( + let cfg = Cfg_with_layout.cfg cfg_with_layout in + let fun_args = R.set_of_array cfg.fun_args in + let avail_before = RAS.Ok (RD.Set.without_debug_info fun_args) in + all_regs_that_might_be_named := compute_all_regs_that_might_be_named cfg; + let init : Domain.t = { Domain.avail_before = Some avail_before } in + match Analysis.run cfg ~init ~handlers_are_entry_points:false () with + | Error () -> + Misc.fatal_errorf "Cfg_available_regs.run: dataflow analysis failed" + | Ok (_ : Domain.t Label.Tbl.t) -> ()); + cfg_with_layout diff --git a/backend/cfg/cfg_available_regs.mli b/backend/cfg/cfg_available_regs.mli new file mode 100644 index 00000000000..f45acffc95c --- /dev/null +++ b/backend/cfg/cfg_available_regs.mli @@ -0,0 +1,3 @@ +[@@@ocaml.warning "+a-30-40-41-42"] + +val run : Cfg_with_layout.t -> Cfg_with_layout.t diff --git a/backend/cfg/cfg_dataflow.ml b/backend/cfg/cfg_dataflow.ml index 5ac94c25bf1..9b75f1f1da5 100644 --- a/backend/cfg/cfg_dataflow.ml +++ b/backend/cfg/cfg_dataflow.ml @@ -302,6 +302,7 @@ module type Forward_S = sig Cfg.t -> ?max_iteration:int -> init:domain -> + handlers_are_entry_points:bool -> context -> (domain Label.Tbl.t, unit) result end @@ -372,13 +373,15 @@ module Forward (D : Domain_S) (T : Forward_transfer with type domain = D.t) : Cfg.t -> ?max_iteration:int -> init:domain -> + handlers_are_entry_points:bool -> context -> (domain Label.Tbl.t, unit) result = - fun cfg ?(max_iteration = max_int) ~init context -> + fun cfg ?(max_iteration = max_int) ~init ~handlers_are_entry_points context -> let work_state = Dataflow_impl.create cfg ~init:(fun block -> - if Label.equal block.start cfg.entry_label || block.is_trap_handler + if Label.equal block.start cfg.entry_label + || (handlers_are_entry_points && block.is_trap_handler) then Some init else None) ~store_instr:false diff --git a/backend/cfg/cfg_dataflow.mli b/backend/cfg/cfg_dataflow.mli index 5d6dce37790..33d2044a17e 100644 --- a/backend/cfg/cfg_dataflow.mli +++ b/backend/cfg/cfg_dataflow.mli @@ -55,6 +55,7 @@ module type Forward_S = sig Cfg.t -> ?max_iteration:int -> init:domain -> + handlers_are_entry_points:bool -> context -> (domain Label.Tbl.t, unit) result end diff --git a/backend/cfg/eliminate_dead_code.ml b/backend/cfg/eliminate_dead_code.ml index 463e3fc6ad4..3baaef2eed3 100644 --- a/backend/cfg/eliminate_dead_code.ml +++ b/backend/cfg/eliminate_dead_code.ml @@ -38,7 +38,7 @@ module Dataflow = Cfg_dataflow.Forward (Domain) (Transfer) let run_dead_block : Cfg_with_layout.t -> unit = fun cfg_with_layout -> let cfg = Cfg_with_layout.cfg cfg_with_layout in - match Dataflow.run cfg ~init:Reachable () with + match Dataflow.run cfg ~init:Reachable ~handlers_are_entry_points:true () with | Result.Error _ -> Misc.fatal_error "Dataflow.run_dead_code: forward analysis did not reach a fix-point" diff --git a/backend/debug/reg_availability_set.ml b/backend/debug/reg_availability_set.ml index 92073765368..4eb012a85d3 100644 --- a/backend/debug/reg_availability_set.ml +++ b/backend/debug/reg_availability_set.ml @@ -128,6 +128,13 @@ let equal t1 t2 = | Unreachable, Ok _ | Ok _, Unreachable -> false | Ok regs1, Ok regs2 -> RD.Set.equal regs1 regs2 +let subset t1 t2 = + match t1, t2 with + | Unreachable, Unreachable -> true + | Unreachable, Ok _ -> false + | Ok _, Unreachable -> false + | Ok regs1, Ok regs2 -> RD.Set.subset regs1 regs2 + let print ~print_reg ppf = function | Unreachable -> Format.fprintf ppf "" | Ok availability -> diff --git a/backend/debug/reg_availability_set.mli b/backend/debug/reg_availability_set.mli index e381a2bd774..2660ed20fa5 100644 --- a/backend/debug/reg_availability_set.mli +++ b/backend/debug/reg_availability_set.mli @@ -39,6 +39,8 @@ val canonicalise : t -> t val equal : t -> t -> bool +val subset : t -> t -> bool + (** For debugging purposes only. *) val print : print_reg:(Format.formatter -> Reg.t -> unit) -> Format.formatter -> t -> unit diff --git a/dune b/dune index 74c86d1ca38..5d007c028ed 100644 --- a/dune +++ b/dune @@ -492,6 +492,7 @@ zero_alloc_info ;; backend/cfg cfg + cfg_available_regs cfg_intf cfg_to_linear cfg_with_layout