Skip to content

Speed up linking by shrinking symbol and relocation tables #1222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions backend/afl_instrument.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ open Cmm
module V = Backend_var
module VP = Backend_var.With_provenance

let afl_area_ptr dbg = Cconst_symbol ("caml_afl_area_ptr", dbg)
let afl_prev_loc dbg = Cconst_symbol ("caml_afl_prev_loc", dbg)
let sym s = { sym_name = s; sym_global = Global }
let afl_area_ptr dbg = Cconst_symbol (sym "caml_afl_area_ptr", dbg)
let afl_prev_loc dbg = Cconst_symbol (sym "caml_afl_prev_loc", dbg)
let afl_map_size = 1 lsl 16

let rec with_afl_logging b dbg =
Expand Down
14 changes: 8 additions & 6 deletions backend/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ let command_line_options =

open Format

type sym_global = Global | Local

type addressing_mode =
Ibased of string * int (* symbol + displ *)
Ibased of string * sym_global * int (* symbol + displ *)
| Iindexed of int (* reg + displ *)
| Iindexed2 of int (* reg + reg + displ *)
| Iscaled of int * int (* reg * scale + displ *)
Expand Down Expand Up @@ -140,7 +142,7 @@ let identity_addressing = Iindexed 0

let offset_addressing addr delta =
match addr with
Ibased(s, n) -> Ibased(s, n + delta)
Ibased(s, glob, n) -> Ibased(s, glob, n + delta)
| Iindexed n -> Iindexed(n + delta)
| Iindexed2 n -> Iindexed2(n + delta)
| Iscaled(scale, n) -> Iscaled(scale, n + delta)
Expand Down Expand Up @@ -175,9 +177,9 @@ let int_of_bswap_bitwidth = function

let print_addressing printreg addr ppf arg =
match addr with
| Ibased(s, 0) ->
| Ibased(s, _glob, 0) ->
fprintf ppf "\"%s\"" s
| Ibased(s, n) ->
| Ibased(s, _glob, n) ->
fprintf ppf "\"%s\" + %i" s n
| Iindexed n ->
let idx = if n <> 0 then Printf.sprintf " + %i" n else "" in
Expand Down Expand Up @@ -305,8 +307,8 @@ let float_cond_and_need_swap cond =

let equal_addressing_mode left right =
match left, right with
| Ibased (left_sym, left_displ), Ibased (right_sym, right_displ) ->
String.equal left_sym right_sym && Int.equal left_displ right_displ
| Ibased (left_sym, left_glob, left_displ), Ibased (right_sym, right_glob, right_displ) ->
String.equal left_sym right_sym && left_glob = right_glob && Int.equal left_displ right_displ
| Iindexed left_displ, Iindexed right_displ ->
Int.equal left_displ right_displ
| Iindexed2 left_displ, Iindexed2 right_displ ->
Expand Down
135 changes: 88 additions & 47 deletions backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -166,41 +166,57 @@ let mem__imp s =
let imp_s = get_imp_symbol s in
mem64_rip QWORD (emit_symbol imp_s)

let rel_plt s =
if windows && !Clflags.dlcode then mem__imp s
else
sym (if use_plt then emit_symbol s ^ "@PLT" else emit_symbol s)
(* Output a label *)

let label_name lbl =
match system with
| S_macosx | S_win64 -> "L" ^ lbl
| _ -> ".L" ^ lbl

let emit_label lbl =
label_name (Int.to_string lbl)

let rel_plt (s : Cmm.symbol) =
match s.sym_global with
| Local -> sym (label_name (emit_symbol s.sym_name))
| Global ->
if windows && !Clflags.dlcode then mem__imp s.sym_name
else
let s = emit_symbol s.sym_name in
sym (if use_plt then s ^ "@PLT" else s)

let emit_call s = I.call (rel_plt s)

let emit_jump s = I.jmp (rel_plt s)

let load_symbol_addr s arg =
if !Clflags.dlcode then
if windows then begin
(* I.mov (mem__imp s) arg (\* mov __caml_imp_foo(%rip), ... *\) *)
I.mov (sym (emit_symbol s)) arg (* movabsq $foo, ... *)
end else I.mov (mem64_rip QWORD (emit_symbol s ^ "@GOTPCREL")) arg
else if !Clflags.pic_code then
I.lea (mem64_rip NONE (emit_symbol s)) arg
else
I.mov (sym (emit_symbol s)) arg

let domain_field f =
mem64 QWORD (Domainstate.idx_of_field f * 8) R14

(* Output a label *)

let emit_label lbl =
match system with
| S_macosx | S_win64 -> "L" ^ Int.to_string lbl
| _ -> ".L" ^ Int.to_string lbl

let label s = sym (emit_label s)

let def_label ?typ s =
D.label ?typ (emit_label s)

let emit_cmm_symbol (s : Cmm.symbol) =
match s.sym_global with
| Global -> emit_symbol s.sym_name
| Local -> label_name (emit_symbol s.sym_name)

let load_symbol_addr s arg =
match s.sym_global with
| Local ->
I.lea (mem64_rip NONE (label_name (emit_symbol s.sym_name))) arg
| Global ->
if !Clflags.dlcode then
if windows then begin
(* I.mov (mem__imp s) arg (\* mov __caml_imp_foo(%rip), ... *\) *)
I.mov (sym (emit_symbol s.sym_name)) arg (* movabsq $foo, ... *)
end else I.mov (mem64_rip QWORD (emit_symbol s.sym_name ^ "@GOTPCREL")) arg
else if !Clflags.pic_code then
I.lea (mem64_rip NONE (emit_symbol s.sym_name)) arg
else
I.mov (sym (emit_symbol s.sym_name)) arg

(* Output .text section directive, or named .text.caml.<name> if enabled and
supported on the target system. *)

Expand Down Expand Up @@ -305,9 +321,11 @@ let res32 i n = emit_subreg reg_low_32_name DWORD i.res.(n)

let addressing addr typ i n =
match addr with
| Ibased(s, ofs) ->
add_used_symbol s;
mem64_rip typ (emit_symbol s) ~ofs
| Ibased(sym_name, sym_global, ofs) ->
add_used_symbol sym_name;
let sym_global : Cmm.is_global =
match sym_global with Global -> Global | Local -> Local in
mem64_rip typ (emit_cmm_symbol { sym_name ; sym_global }) ~ofs
| Iindexed d ->
mem64 typ d (arg64 i n)
| Iindexed2 d ->
Expand Down Expand Up @@ -353,9 +371,12 @@ type gc_call =

let call_gc_sites = ref ([] : gc_call list)

let call_gc_local_sym : Cmm.symbol =
{sym_name = "caml_call_gc_"; sym_global=Local}

let emit_call_gc gc =
def_label gc.gc_lbl;
emit_call "caml_call_gc";
emit_call call_gc_local_sym;
def_label gc.gc_frame;
I.jmp (label gc.gc_return_lbl)

Expand All @@ -369,7 +390,7 @@ let local_realloc_sites = ref ([] : local_realloc_call list)

let emit_local_realloc lr =
def_label lr.lr_lbl;
emit_call "caml_call_local_realloc";
emit_call (Cmm.global_symbol "caml_call_local_realloc");
I.jmp (label lr.lr_return_lbl)

(* Record calls to caml_ml_array_bound_error.
Expand Down Expand Up @@ -400,14 +421,14 @@ let bound_error_label dbg =

let emit_call_bound_error bd =
def_label bd.bd_lbl;
emit_call "caml_ml_array_bound_error";
emit_call (Cmm.global_symbol "caml_ml_array_bound_error");
def_label bd.bd_frame

let emit_call_bound_errors () =
List.iter emit_call_bound_error !bound_error_sites;
if !bound_error_call > 0 then begin
def_label !bound_error_call;
emit_call "caml_ml_array_bound_error"
emit_call (Cmm.global_symbol "caml_ml_array_bound_error")
end

(* Record jump tables *)
Expand Down Expand Up @@ -858,35 +879,35 @@ let emit_instr fallthrough i =
I.movsd (mem64_rip NONE (emit_label lbl)) (res i 0)
end
| Lop(Iconst_symbol s) ->
add_used_symbol s;
add_used_symbol s.sym_name;
load_symbol_addr s (res i 0)
| Lop(Icall_ind) ->
I.call (arg i 0);
record_frame i.live (Dbg_other i.dbg)
| Lop(Icall_imm { func; }) ->
add_used_symbol func;
add_used_symbol func.sym_name;
emit_call func;
record_frame i.live (Dbg_other i.dbg)
| Lop(Itailcall_ind) ->
output_epilogue (fun () -> I.jmp (arg i 0))
| Lop(Itailcall_imm { func; }) ->
begin
if func = !function_name then
if func.sym_name = !function_name then
match !tailrec_entry_point with
| None -> Misc.fatal_error "jump to missing tailrec entry point"
| Some tailrec_entry_point -> I.jmp (label tailrec_entry_point)
else begin
output_epilogue begin fun () ->
add_used_symbol func;
add_used_symbol func.sym_name;
emit_jump func
end
end
end
| Lop(Iextcall { func; alloc; }) ->
add_used_symbol func;
if alloc then begin
load_symbol_addr func rax;
emit_call "caml_c_call";
load_symbol_addr (Cmm.global_symbol func) rax;
emit_call (Cmm.global_symbol "caml_c_call");
record_frame i.live (Dbg_other i.dbg);
if system <> S_win64 then begin
(* TODO: investigate why such a diff.
Expand All @@ -899,7 +920,7 @@ let emit_instr fallthrough i =
I.mov (domain_field Domainstate.Domain_young_ptr) r15
end
end else begin
emit_call func
emit_call (Cmm.global_symbol func)
end
| Lop(Istackoffset n) ->
emit_stack_offset n
Expand Down Expand Up @@ -960,12 +981,12 @@ let emit_instr fallthrough i =
gc_frame = lbl_frame; } :: !call_gc_sites
end else begin
begin match n with
| 16 -> emit_call "caml_alloc1"
| 24 -> emit_call "caml_alloc2"
| 32 -> emit_call "caml_alloc3"
| 16 -> emit_call (Cmm.global_symbol "caml_alloc1")
| 24 -> emit_call (Cmm.global_symbol "caml_alloc2")
| 32 -> emit_call (Cmm.global_symbol "caml_alloc3")
| _ ->
I.sub (int n) r15;
emit_call "caml_allocN"
emit_call (Cmm.global_symbol "caml_allocN")
end;
let label = record_frame_label i.live (Dbg_alloc dbginfo) in
def_label label;
Expand Down Expand Up @@ -1229,7 +1250,7 @@ let emit_instr fallthrough i =
to control ocaml probe handlers independently from stap probe handlers.
It is placed immediately after stap semaphore, and is the same
size - hence offset 2. *)
I.mov (addressing (Ibased(semaphore_sym, 2)) WORD i 0) (res16 i 0);
I.mov (addressing (Ibased(semaphore_sym, Global, 2)) WORD i 0) (res16 i 0);
(* If the semaphore is 0, then the result is 0, otherwise 1. *)
I.cmp (int 0) (res16 i 0);
I.set (cond (Iunsigned Cne)) (res8 i 0);
Expand Down Expand Up @@ -1318,10 +1339,10 @@ let emit_instr fallthrough i =
begin match k with
| Lambda.Raise_regular ->
I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos);
emit_call "caml_raise_exn";
emit_call (Cmm.global_symbol "caml_raise_exn");
record_frame Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_reraise ->
emit_call "caml_raise_exn";
emit_call (Cmm.global_symbol "caml_raise_exn");
record_frame Reg.Set.empty (Dbg_raise i.dbg)
| Lambda.Raise_notrace ->
I.mov (domain_field Domainstate.Domain_exception_pointer) rsp;
Expand Down Expand Up @@ -1379,7 +1400,11 @@ let fundecl fundecl =
D.private_extern (emit_symbol fundecl.fun_name)
else
D.global (emit_symbol fundecl.fun_name);
(* Even if the function name is Local, still emit an
actual linker symbol for it. This provides symbols
for perf, gdb, and similar tools *)
D.label (emit_symbol fundecl.fun_name);
D.label (label_name (emit_symbol fundecl.fun_name));
emit_debug_info fundecl.fun_dbg;
cfi_startproc ();
emit_all true fundecl.fun_body;
Expand All @@ -1400,15 +1425,25 @@ let fundecl fundecl =
(* Emission of data *)

let emit_item = function
| Cglobal_symbol s -> D.global (emit_symbol s)
| Cdefine_symbol s -> add_def_symbol s; _label (emit_symbol s)
| Cdefine_symbol s ->
begin match s.sym_global with
| Local ->
_label (label_name (emit_symbol s.sym_name))
| Global ->
D.global (emit_symbol s.sym_name);
add_def_symbol s.sym_name;
_label (emit_symbol s.sym_name);
_label (label_name (emit_symbol s.sym_name))
end
| Cint8 n -> D.byte (const n)
| Cint16 n -> D.word (const n)
| Cint32 n -> D.long (const_nat n)
| Cint n -> D.qword (const_nat n)
| Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f)))
| Cdouble f -> D.qword (Const (Int64.bits_of_float f))
| Csymbol_address s -> add_used_symbol s; D.qword (ConstLabel (emit_symbol s))
| Csymbol_address s ->
add_used_symbol s.sym_name;
D.qword (ConstLabel (emit_cmm_symbol s))
| Cstring s -> D.bytes s
| Cskip n -> if n > 0 then D.space n
| Calign n -> D.align ~data:true n
Expand Down Expand Up @@ -1478,6 +1513,12 @@ let begin_assembly unix =
emit_named_text_section code_begin;
emit_global_label_for_symbol code_begin;
if system = S_macosx then I.nop (); (* PR#4690 *)

D.label (emit_cmm_symbol call_gc_local_sym);
cfi_startproc ();
I.jmp (rel_plt (Cmm.global_symbol "caml_call_gc"));
cfi_endproc ();

()

let make_stack_loc ~offset i (r : Reg.t) =
Expand Down Expand Up @@ -1612,7 +1653,7 @@ let emit_probe_handler_wrapper p =
I.mov (reg saved_r15) r15;
(* Emit call to handler *)
add_used_symbol handler_code_sym;
emit_call handler_code_sym;
emit_call (Cmm.global_symbol handler_code_sym);
(* Record a frame description for the wrapper *)
let label = new_label () in
let live_offset =
Expand Down
6 changes: 4 additions & 2 deletions backend/amd64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ open Mach
(* Auxiliary for recognizing addressing modes *)

type addressing_expr =
Asymbol of string
Asymbol of Cmm.symbol
| Alinear of expression
| Aadd of expression * expression
| Ascale of expression * int
Expand Down Expand Up @@ -245,7 +245,9 @@ method select_addressing _chunk exp =
then (Iindexed 0, exp)
else match a with
| Asymbol s ->
(Ibased(s, d), Ctuple [])
let glob : Arch.sym_global =
match s.sym_global with Global -> Global | Local -> Local in
(Ibased(s.sym_name, glob, d), Ctuple [])
| Alinear e ->
(Iindexed d, e)
| Aadd(e1, e2) ->
Expand Down
Loading