-
Notifications
You must be signed in to change notification settings - Fork 85
/
Copy pathproc.ml
521 lines (445 loc) · 18.1 KB
/
proc.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2012 Benedikt Meurer. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
[@@@ocaml.warning "+a-40-41-42"]
(* Description of the ARM processor in 64-bit mode *)
open! Int_replace_polymorphic_compare
open Misc
open Reg
open Arch
(* Instruction selection *)
(* Flag exposed for instruction selection; it is [false] for this target. *)
let word_addressed : bool = false
(* Registers available for register allocation *)
(* Integer register map:
x0 - x15 general purpose (caller-save)
x16, x17 temporaries (used by call veneers)
x18 platform register (reserved)
x19 - x25 general purpose (callee-save)
x26 trap pointer
x27 alloc pointer
x28 domain state pointer
x29 frame pointer
x30 return address
sp / xzr stack pointer / zero register
Floating-point register map:
d0 - d7 general purpose (caller-save)
d8 - d15 general purpose (callee-save)
d16 - d31 general purpose (caller-save)
*)
(* Assembly names of the integer registers, indexed by the backend's own
   register numbering (not by the hardware register number): the
   callee-saved x19 - x25 come right after x0 - x15, and the x16/x17
   temporaries are placed last. *)
let int_reg_name =
  let x n = "x" ^ string_of_int n in
  Array.concat
    [ Array.init 16 x;                     (* 0 - 15:  x0 - x15 *)
      Array.init 7 (fun i -> x (19 + i));  (* 16 - 22: x19 - x25 *)
      Array.init 3 (fun i -> x (26 + i));  (* 23 - 25: x26 - x28 *)
      Array.init 2 (fun i -> x (16 + i));  (* 26 - 27: x16, x17 *)
    ]
(* Assembly names d0 - d31 used when a register holds a [Float]. *)
let float_reg_name =
  Array.init 32 (fun i -> "d" ^ string_of_int i)
(* Assembly names s0 - s31 used when a register holds a [Float32]. *)
let float32_reg_name =
  Array.init 32 (fun i -> "s" ^ string_of_int i)
(* Assembly names q0 - q31 used when a register holds a [Vec128] (or a
   [Valx2], see [register_name]). *)
let vec128_reg_name =
  Array.init 32 (fun i -> "q" ^ string_of_int i)
(* Two register classes: 0 for integer registers, 1 for floating-point and
   vector registers. *)
let num_register_classes : int = 2
(* Maps a machine type component to its register class: 0 for the integer
   class, 1 for the floating-point/vector class. *)
let register_class_of_machtype_component (typ : Cmm.machtype_component) =
  match typ with
  | Val | Int | Addr -> 0
  | Float | Float32 | Vec128 | Valx2 -> 1
(* Register class of a register, derived from its [typ] field. *)
let register_class reg =
  reg.typ |> register_class_of_machtype_component
(* Returns [true] when the types of the two registers are compatible:
   [Int], [Val] and [Addr] are mutually compatible; every other type is
   compatible only with itself. *)
let types_are_compatible left right =
  let lhs = left.typ in
  let rhs = right.typ in
  match lhs, rhs with
  | (Int | Val | Addr), (Int | Val | Addr)
  | Float, Float
  | Float32, Float32
  | Vec128, Vec128
  | Valx2, Valx2 -> true
  | (Int | Val | Addr | Float | Float32 | Vec128 | Valx2), _ -> false
(* Number of allocatable registers, indexed by register class. *)
let num_available_registers =
  let allocatable_int_regs = 23 (* first 23 int regs allocatable *) in
  let allocatable_float_regs = 32 (* all float regs allocatable *) in
  [| allocatable_int_regs; allocatable_float_regs |]
(* Number of the first register of each class, indexed by register class:
   integer registers are numbered from 0, float/vector registers from 100. *)
let first_available_register =
  let first_int_reg = 0 in
  let first_float_reg = 100 in
  [| first_int_reg; first_float_reg |]
(* Assembly name of register number [r] when it holds a value of type [ty].
   The same float-class register number yields a d/s/q name depending on
   [ty]. Raises [Invalid_argument] (array bounds) when [r] is outside the
   name table of the class. *)
let register_name ty r =
  let name_in names ~reg_class =
    names.(r - first_available_register.(reg_class))
  in
  match (ty : Cmm.machtype_component) with
  | Val | Int | Addr -> name_in int_reg_name ~reg_class:0
  | Float -> name_in float_reg_name ~reg_class:1
  | Float32 -> name_in float32_reg_name ~reg_class:1
  | Vec128 | Valx2 -> name_in vec128_reg_name ~reg_class:1
(* Representation of hard registers by pseudo-registers *)
(* Builds the array of the [n] pseudo-registers of type [typ] pinned to the
   hard registers of the corresponding class, starting at that class's first
   register number. Elements are created in increasing index order. *)
let hard_reg_gen typ n =
  let first =
    first_available_register.(register_class_of_machtype_component typ)
  in
  Array.init n (fun i -> Reg.at_location typ (Reg (first + i)))
(* One array of pinned pseudo-registers per register view; each array has as
   many entries as the matching name table. *)
let hard_int_reg =
  int_reg_name |> Array.length |> hard_reg_gen Int
let hard_float_reg =
  float_reg_name |> Array.length |> hard_reg_gen Float
let hard_float32_reg =
  float32_reg_name |> Array.length |> hard_reg_gen Float32
let hard_vec128_reg =
  vec128_reg_name |> Array.length |> hard_reg_gen Vec128
(* Every pinned pseudo-register: integer registers first, then the Float,
   Float32 and Vec128 views of the float/vector registers. *)
let all_phys_regs =
  let banks =
    [ hard_int_reg; hard_float_reg; hard_float32_reg; hard_vec128_reg ]
  in
  Array.concat banks
(* Returns the set of all physical registers. The set is built once, when
   this definition is evaluated, and the same value is returned by every
   call. *)
let precolored_regs =
  let cached_set = Reg.set_of_array all_phys_regs in
  fun () -> cached_set
(* Pinned pseudo-register for register number [n] viewed at type [ty].
   Integer registers are indexed directly by [n]; float/vector register
   numbers start at 100. Raises [Invalid_argument] (array bounds) when [n]
   is out of range for the selected table. *)
let phys_reg ty n =
  let float_class_reg table = table.(n - 100) in
  match (ty : Cmm.machtype_component) with
  | Int | Addr | Val -> hard_int_reg.(n)
  | Float -> float_class_reg hard_float_reg
  | Float32 -> float_class_reg hard_float32_reg
  | Vec128 | Valx2 -> float_class_reg hard_vec128_reg
(* Always fails with a fatal error: this entry point is not expected to be
   reached on arm64. *)
let gc_regs_offset _reg =
  fatal_error "arm64: gc_reg_offset unreachable"
(* The pinned integer pseudo-register number 8, i.e. x8. *)
let reg_x8 = hard_int_reg.(8)
(* Pseudo-register of type [ty] located in the stack slot [slot]. *)
let stack_slot slot ty =
  let location = Stack slot in
  Reg.at_location ty location
(* Calling conventions *)
(* Size in bytes of the domain-state area used for passing arguments:
   64 slots of [size_int] bytes each. *)
let size_domainstate_args = size_int * 64
(* Location of the next integer argument.
   [int] holds the next candidate integer register number and [ofs] the
   current stack offset; both are updated in place. While [!int] does not
   exceed [last_int] the argument goes in a register; afterwards it goes in
   a [size_int]-aligned stack slot built with [make_stack]. *)
let loc_int last_int make_stack int ofs =
  if !int > last_int then begin
    (* Out of registers: take the next word-aligned stack slot. *)
    ofs := Misc.align !ofs size_int;
    let slot = stack_slot (make_stack !ofs) Int in
    ofs := !ofs + size_int;
    slot
  end else begin
    let reg = phys_reg Int !int in
    incr int;
    reg
  end
(* Location of the next float-class argument of type [kind].
   [float] holds the next candidate register number and [ofs] the current
   stack offset; both are updated in place. Once registers up to
   [last_float] are used up, the argument goes in a stack slot of [size]
   bytes, aligned to [size] and built with [make_stack]. *)
let loc_float_gen kind size last_float make_stack float ofs =
  if !float > last_float then begin
    ofs := Misc.align !ofs size;
    let slot = stack_slot (make_stack !ofs) kind in
    ofs := !ofs + size;
    slot
  end else begin
    let reg = phys_reg kind !float in
    incr float;
    reg
  end
(* Specialisations of [loc_float_gen] for each float-class type. *)
let loc_float last_float make_stack float ofs =
  loc_float_gen Float Arch.size_float last_float make_stack float ofs
(* float32 slots still take up a full word *)
let loc_float32 last_float make_stack float ofs =
  loc_float_gen Float32 Arch.size_float last_float make_stack float ofs
let loc_vec128 last_float make_stack float ofs =
  loc_float_gen Vec128 Arch.size_vec128 last_float make_stack float ofs
(* Location of the next 32-bit integer argument of an external call.
   Same register policy as [loc_int]; on the stack, the slot is not
   re-aligned and advances [ofs] by 4 bytes when [macosx] is set, 8 bytes
   otherwise. *)
let loc_int32 last_int make_stack int ofs =
  if !int > last_int then begin
    let slot = stack_slot (make_stack !ofs) Int in
    let slot_size = if macosx then 4 else 8 in
    ofs := !ofs + slot_size;
    slot
  end else begin
    let reg = phys_reg Int !int in
    incr int;
    reg
  end
(* Assigns a location to every component of [arg], in order.
   Integer-class components use registers [first_int .. last_int],
   float-class components use [first_float .. last_float]; the remaining
   ones go to stack slots built with [make_stack], starting at offset
   [first_stack]. Returns the locations together with the final stack
   offset, clamped to be non-negative and rounded up to a multiple of 16.
   Fails with a fatal error on a [Valx2] component. *)
let calling_conventions
    first_int last_int first_float last_float make_stack first_stack arg =
  let next_int = ref first_int in
  let next_float = ref first_float in
  let stack_ofs = ref first_stack in
  let loc = Array.make (Array.length arg) Reg.dummy in
  Array.iteri
    (fun i (component : Cmm.machtype_component) ->
      let assigned =
        match component with
        | Val | Int | Addr ->
          loc_int last_int make_stack next_int stack_ofs
        | Float ->
          loc_float last_float make_stack next_float stack_ofs
        | Vec128 ->
          loc_vec128 last_float make_stack next_float stack_ofs
        | Float32 ->
          loc_float32 last_float make_stack next_float stack_ofs
        | Valx2 ->
          Misc.fatal_error "Unexpected machtype_component Valx2"
      in
      loc.(i) <- assigned)
    arg;
  (* CR mslater: (SIMD) will need to be 32/64 if vec256/512 are used. *)
  (loc, Misc.align (max 0 !stack_ofs) 16) (* keep stack 16-aligned *)
(* Stack location of an incoming argument at offset [ofs]. Negative offsets
   denote the domain-state area and are rebased by
   [size_domainstate_args]. *)
let incoming ofs =
  if ofs < 0
  then Domainstate (ofs + size_domainstate_args)
  else Incoming ofs
(* Stack location of an outgoing argument at offset [ofs]. Negative offsets
   denote the domain-state area and are rebased by
   [size_domainstate_args]. *)
let outgoing ofs =
  if ofs < 0
  then Domainstate (ofs + size_domainstate_args)
  else Outgoing ofs
(* Stack-slot builder for conventions that have no stack part: any use is a
   fatal error. *)
let not_supported = fun _ofs ->
  fatal_error "Proc.loc_results: cannot call"
(* OCaml calling convention:
first integer args in r0...r15 (r0...r7 when [macosx] is set,
see [last_int_register])
first float args in d0...d15
remaining args in domain area, then on stack.
Return values in r0...r15 (r0...r7 when [macosx] is set) or d0...d15. *)
(* Upper bound on the number of arguments of a tail call. *)
let max_arguments_for_tailcalls =
  let in_regs = 16 in
  let in_domain_state = 64 in
  in_regs + in_domain_state
(* Number of the last integer register used by the OCaml calling
   convention. *)
let last_int_register =
  match macosx with
  | true -> 7
  | false -> 15
(* Locations of the arguments of an OCaml call, as seen by the caller,
   paired with the stack space they need. The stack offset starts at
   [- size_domainstate_args], so the domain-state area is filled first. *)
let loc_arguments arg =
  let first_stack = - size_domainstate_args in
  calling_conventions 0 last_int_register 100 115 outgoing first_stack arg
(* Locations of the parameters of an OCaml function, as seen by the callee.
   The stack size computed by [calling_conventions] is dropped. *)
let loc_parameters arg =
  let first_stack = - size_domainstate_args in
  fst
    (calling_conventions 0 last_int_register 100 115 incoming first_stack arg)
(* Locations of the results of an OCaml call, as seen by the caller, paired
   with the stack space they need. *)
let loc_results_call res =
  let first_stack = - size_domainstate_args in
  calling_conventions 0 last_int_register 100 115 outgoing first_stack res
(* Locations of the results of an OCaml function, as seen by the callee.
   The stack size computed by [calling_conventions] is dropped. *)
let loc_results_return res =
  let first_stack = - size_domainstate_args in
  fst
    (calling_conventions 0 last_int_register 100 115 incoming first_stack res)
(* C calling convention:
first integer args in r0...r7
first float args in d0...d7
remaining args on stack.
macOS/iOS peculiarity: int32 arguments passed on stack occupy 4 bytes,
while the AAPCS64 says 8 bytes.
Return values in r0...r1 or d0. *)
(* Assigns a location to every argument of an external (C) call, in order.
   Each argument gets a one-element array of locations. Stack offsets start
   at 0. Returns the locations together with the stack space used, rounded
   up to a multiple of 16. *)
let external_calling_conventions
    first_int last_int first_float last_float make_stack ty_args =
  let next_int = ref first_int in
  let next_float = ref first_float in
  let stack_ofs = ref 0 in
  let locate (ty_arg : Cmm.exttype) =
    match ty_arg with
    | XInt | XInt64 -> loc_int last_int make_stack next_int stack_ofs
    | XInt32 -> loc_int32 last_int make_stack next_int stack_ofs
    | XFloat -> loc_float last_float make_stack next_float stack_ofs
    | XVec128 -> loc_vec128 last_float make_stack next_float stack_ofs
    | XFloat32 -> loc_float32 last_float make_stack next_float stack_ofs
  in
  let loc = Array.make (List.length ty_args) [| Reg.dummy |] in
  List.iteri (fun i ty_arg -> loc.(i) <- [| locate ty_arg |]) ty_args;
  (loc, Misc.align !stack_ofs 16) (* keep stack 16-aligned *)
(* Locations of the arguments of an external (C) call: integer registers
   0 - 7, float registers 100 - 107, then outgoing stack slots. *)
let loc_external_arguments ty_args =
  let last_int = 7 in
  let last_float = 107 in
  external_calling_conventions 0 last_int 100 last_float outgoing ty_args
(* Locations of the results of an external (C) call: integer registers
   0 - 1 and float registers 100 - 101. Results never go on the stack
   ([not_supported] makes that a fatal error).
   NOTE(review): the C calling convention comment in this file says float
   results come back in d0 only, whereas the bounds here also allow d1 --
   confirm which of the two is intended. *)
let loc_external_results res =
  fst (calling_conventions 0 1 100 101 not_supported 0 res)
(* The exception bucket lives in the first integer register (x0). *)
let loc_exn_bucket = hard_int_reg.(0)
(* See "DWARF for the ARM 64-bit architecture (AArch64)" available from
developer.arm.com. *)
(* DWARF register numbers of the integer registers, in the same order as
   [int_reg_name]: x0 - x15, x19 - x28, then x16 and x17. *)
let int_dwarf_reg_numbers =
  Array.concat
    [ Array.init 16 (fun i -> i);       (* x0 - x15 *)
      Array.init 10 (fun i -> 19 + i);  (* x19 - x28 *)
      [| 16; 17 |];                     (* x16, x17 *)
    ]
(* DWARF register numbers of the 32 float/vector registers: 64 - 95. *)
let float_dwarf_reg_numbers =
  Array.init 32 (fun i -> 64 + i)
(* DWARF register number table for the register class [reg_class].
   Fails with a fatal error for any class other than 0 or 1. *)
let dwarf_register_numbers ~reg_class =
  if reg_class = 0 then int_dwarf_reg_numbers
  else if reg_class = 1 then float_dwarf_reg_numbers
  else Misc.fatal_errorf "Bad register class %d" reg_class
(* DWARF register number of the stack pointer. *)
let stack_ptr_dwarf_register_number : int = 31
(* DWARF register number of x28, the domain state pointer. *)
let domainstate_ptr_dwarf_register_number : int = 28
(* Registers destroyed by operations *)
(* Registers destroyed by a C call that does not allocate: integer
   registers 0 - 15 and float registers 100 - 107 and 116 - 131, the latter
   listed under each of their Float, Float32 and Vec128 views. *)
let destroyed_at_c_noalloc_call =
  (* x19-x28, d8-d15 preserved *)
  let int_regs = Array.init 16 (fun i -> i) in
  let float_regs =
    Array.append
      (Array.init 8 (fun i -> 100 + i))
      (Array.init 16 (fun i -> 116 + i))
  in
  let regs_of (typ : Cmm.machtype_component) numbers =
    Array.map (phys_reg typ) numbers
  in
  Array.concat
    [ regs_of Int int_regs;
      regs_of Float float_regs;
      regs_of Float32 float_regs;
      regs_of Vec128 float_regs;
    ]
(* CSE needs to know that all versions of neon are destroyed. *)
(* The Float, Float32 and Vec128 views of float/vector register [n]
   (0-based within the class). *)
let destroy_neon_reg n =
  let number = 100 + n in
  [| phys_reg Float number; phys_reg Float32 number; phys_reg Vec128 number |]
(* All views of register 7 of the float/vector class (d7 / s7 / q7). *)
let destroy_neon_reg7 = destroy_neon_reg 7
(* A raise destroys every physical register. *)
let destroyed_at_raise = all_phys_regs
(* Reloading the return address and pushing a trap destroy nothing. *)
let destroyed_at_reloadretaddr = [||]
let destroyed_at_pushtrap = [||]
(* Allocation and poll points destroy x8 only. *)
let destroyed_at_alloc_or_poll = Array.make 1 reg_x8
(* Registers destroyed by the basic (non-terminator) instruction [basic].
   The match lists every constructor explicitly, with no catch-all case.
   [Stack_check] is not supported here and triggers an assertion failure. *)
let destroyed_at_basic (basic : Cfg_intf.S.basic) =
match basic with
| Reloadretaddr ->
destroyed_at_reloadretaddr
| Pushtrap _ ->
destroyed_at_pushtrap
(* Poll and allocation points destroy x8 (see [destroyed_at_alloc_or_poll]). *)
| Op Poll -> destroyed_at_alloc_or_poll
| Op (Alloc _) ->
destroyed_at_alloc_or_poll
(* Single-precision memory accesses through a Float64 register destroy all
   views of d7; presumably d7 is the scratch register for the conversion --
   TODO confirm against the emitter. *)
| Op(Load {memory_chunk = Single { reg = Float64 }; _ }
| Store(Single { reg = Float64 }, _, _))
-> destroy_neon_reg7
(* All other loads and stores destroy nothing. *)
| Op (Load {memory_chunk=Single {reg=Float32}; _ })
| Op (Store (Single {reg=Float32}, _, _))
| Op (Load
{memory_chunk=(Byte_unsigned|Byte_signed|Sixteen_unsigned|
Sixteen_signed|Thirtytwo_unsigned|Thirtytwo_signed|
Word_int|Word_val|Double|Onetwentyeight_unaligned|
Onetwentyeight_aligned);
_ })
| Op (Store
((Byte_unsigned|Byte_signed|Sixteen_unsigned|Sixteen_signed|
Thirtytwo_unsigned|Thirtytwo_signed|Word_int|Word_val|Double|
Onetwentyeight_unaligned|Onetwentyeight_aligned),
_, _))
-> [||]
(* Scalar static casts destroy nothing. *)
| Op (Static_cast
(Int_of_float _ | Float_of_int _
| Float_of_float32|Float32_of_float))
-> [||]
(* Every remaining operation, plus [Poptrap] and [Prologue], destroys
   nothing. *)
| Op (Static_cast
(V128_of_scalar _|Scalar_of_v128 _))
| Op (Specific _
| Move | Spill | Reload
| Floatop _
| Csel _
| Reinterpret_cast _ | Const_int _
| Const_float32 _ | Const_float _
| Const_symbol _ | Const_vec128 _
| Stackoffset _
| Intop _ | Intop_imm _ | Intop_atomic _
| Name_for_debugger _ | Probe_is_enabled _ | Opaque
| Begin_region | End_region | Dls_get)
| Poptrap | Prologue
-> [||]
| Stack_check _ -> assert false (* not supported *)
(* note: keep this function in sync with `is_destruction_point` below. *)
(* Registers destroyed by the terminator instruction [terminator].
   OCaml calls destroy every physical register. External calls (returning
   or not) destroy every physical register when they may allocate or pass
   arguments on the stack, and only the C caller-saved set otherwise. All
   other terminators destroy nothing. [Never] is not expected here. *)
let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
  match terminator with
  | Never -> assert false
  | Call {op = Indirect | Direct _; _} -> all_phys_regs
  | Always _ | Parity_test _ | Truth_test _ | Float_test _
  | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
  | Tailcall_func _ | Prim {op = Probe _; _}
  | Specific_can_raise _ -> [||]
  | Call_no_return
      { func_symbol = _; alloc; ty_res = _; ty_args = _; stack_ofs; _ }
  | Prim
      { op =
          External
            { func_symbol = _; alloc; ty_res = _; ty_args = _; stack_ofs; _ };
        _ } ->
    let destroys_everything = alloc || stack_ofs > 0 in
    if destroys_everything
    then all_phys_regs
    else destroyed_at_c_noalloc_call
(* CR-soon xclerc for xclerc: consider having more destruction points.
We currently return `true` when `destroyed_at_terminator` returns
`all_phys_regs`; we could also return `true` when `destroyed_at_terminator`
returns `destroyed_at_c_noalloc_call` for instance. *)
(* note: keep this function in sync with `destroyed_at_terminator` above. *)
(* Tells whether [terminator] is a destruction point, i.e. a terminator at
   which [destroyed_at_terminator] returns [all_phys_regs].
   - OCaml calls always are.
   - External calls (returning or not) are when they may allocate or pass
     arguments on the stack, which is exactly the condition under which
     [destroyed_at_terminator] returns [all_phys_regs]. The stack-argument
     case used to be ignored here, leaving the two functions out of sync.
   - When [more_destruction_points] is set, every external call is.
   - No other terminator is. [Never] is not expected here. *)
let is_destruction_point ~(more_destruction_points : bool)
    (terminator : Cfg_intf.S.terminator) =
  match terminator with
  | Never -> assert false
  | Call {op = Indirect | Direct _; _} ->
    true
  | Always _ | Parity_test _ | Truth_test _ | Float_test _
  | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
  | Tailcall_func _ | Prim {op = Probe _; _}
  | Specific_can_raise _ ->
    false
  | Call_no_return
      { func_symbol = _; alloc; ty_res = _; ty_args = _; stack_ofs; _ }
  | Prim
      { op =
          External
            { func_symbol = _; alloc; ty_res = _; ty_args = _; stack_ofs; _ };
        _ } ->
    more_destruction_points || alloc || stack_ofs > 0
(* Layout of the stack *)
(* Bytes taken by the stack slots of all classes, plus 8 extra bytes when
   the function contains calls (presumably room for the saved return
   address -- TODO confirm against the emitter). *)
let initial_stack_offset ~num_stack_slots ~contains_calls =
  let slots_size = Stack_class.Tbl.total_size_in_bytes num_stack_slots in
  let call_overhead = if contains_calls then 8 else 0 in
  slots_size + call_overhead
(* Size in bytes of a trap frame. *)
let trap_frame_size_in_bytes : int = 16
(* Total frame size in bytes: the current [stack_offset] plus the initial
   stack offset, rounded up to a multiple of 16. *)
let frame_size ~stack_offset ~contains_calls ~num_stack_slots =
  let unaligned =
    stack_offset + initial_stack_offset ~num_stack_slots ~contains_calls
  in
  Misc.align unaligned 16
(* A frame is required when the function contains calls or uses at least
   one stack slot in any stack class. *)
let frame_required ~fun_contains_calls ~fun_num_stack_slots =
  if fun_contains_calls
  then true
  else
    Stack_class.Tbl.exists fun_num_stack_slots
      ~f:(fun _stack_class num -> num > 0)
(* A prologue is required exactly when a frame is required. *)
let prologue_required ~fun_contains_calls ~fun_num_stack_slots =
  let needs_frame = frame_required ~fun_contains_calls ~fun_num_stack_slots in
  needs_frame
(* Byte offset of a stack slot, tagged with the base pointer it is relative
   to: the stack pointer or the domain state pointer. Warning 37 (unused
   constructor) is disabled for this type. *)
type slot_offset =
| Bytes_relative_to_stack_pointer of int
| Bytes_relative_to_domainstate_pointer of int
[@@ocaml.warning "-37"]
(* Offset of the stack location [loc].
   - [Incoming n]: above the current frame, at [frame size + n] from the
     stack pointer.
   - [Local n]: inside the frame, at [stack_offset] plus the offset of slot
     [n] within its stack class.
   - [Outgoing n]: at [n] from the stack pointer.
   - [Domainstate n]: at [n] past the [Domain_extra_params] field, relative
     to the domain state pointer.
   Asserts that incoming and outgoing offsets are non-negative. *)
let slot_offset (loc : Reg.stack_location) ~stack_class ~stack_offset
    ~fun_contains_calls ~fun_num_stack_slots =
  match loc with
  | Incoming n ->
    assert (n >= 0);
    let current_frame_size =
      frame_size ~stack_offset ~contains_calls:fun_contains_calls
        ~num_stack_slots:fun_num_stack_slots
    in
    Bytes_relative_to_stack_pointer (current_frame_size + n)
  | Local n ->
    let offset_in_frame =
      Stack_class.Tbl.offset_in_bytes fun_num_stack_slots ~stack_class ~slot:n
    in
    Bytes_relative_to_stack_pointer (stack_offset + offset_in_frame)
  | Outgoing n ->
    assert (n >= 0);
    Bytes_relative_to_stack_pointer n
  | Domainstate n ->
    let extra_params_base =
      Domainstate.(idx_of_field Domain_extra_params) * 8
    in
    Bytes_relative_to_domainstate_pointer (n + extra_params_base)
(* Calling the assembler *)
(* Runs the configured assembler on [infile], producing [outfile], and
   returns the result of [Ccomp.command]. Both file names are shell-quoted;
   the debug prefix-map flags are passed before "-o". *)
let assemble_file infile outfile =
  let debug_flags = String.concat " " (Misc.debug_prefix_map_flags ()) in
  let command_line =
    Config.asm ^ " " ^ debug_flags
    ^ " -o " ^ Filename.quote outfile ^ " " ^ Filename.quote infile
  in
  Ccomp.command command_line
(* Backend initialisation hook; nothing to do on this target. *)
let init : unit -> unit = fun () -> ()
(* Tells whether the Cmm operation is supported on this target.
   Only prefetch and atomic operations are reported as unsupported. The
   match lists every constructor explicitly, with no catch-all case. *)
let operation_supported : Cmm.operation -> bool = function
| Cprefetch _ | Catomic _ -> false
(* Everything below is supported. *)
| Cpopcnt
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
| Csubf Float32 | Cmulf Float32 | Cdivf Float32
| Cpackf32
| Creinterpret_cast (Float32_of_float | Float_of_float32 |
Float32_of_int32 | Int32_of_float32 |
V128_of_v128)
| Cstatic_cast (Float_of_float32 | Float32_of_float |
Int_of_float Float32 | Float_of_int Float32 |
V128_of_scalar _ | Scalar_of_v128 _)
| Cclz _ | Cctz _ | Cbswap _
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
| Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi
| Cand | Cor | Cxor | Clsl | Clsr | Casr
| Ccmpi _ | Caddv | Cadda | Ccmpa _
| Cnegf Float64 | Cabsf Float64 | Caddf Float64
| Csubf Float64 | Cmulf Float64 | Cdivf Float64
| Creinterpret_cast (Int_of_value | Value_of_int |
Int64_of_float | Float_of_int64)
| Cstatic_cast (Float_of_int Float64 | Int_of_float Float64)
| Ccmpf _
| Ccsel _
| Craise _
| Cprobe _ | Cprobe_is_enabled _ | Copaque
| Cbeginregion | Cendregion | Ctuple_field _
| Cdls_get
| Cpoll
-> true
(* Size in bytes of a trap. *)
let trap_size_in_bytes : int = 16