-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathproc.ml
456 lines (381 loc) · 14.6 KB
/
proc.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# 2 "backend/arm64/proc.ml"
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Gallium, INRIA Rocquencourt *)
(* Benedikt Meurer, University of Siegen *)
(* *)
(* Copyright 2013 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* Copyright 2012 Benedikt Meurer. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Description of the ARM processor in 64-bit mode *)
open Misc
open Cmm
open Reg
open Arch
open Mach
(* Instruction selection *)
(* This target is not word-addressed. *)
let word_addressed = false
(* Registers available for register allocation *)
(* Integer register map:
x0 - x15 general purpose (caller-save)
x16, x17 temporaries (used by call veneers)
x18 platform register (reserved)
x19 - x25 general purpose (callee-save)
x26 trap pointer
x27 alloc pointer
x28 domain state pointer
x29 frame pointer
x30 return address
sp / xzr stack pointer / zero register
Floating-point register map:
d0 - d7 general purpose (caller-save)
d8 - d15 general purpose (callee-save)
d16 - d31 general purpose (caller-save)
*)
(* Assembler names of the integer registers, indexed by internal register
   number.  The internal numbering differs from the hardware one: x19 - x25
   come right after x0 - x15, then x26 - x28, and the temporaries x16 and
   x17 come last. *)
let int_reg_name =
  Array.map
    (fun hw_number -> "x" ^ string_of_int hw_number)
    [| 0; 1; 2; 3; 4; 5; 6; 7;          (* 0 - 7 *)
       8; 9; 10; 11; 12; 13; 14; 15;    (* 8 - 15 *)
       19; 20; 21; 22; 23; 24; 25;      (* 16 - 22 *)
       26; 27; 28;                      (* 23 - 25 *)
       16; 17 |]                        (* 26 - 27 *)
(* Assembler names of the floating-point registers: entry [i] is the name
   of d[i], for i = 0 .. 31. *)
let float_reg_name =
  Array.init 32 (fun i -> "d" ^ string_of_int i)
(* Two register classes: 0 for integer registers, 1 for float registers. *)
let num_register_classes = 2

(* [register_class r] is the class of register [r], derived from its type:
   0 for [Val], [Int] and [Addr], 1 for [Float].  A [Vec128] register is a
   fatal error. *)
let register_class { typ; _ } =
  match typ with
  | Float -> 1
  | Val | Int | Addr -> 0
  (* CR mslater: (SIMD) arm64 *)
  | Vec128 -> fatal_error "arm64: got vec128 register"
(* Two stack slot classes: 0 for integer-like slots, 1 for float slots. *)
let num_stack_slot_classes = 2

(* Stack slot class of a value of the given type: 0 for [Val], [Int] and
   [Addr], 1 for [Float].  [Vec128] is a fatal error. *)
let stack_slot_class = function
  | Float -> 1
  | Val | Int | Addr -> 0
  (* CR mslater: (SIMD) arm64 *)
  | Vec128 -> fatal_error "arm64: got vec128 register"
(* Short tag naming a stack slot class: "i" for class 0, "f" for class 1.
   Any other class number is a fatal error. *)
let stack_class_tag = function
  | 0 -> "i"
  | 1 -> "f"
  | other -> Misc.fatal_errorf "Unspecified stack slot class %d" other
(* Number of allocatable registers in each register class
   (index 0: integer class, index 1: float class). *)
let num_available_registers =
[| 23; 32 |] (* first 23 int regs allocatable; all float regs allocatable *)
(* Number of the first register of each class: integer registers are
   numbered from 0, float registers from 100. *)
let first_available_register =
[| 0; 100 |]
(* [register_name ty r] is the assembler name of register number [r] of
   type [ty].  The number is rebased on the first register of the class
   before indexing the name table.  [Vec128] is a fatal error. *)
let register_name ty r =
  let name_in names cls = names.(r - first_available_register.(cls)) in
  match ty with
  | Float -> name_in float_reg_name 1
  | Val | Int | Addr -> name_in int_reg_name 0
  (* CR mslater: (SIMD) arm64 *)
  | Vec128 -> fatal_error "arm64: got vec128 register"
(* NOTE(review): presumably asks the register allocator to rotate among
   candidate registers instead of always picking the first free one; the
   consumer of this flag is outside this file -- confirm. *)
let rotate_registers = true
(* Representation of hard registers by pseudo-registers *)

(* One pseudo-register of type [Int] per integer hard register: entry [i]
   is located in [Reg i], for i = 0 .. 27.  Registers are created in
   increasing index order. *)
let hard_int_reg =
  Array.init 28 (fun i -> Reg.at_location Int (Reg i))
(* One pseudo-register of type [Float] per float hard register: entry [i]
   is located in [Reg (100 + i)], for i = 0 .. 31.  Registers are created
   in increasing index order. *)
let hard_float_reg =
  Array.init 32 (fun i -> Reg.at_location Float (Reg (100 + i)))
(* Every physical register: the integer ones followed by the float ones. *)
let all_phys_regs =
  Array.concat [ hard_int_reg; hard_float_reg ]

(* Set of all physical registers.  The set is built once, when this module
   is initialised; each call returns that same set. *)
let precolored_regs =
  let all_phys_regs_set = Reg.set_of_array all_phys_regs in
  (fun () -> all_phys_regs_set)
(* [phys_reg ty n] is the pseudo-register standing for hard register
   number [n].  Integer-like types index [hard_int_reg] directly; float
   register numbers start at 100 and are rebased before indexing
   [hard_float_reg].  [Vec128] is a fatal error. *)
let phys_reg ty n =
  match ty with
  | Float -> hard_float_reg.(n - 100)
  | Int | Addr | Val -> hard_int_reg.(n)
  (* CR mslater: (SIMD) arm64 *)
  | Vec128 -> fatal_error "arm64: got vec128 register"
(* x8 (integer register 8): reported as destroyed by allocation and poll
   operations in [destroyed_at_oper]. *)
let reg_x8 = phys_reg Int 8
(* d7 (float register 107): reported as destroyed by int/float conversions
   and single-precision loads and stores in [destroyed_at_oper]. *)
let reg_d7 = phys_reg Float 107
(* [stack_slot slot ty] is a pseudo-register of type [ty] located in the
   stack slot [slot]. *)
let stack_slot slot ty =
  let location = Stack slot in
  Reg.at_location ty location
(* Calling conventions *)
(* Size of the domain-state area used for passing arguments:
   64 slots of [size_int] each. *)
let size_domainstate_args = 64 * size_int
(* Location of the next integer argument.
   [int] holds the number of the next free integer register and [ofs] the
   current stack offset; both are updated in place.  While registers up to
   [last_int] remain, the next one is used.  Otherwise [ofs] is aligned to
   [size_int], a stack slot built by [make_stack] at that offset is
   returned, and [ofs] is advanced by [size_int]. *)
let loc_int last_int make_stack int ofs =
  let regno = !int in
  if regno > last_int then begin
    ofs := Misc.align !ofs size_int;
    let aligned = !ofs in
    let slot = stack_slot (make_stack aligned) Int in
    ofs := aligned + size_int;
    slot
  end else begin
    let reg = phys_reg Int regno in
    incr int;
    reg
  end
(* Location of the next float argument.
   [float] holds the number of the next free float register and [ofs] the
   current stack offset; both are updated in place.  While registers up to
   [last_float] remain, the next one is used.  Otherwise [ofs] is aligned
   to [size_float], a stack slot built by [make_stack] at that offset is
   returned, and [ofs] is advanced by [size_float]. *)
let loc_float last_float make_stack float ofs =
  let regno = !float in
  if regno > last_float then begin
    ofs := Misc.align !ofs size_float;
    let aligned = !ofs in
    let slot = stack_slot (make_stack aligned) Float in
    ofs := aligned + size_float;
    slot
  end else begin
    let reg = phys_reg Float regno in
    incr float;
    reg
  end
(* Location of the next 32-bit integer argument.
   Registers are used exactly as in [loc_int].  On the stack, the slot is
   placed at the current offset without realignment, and [ofs] is advanced
   by 4 bytes when [macosx] is set and by 8 bytes otherwise. *)
let loc_int32 last_int make_stack int ofs =
  let regno = !int in
  if regno > last_int then begin
    let here = !ofs in
    let slot = stack_slot (make_stack here) Int in
    let slot_size = if macosx then 4 else 8 in
    ofs := here + slot_size;
    slot
  end else begin
    let reg = phys_reg Int regno in
    incr int;
    reg
  end
(* Generic assignment of locations to a sequence of values.
   [first_int .. last_int] and [first_float .. last_float] are the register
   ranges available for integer-like and float values; [make_stack] turns a
   stack offset into a stack location; [first_stack] is the initial stack
   offset; [arg] gives the type of each value, processed in index order.
   Returns the array of locations together with the final stack offset,
   clamped to be non-negative and rounded up to a multiple of 16.
   A [Vec128] value is a fatal error. *)
let calling_conventions
    first_int last_int first_float last_float make_stack first_stack arg =
  let next_int = ref first_int in
  let next_float = ref first_float in
  let ofs = ref first_stack in
  let locate ty =
    match ty with
    | Float -> loc_float last_float make_stack next_float ofs
    | Val | Int | Addr -> loc_int last_int make_stack next_int ofs
    (* CR mslater: (SIMD) arm64 *)
    | Vec128 -> fatal_error "arm64: got vec128 register"
  in
  let loc = Array.map locate arg in
  (loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)
(* Stack location of an incoming argument.  A negative offset designates
   the domain-state area and is rebased by [size_domainstate_args]; any
   other offset is an ordinary incoming stack slot. *)
let incoming ofs =
  if ofs < 0
  then Domainstate (ofs + size_domainstate_args)
  else Incoming ofs
(* Stack location of an outgoing argument.  A negative offset designates
   the domain-state area and is rebased by [size_domainstate_args]; any
   other offset is an ordinary outgoing stack slot. *)
let outgoing ofs =
  if ofs < 0
  then Domainstate (ofs + size_domainstate_args)
  else Outgoing ofs
(* Stack-location constructor for conventions that have no stack
   locations: being called at all is a fatal error.  The offset is
   ignored. *)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
(* OCaml calling convention:
first integer args in r0...r15 (r0...r7 when [macosx] is set, see
[last_int_register])
first float args in d0...d15
remaining args in domain area, then on stack.
Return values in r0...r15 (r0...r7 when [macosx] is set) or d0...d15. *)
(* NOTE(review): the "16 in regs" term matches the non-macOS integer
   register count; with [macosx] set only 8 integer registers are used --
   confirm that this bound is still the intended one there. *)
let max_arguments_for_tailcalls = 16 (* in regs *) + 64 (* in domain state *)
(* Number of the last integer register used by the OCaml calling
   convention for arguments and results. *)
let last_int_register = if macosx then 7 else 15
(* Locations of the arguments of an OCaml call, seen from the caller,
   paired with the stack space they need.  The stack offset starts at
   [- size_domainstate_args], so the domain-state area is filled before the
   real stack. *)
let loc_arguments arg =
  let first_stack = - size_domainstate_args in
  calling_conventions 0 last_int_register 100 115 outgoing first_stack arg
(* Locations of the parameters of an OCaml function, seen from the callee.
   Same assignment as [loc_arguments], with incoming stack locations; the
   stack size is dropped. *)
let loc_parameters arg =
  let first_stack = - size_domainstate_args in
  fst
    (calling_conventions 0 last_int_register 100 115 incoming first_stack arg)
(* Locations of the results of an OCaml call, seen from the caller, paired
   with the stack space they need. *)
let loc_results_call res =
  let first_stack = - size_domainstate_args in
  calling_conventions 0 last_int_register 100 115 outgoing first_stack res
(* Locations of the results of an OCaml function, seen from the callee.
   Same assignment as [loc_results_call], with incoming stack locations;
   the stack size is dropped. *)
let loc_results_return res =
  let first_stack = - size_domainstate_args in
  fst
    (calling_conventions 0 last_int_register 100 115 incoming first_stack res)
(* C calling convention:
     first integer args in r0...r7
     first float args in d0...d7
     remaining args on stack.
   macOS/iOS peculiarity: int32 arguments passed on stack occupy 4 bytes,
   while the AAPCS64 says 8 bytes.
   Return values in r0...r1 or d0. *)

(* Assigns a location to each external-call argument, in list order.
   Every argument gets a one-element array holding its location.  The stack
   offset starts at 0.  Returns the locations together with the final stack
   offset rounded up to a multiple of 16.  An [XVec128] argument is a fatal
   error. *)
let external_calling_conventions
    first_int last_int first_float last_float make_stack ty_args =
  let next_int = ref first_int in
  let next_float = ref first_float in
  let ofs = ref 0 in
  let locate ty_arg =
    match ty_arg with
    | XInt | XInt64 ->
      [| loc_int last_int make_stack next_int ofs |]
    | XInt32 ->
      [| loc_int32 last_int make_stack next_int ofs |]
    | XFloat ->
      [| loc_float last_float make_stack next_float ofs |]
    (* CR mslater: (SIMD) arm64 *)
    | XVec128 -> fatal_error "arm64: got vec128 register"
  in
  let loc = Array.map locate (Array.of_list ty_args) in
  (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
(* Locations of the arguments of an external call: integer registers
   0 .. 7, float registers 100 .. 107, then outgoing stack slots. *)
let loc_external_arguments ty_args =
  let last_int = 7 and last_float = 107 in
  external_calling_conventions 0 last_int 100 last_float outgoing ty_args
(* Locations of the results of an external call: integer registers 0 .. 1
   or float register 100.  Results never go on the stack; needing a stack
   slot is a fatal error (see [not_supported]). *)
let loc_external_results res =
  fst (calling_conventions 0 1 100 100 not_supported 0 res)
(* The exception bucket is held in integer register 0 (x0). *)
let loc_exn_bucket = phys_reg Int 0
(* See "DWARF for the ARM 64-bit architecture (AArch64)" available from
   developer.arm.com. *)

(* DWARF register numbers of the integer registers, in the same order as
   [int_reg_name]: each entry is the hardware number of the register. *)
let int_dwarf_reg_numbers =
  Array.concat
    [ Array.init 16 (fun i -> i);          (* x0 - x15 *)
      Array.init 10 (fun i -> 19 + i);     (* x19 - x28 *)
      [| 16; 17 |] ]                       (* x16, x17 *)
(* DWARF register numbers of the float registers: entry [i] is [64 + i],
   for i = 0 .. 31. *)
let float_dwarf_reg_numbers =
  Array.init 32 (fun i -> 64 + i)
(* DWARF register number table of a register class: class 0 is the integer
   table, class 1 the float table.  Any other class is a fatal error. *)
let dwarf_register_numbers ~reg_class =
  if reg_class = 0 then int_dwarf_reg_numbers
  else if reg_class = 1 then float_dwarf_reg_numbers
  else Misc.fatal_errorf "Bad register class %d" reg_class
(* DWARF register number of the stack pointer. *)
let stack_ptr_dwarf_register_number = 31
(* DWARF register number of the domain state pointer (x28). *)
let domainstate_ptr_dwarf_register_number = 28
(* Volatile registers: none *)
(* Always false: the argument is ignored. *)
let regs_are_volatile _rs = false
(* Registers destroyed by operations *)

(* Registers destroyed by a C call: integer registers 0 .. 15 (x0 - x15),
   then float registers d0 - d7 and d16 - d31. *)
let destroyed_at_c_call =
  (* x19-x28, d8-d15 preserved *)
  let ints = Array.init 16 (fun i -> phys_reg Int i) in
  let low_floats = Array.init 8 (fun i -> phys_reg Float (100 + i)) in
  let high_floats = Array.init 16 (fun i -> phys_reg Float (116 + i)) in
  Array.concat [ ints; low_floats; high_floats ]
(* note: keep this function in sync with `destroyed_at_{basic,terminator}` below. *)

(* Registers destroyed by an instruction:
   - OCaml calls and allocating external calls destroy every physical
     register;
   - non-allocating external calls destroy [destroyed_at_c_call];
   - allocation and polling destroy x8;
   - int/float conversions and single-precision loads and stores destroy
     d7;
   - anything else destroys nothing. *)
let destroyed_at_oper instr =
  match instr with
  | Iop Icall_ind
  | Iop (Icall_imm _)
  | Iop (Iextcall { alloc = true; _ }) ->
    all_phys_regs
  | Iop (Iextcall { alloc = false; _ }) ->
    destroyed_at_c_call
  | Iop (Ialloc _ | Ipoll _) ->
    [| reg_x8 |]
  | Iop (Iintoffloat | Ifloatofint)
  | Iop (Iload (Single, _, _) | Istore (Single, _, _)) ->
    [| reg_d7 |] (* d7 / s7 destroyed *)
  | _ -> [||]
(* Raising an exception destroys every physical register. *)
let destroyed_at_raise () = all_phys_regs
(* Reloading the return address destroys no register. *)
let destroyed_at_reloadretaddr = [| |]
(* Pushing a trap destroys no register. *)
let destroyed_at_pushtrap = [| |]
(* Allocation and polling destroy x8. *)
let destroyed_at_alloc_or_poll = [| reg_x8 |]
(* note: keep this function in sync with `destroyed_at_oper` above. *)

(* Registers destroyed by a basic CFG instruction.  Int/float conversions
   and single-precision loads and stores destroy d7; every other basic
   instruction destroys nothing.  Bound checks are not expected here and
   fail an assertion. *)
let destroyed_at_basic (basic : Cfg_intf.S.basic) =
  match basic with
  | Reloadretaddr -> destroyed_at_reloadretaddr
  | Pushtrap _ -> destroyed_at_pushtrap
  | Op (Intop Icheckbound)
  | Op (Intop_imm (Icheckbound, _)) ->
    assert false
  | Op Intoffloat
  | Op Floatofint
  | Op (Load (Single, _, _))
  | Op (Store (Single, _, _)) ->
    [| reg_d7 |]
  | Op _ | Poptrap | Prologue -> [||]
(* note: keep this function in sync with `destroyed_at_oper` above,
   and `is_destruction_point` below. *)

(* Registers destroyed by a CFG terminator:
   - OCaml calls destroy every physical register;
   - external calls destroy every physical register when they may
     allocate, and [destroyed_at_c_call] otherwise;
   - allocation destroys x8, and polling [destroyed_at_alloc_or_poll];
   - every other terminator destroys nothing.
   [Never] is not expected here and fails an assertion. *)
let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
  let at_external_call ~alloc =
    if alloc then all_phys_regs else destroyed_at_c_call
  in
  match terminator with
  | Never -> assert false
  | Call {op = Indirect; _} | Call {op = Direct _; _} ->
    all_phys_regs
  | Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; } ->
    at_external_call ~alloc
  | Prim {op = External { func_symbol = _; alloc; ty_res = _; ty_args = _; }; _} ->
    at_external_call ~alloc
  | Prim {op = Alloc _; _} ->
    [| reg_x8 |]
  | Poll_and_jump _ -> destroyed_at_alloc_or_poll
  | Always _ | Parity_test _ | Truth_test _ | Float_test _
  | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
  | Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
  | Specific_can_raise _ ->
    [||]
(* CR-soon xclerc for xclerc: consider having more destruction points.
We currently return `true` when `destroyed_at_terminator` returns
`all_phys_regs`; we could also return `true` when `destroyed_at_terminator`
returns `destroyed_at_c_call` for instance. *)
(* note: keep this function in sync with `destroyed_at_terminator` above. *)
(* Whether a terminator is a destruction point.
   OCaml calls always are.  External calls are when they may allocate, or
   unconditionally when [more_destruction_points] is set.  No other
   terminator is.  [Never] is not expected here and fails an assertion. *)
let is_destruction_point ~(more_destruction_points : bool)
    (terminator : Cfg_intf.S.terminator) =
  match terminator with
  | Never -> assert false
  | Call {op = Indirect | Direct _; _} -> true
  | Call_no_return { func_symbol = _; alloc; ty_res = _; ty_args = _; }
  | Prim {op = External { func_symbol = _; alloc; ty_res = _; ty_args = _; }; _} ->
    more_destruction_points || alloc
  | Prim {op = Alloc _; _}
  | Poll_and_jump _
  | Always _ | Parity_test _ | Truth_test _ | Float_test _
  | Int_test _ | Switch _ | Return | Raise _ | Tailcall_self _
  | Tailcall_func _ | Prim {op = (Checkbound _ | Checkalign _) | Probe _; _}
  | Specific_can_raise _ ->
    false
(* Maximal register pressure *)

(* Register pressure considered safe for an operation: 7 for external
   calls, 22 for allocation and polling, 23 for anything else. *)
let safe_register_pressure op =
  match op with
  | Ialloc _ | Ipoll _ -> 22
  | Iextcall _ -> 7
  | _ -> 23
(* Maximal register pressure for an operation, as a fresh two-element array
   indexed by register class (0: integer, 1: float). *)
let max_register_pressure op =
  let ints, floats =
    match op with
    | Iextcall _ ->
      7, 8 (* 7 integer callee-saves, 8 FP callee-saves *)
    | Ialloc _ | Ipoll _ -> 22, 32
    | Iintoffloat | Ifloatofint
    | Iload (Single, _, _) | Istore (Single, _, _) -> 23, 31
    | _ -> 23, 32
  in
  [| ints; floats |]
(* Layout of the stack *)
(* Initial value of the stack offset. *)
let initial_stack_offset = 0
(* Size of a trap frame, in bytes. *)
let trap_frame_size_in_bytes = 16
(* A frame is required when the function contains calls, or uses at least
   one stack slot of class 0 or of class 1.  The slot counts are only
   consulted when the function contains no call. *)
let frame_required ~fun_contains_calls ~fun_num_stack_slots =
  let uses_slots_of_class cls = fun_num_stack_slots.(cls) > 0 in
  fun_contains_calls || uses_slots_of_class 0 || uses_slots_of_class 1
(* A prologue is required exactly when a frame is required
   (see [frame_required]). *)
let prologue_required ~fun_contains_calls ~fun_num_stack_slots =
frame_required ~fun_contains_calls ~fun_num_stack_slots
(* Not implemented on arm64: every argument is ignored and the call always
   ends in a fatal error. *)
let frame_size ~stack_offset:_ ~fun_contains_calls:_ ~fun_num_stack_slots:_ =
Misc.fatal_error "Full DWARF support for arm64 not yet implemented"
(* Offset of a slot, in bytes, relative either to the stack pointer or to
   the domain state pointer. *)
type slot_offset =
| Bytes_relative_to_stack_pointer of int
| Bytes_relative_to_domainstate_pointer of int
(* Not implemented on arm64: every argument is ignored and the call always
   ends in a fatal error. *)
let slot_offset _loc ~stack_class:_ ~stack_offset:_ ~fun_contains_calls:_
~fun_num_stack_slots:_ =
Misc.fatal_error "Full DWARF support for arm64 not yet implemented"
(* Calling the assembler *)

(* Runs the configured assembler on [infile], writing [outfile].  The
   command line is [Config.asm], the debug prefix map flags, then "-o" and
   the two shell-quoted file names, all separated by single spaces.
   Returns the result of [Ccomp.command]. *)
let assemble_file infile outfile =
  let debug_flags = String.concat " " (Misc.debug_prefix_map_flags ()) in
  let command_line =
    String.concat " "
      [ Config.asm;
        debug_flags;
        "-o";
        Filename.quote outfile;
        Filename.quote infile ]
  in
  Ccomp.command command_line
(* Initialisation hook: does nothing on this target. *)
let init () = ()
(* Whether a Cmm operation is supported by this backend.  Every
   constructor is listed explicitly so that a new operation has to be
   classified here. *)
let operation_supported op =
  match op with
  | Cbswap _
  | Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
  | Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi
  | Cand | Cor | Cxor | Clsl | Clsr | Casr
  | Ccmpi _ | Caddv | Cadda | Ccmpa _
  | Cnegf | Cabsf | Caddf | Csubf | Cmulf | Cdivf
  | Cfloatofint | Cintoffloat | Cintofvalue | Cvalueofint
  | Ccmpf _
  | Ccsel _
  | Craise _
  | Ccheckbound
  | Cprobe _ | Cprobe_is_enabled _ | Copaque
  | Cbeginregion | Cendregion | Ctuple_field _ ->
    true
  | Cclz _ | Cctz _ | Cpopcnt
  | Cprefetch _ | Catomic _
  | Cvectorcast _ | Cscalarcast _
  | Ccheckalign _ ->
    false (* Not implemented *)
(* Size of a trap, in bytes; same value as [trap_frame_size_in_bytes]. *)
let trap_size_in_bytes = 16