Skip to content

Commit 8ad12b4

Browse files
committed
Merge branch 'main' into unboxed-small-ints-tryagain
2 parents 98ac389 + a53391d commit 8ad12b4

File tree

328 files changed

+15430
-3580
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

328 files changed

+15430
-3580
lines changed

.github/workflows/build.yml

+63-10
Original file line number | Diff line number | Diff line change
@@ -78,39 +78,57 @@ jobs:
7878
config: --enable-middle-end=flambda2 --disable-warn-error
7979
os: macos-latest
8080

81+
- name: flambda2_macos_arm64_runtime5_irc
82+
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
83+
os: macos-latest
84+
build_ocamlparam: '_,w=-46,regalloc=irc'
85+
ocamlparam: '_,w=-46,regalloc=irc'
86+
87+
- name: flambda2_macos_arm64_runtime5_ls
88+
config: --enable-middle-end=flambda2 --enable-runtime5 --disable-warn-error
89+
os: macos-latest
90+
build_ocamlparam: '_,w=-46,regalloc=ls'
91+
ocamlparam: '_,w=-46,regalloc=ls'
92+
93+
- name: flambda2_macos_arm64_gi
94+
config: --enable-middle-end=flambda2 --disable-warn-error
95+
os: macos-latest
96+
build_ocamlparam: '_,w=-46,regalloc=gi'
97+
ocamlparam: '_,w=-46,regalloc=gi'
98+
8199
- name: irc
82100
config: --enable-middle-end=flambda2
83101
os: ubuntu-latest
84-
build_ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
85-
ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
102+
build_ocamlparam: '_,w=-46,regalloc=irc'
103+
ocamlparam: '_,w=-46,regalloc=irc'
86104
check_arch: true
87105

88106
- name: irc_polling
89107
config: --enable-middle-end=flambda2 --enable-poll-insertion
90108
os: ubuntu-latest
91-
build_ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
92-
ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
109+
build_ocamlparam: '_,w=-46,regalloc=irc'
110+
ocamlparam: '_,w=-46,regalloc=irc'
93111
check_arch: true
94112

95113
- name: irc_frame_pointers
96114
config: --enable-middle-end=flambda2 --enable-frame-pointers
97115
os: ubuntu-latest
98-
build_ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
99-
ocamlparam: '_,w=-46,regalloc=irc,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=IRC_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1'
116+
build_ocamlparam: '_,w=-46,regalloc=irc'
117+
ocamlparam: '_,w=-46,regalloc=irc'
100118
check_arch: true
101119

102120
- name: ls
103121
config: --enable-middle-end=flambda2
104122
os: ubuntu-latest
105-
build_ocamlparam: '_,w=-46,regalloc=ls,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=LS_ORDER:layout,regalloc-validate=1'
106-
ocamlparam: '_,w=-46,regalloc=ls,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=LS_ORDER:layout,regalloc-validate=1'
123+
build_ocamlparam: '_,w=-46,regalloc=ls'
124+
ocamlparam: '_,w=-46,regalloc=ls'
107125
check_arch: true
108126

109127
- name: gi
110128
config: --enable-middle-end=flambda2
111129
os: ubuntu-latest
112-
build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1'
113-
ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1'
130+
build_ocamlparam: '_,w=-46,regalloc=gi,cfg-cse-optimize=1'
131+
ocamlparam: '_,w=-46,regalloc=gi,cfg-cse-optimize=1'
114132
check_arch: true
115133

116134
- name: cfg-selection
@@ -120,6 +138,13 @@ jobs:
120138
ocamlparam: '_,w=-46,regalloc=cfg,cfg-cse-optimize=1,cfg-selection=1,cfg-zero-alloc-checker=1'
121139
check_arch: true
122140

141+
- name: vectorizer
142+
config: --enable-middle-end=flambda2
143+
os: ubuntu-latest
144+
build_ocamlparam: '_,w=-46,regalloc=cfg,vectorize=1'
145+
ocamlparam: '_,w=-46,regalloc=cfg,vectorize=1'
146+
check_arch: true
147+
123148
env:
124149
J: "3"
125150
run_testsuite: "true"
@@ -229,11 +254,20 @@ jobs:
229254
--with-dune=$GITHUB_WORKSPACE/ocaml-414/_install/bin/dune \
230255
${{ matrix.config }}
231256
257+
- name: Setup for saving core files (not for macOS at the moment)
258+
if: matrix.os != 'macos-latest'
259+
run: |
260+
sudo mkdir /cores
261+
sudo chmod 777 /cores
262+
# Core filenames will be of the form executable.pid.timestamp:
263+
sudo bash -c 'echo "/cores/%e.%p.%t" > /proc/sys/kernel/core_pattern'
264+
232265
- name: Build, install and test Flambda backend
233266
working-directory: flambda_backend
234267
run: |
235268
if [ $run_testsuite = true ]; then target=ci; else target=compiler; fi
236269
export PATH=$GITHUB_WORKSPACE/ocaml-414/_install/bin:$PATH
270+
ulimit -c unlimited
237271
make $target \
238272
|| (if [ $expected_fail = true ]; then exit 0; else exit 1; fi);
239273
env:
@@ -247,6 +281,25 @@ jobs:
247281
if: matrix.check_arch == true
248282
run: |
249283
PATH=$GITHUB_WORKSPACE/ocaml-414/_install/bin:$PATH make check_all_arches
284+
285+
- uses: actions/upload-artifact@v4
286+
if: ${{ failure() && matrix.os != 'macos-latest' }}  # keep the whole condition inside one expression; text outside ${{ }} turns the result into an always-truthy string
287+
with:
288+
name: cores-${{ github.sha }}-${{ matrix.name }}  # upload-artifact@v4 needs a unique name per run; without the matrix name, failing jobs collide
289+
path: /cores
290+
291+
- uses: actions/upload-artifact@v4
292+
if: ${{ failure() && matrix.os != 'macos-latest' }}  # keep the whole condition inside one expression; text outside ${{ }} turns the result into an always-truthy string
293+
with:
294+
name: _build-${{ github.sha }}-${{ matrix.name }}  # upload-artifact@v4 needs a unique name per run; without the matrix name, failing jobs collide
295+
path: ${{ github.workspace }}/_build  # 'path' is not shell-expanded, so use the expression context. NOTE(review): the build step runs in flambda_backend/ - confirm _build is not under that subdirectory
296+
297+
- uses: actions/upload-artifact@v4
298+
if: ${{ failure() && matrix.os != 'macos-latest' }}  # keep the whole condition inside one expression; text outside ${{ }} turns the result into an always-truthy string
299+
with:
300+
name: _runtest-${{ github.sha }}-${{ matrix.name }}  # upload-artifact@v4 needs a unique name per run; without the matrix name, failing jobs collide
301+
path: ${{ github.workspace }}/_runtest  # 'path' is not shell-expanded, so use the expression context. NOTE(review): the build step runs in flambda_backend/ - confirm _runtest is not under that subdirectory
302+
250303
concurrency:
251304
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
252305
cancel-in-progress: true

.github/workflows/coverage.yml

+2-2
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,8 @@ jobs:
8989
# BUILD_OCAMLPARAM: ${{ matrix.ocamlparam }}
9090
#
9191
# - name: Publish coverage report
92-
# uses: actions/upload-artifact@v3
92+
# uses: actions/upload-artifact@v4
9393
# with:
94-
# name: coverage
94+
# name: coverage-${{ github.sha }}
9595
# path: flambda_backend/_coverage/**
9696
#

.github/workflows/ocamlformat.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ jobs:
2121
path: 'flambda_backend'
2222

2323
- name: Setup OCaml ${{ matrix.ocaml-compiler }}
24-
uses: ocaml/setup-ocaml@v2
24+
uses: ocaml/setup-ocaml@v3
2525
with:
2626
ocaml-compiler: ${{ matrix.ocaml-compiler }}
2727

Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ promote:
8787

8888
.PHONY: fmt
8989
fmt:
90-
ocamlformat -i $$(find . \( -name "*.ml" -or -name "*.mli" \))
90+
# NUL-delimit the file list so paths containing whitespace reach ocamlformat intact.
find . \( -name "*.ml" -or -name "*.mli" \) -print0 | xargs -0 -P $$(nproc 2>/dev/null || echo 1) -n 20 ocamlformat -i
9191

9292
.PHONY: check-fmt
9393
check-fmt:

asmcomp/asmlink.ml

+12
Original file line number | Diff line number | Diff line change
@@ -349,6 +349,16 @@ let sourcefile_for_dwarf ~named_startup_file filename =
349349
if named_startup_file then filename
350350
else ".startup"
351351

352+
(** [emit_ocamlrunparam ~ppf_dump] compiles a single data phrase that defines
    the global symbol [caml_ocamlrunparam] and places the current value of
    [Clflags.ocamlrunparam] after it, with an explicit NUL byte appended.
    It is invoked while generating both the regular and the shared startup
    files.
    NOTE(review): the trailing NUL presumably lets the runtime read the data
    as a C string - confirm against the runtime side. *)
let emit_ocamlrunparam ~ppf_dump =
353+
Asmgen.compile_phrase ~ppf_dump
354+
(Cmm.Cdata [
355+
Cmm.Cdefine_symbol {
356+
sym_name = "caml_ocamlrunparam";
357+
sym_global = Global
358+
};
359+
Cmm.Cstring (!Clflags.ocamlrunparam ^ "\000")
360+
])
361+
352362
let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_gen =
353363
Location.input_name := "caml_startup"; (* set name of "current" input *)
354364
let startup_comp_unit =
@@ -361,6 +371,7 @@ let make_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units cached_g
361371
let compile_phrase p = Asmgen.compile_phrase ~ppf_dump p in
362372
let name_list =
363373
List.flatten (List.map (fun u -> u.defines) units) in
374+
emit_ocamlrunparam ~ppf_dump;
364375
List.iter compile_phrase (Cmm_helpers.entry_point name_list);
365376
List.iter compile_phrase
366377
(* Emit the GC roots table, for dynlink. *)
@@ -414,6 +425,7 @@ let make_shared_startup_file unix ~ppf_dump ~sourcefile_for_dwarf genfns units =
414425
Emitaux.Dwarf_helpers.init ~disable_dwarf:(not !Dwarf_flags.dwarf_for_startup_file)
415426
~sourcefile:sourcefile_for_dwarf;
416427
Emit.begin_assembly unix;
428+
emit_ocamlrunparam ~ppf_dump;
417429
List.iter compile_phrase
418430
(Cmm_helpers.emit_gc_roots_table ~symbols:[]
419431
(Generic_fns.compile ~shared:true genfns));

backend/amd64/CSE.ml

+12-7
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
# 2 "backend/amd64/CSE.ml"
21
(**************************************************************************)
32
(* *)
43
(* OCaml *)
@@ -21,6 +20,12 @@ open Arch
2120
open Mach
2221
open CSE_utils
2322

23+
(** [of_simd_class cl] translates a [Simd.operation_class] into the CSE
    operation class: [Pure] becomes [Op_pure], and a [Load] becomes
    [Op_load Mutable] or [Op_load Immutable] according to its [is_mutable]
    flag. Shared by the [Isimd] and [Isimd_mem] cases of [class_of_operation]
    in both the [cse] and [cfg_cse] classes. *)
let of_simd_class (cl : Simd.operation_class) =
24+
match cl with
25+
| Pure -> Op_pure
26+
| Load { is_mutable = true } -> Op_load Mutable
27+
| Load { is_mutable = false } -> Op_load Immutable
28+
2429
class cse = object
2530

2631
inherit CSEgen.cse_generic as super
@@ -37,9 +42,9 @@ method! class_of_operation op =
3742
| Irdtsc | Irdpmc
3843
| Ilfence | Isfence | Imfence -> Op_other
3944
| Isimd op ->
40-
begin match Simd.class_of_operation op with
41-
| Pure -> Op_pure
42-
end
45+
of_simd_class (Simd.class_of_operation op)
46+
| Isimd_mem (op,_addr) ->
47+
of_simd_class (Simd.Mem.class_of_operation op)
4348
| Ipause
4449
| Icldemote _
4550
| Iprefetch _ -> Op_other
@@ -81,9 +86,9 @@ class cfg_cse = object
8186
| Irdtsc | Irdpmc
8287
| Ilfence | Isfence | Imfence -> Op_other
8388
| Isimd op ->
84-
begin match Simd.class_of_operation op with
85-
| Pure -> Op_pure
86-
end
89+
of_simd_class (Simd.class_of_operation op)
90+
| Isimd_mem (op,_addr) ->
91+
of_simd_class (Simd.Mem.class_of_operation op)
8792
| Ipause
8893
| Icldemote _
8994
| Iprefetch _ -> Op_other

backend/amd64/arch.ml

+14-5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
# 2 "backend/amd64/arch.ml"
21
(**************************************************************************)
32
(* *)
43
(* OCaml *)
@@ -153,6 +152,9 @@ type specific_operation =
153152
| Imfence (* memory fence *)
154153
| Ipause (* hint for spin-wait loops *)
155154
| Isimd of Simd.operation (* SIMD instruction set operations *)
155+
| Isimd_mem of Simd.Mem.operation * addressing_mode
156+
(* SIMD instruction set operations
157+
with memory args *)
156158
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
157159
| Iprefetch of (* memory prefetching hint *)
158160
{ is_write: bool;
@@ -273,6 +275,8 @@ let print_specific_operation printreg op ppf arg =
273275
fprintf ppf "rdpmc %a" printreg arg.(0)
274276
| Isimd simd ->
275277
Simd.print_operation printreg simd ppf arg
278+
| Isimd_mem (simd, addr) ->
279+
Simd.Mem.print_operation printreg (print_addressing printreg addr) simd ppf arg
276280
| Ipause ->
277281
fprintf ppf "pause"
278282
| Icldemote _ ->
@@ -299,13 +303,14 @@ let operation_is_pure = function
299303
| Istore_int (_, _, _) | Ioffset_loc (_, _)
300304
| Icldemote _ | Iprefetch _ -> false
301305
| Isimd op -> Simd.is_pure op
306+
| Isimd_mem (op, _addr) -> Simd.Mem.is_pure op
302307

303308
(* Specific operations that can raise *)
304309
(* Keep in sync with [Vectorize_specific] *)
305310
let operation_can_raise = function
306311
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
307312
| Ifloatarithmem _
308-
| Irdtsc | Irdpmc | Ipause | Isimd _
313+
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
309314
| Ilfence | Isfence | Imfence
310315
| Istore_int (_, _, _) | Ioffset_loc (_, _)
311316
| Icldemote _ | Iprefetch _ -> false
@@ -314,7 +319,7 @@ let operation_can_raise = function
314319
let operation_allocates = function
315320
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
316321
| Ifloatarithmem _
317-
| Irdtsc | Irdpmc | Ipause | Isimd _
322+
| Irdtsc | Irdpmc | Ipause | Isimd _ | Isimd_mem _
318323
| Ilfence | Isfence | Imfence
319324
| Istore_int (_, _, _) | Ioffset_loc (_, _)
320325
| Icldemote _ | Iprefetch _ -> false
@@ -405,9 +410,11 @@ let equal_specific_operation left right =
405410
&& equal_addressing_mode left_addr right_addr
406411
| Isimd l, Isimd r ->
407412
Simd.equal_operation l r
413+
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
414+
Simd.Mem.equal_operation l r && equal_addressing_mode al ar
408415
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
409416
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
410-
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
417+
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
411418
false
412419

413420
(* addressing mode functions *)
@@ -512,7 +519,9 @@ let isomorphic_specific_operation op1 op2 =
512519
&& equal_addressing_mode_without_displ left_addr right_addr
513520
| Isimd l, Isimd r ->
514521
Simd.equal_operation l r
522+
| Isimd_mem (l,al), Isimd_mem (r,ar) ->
523+
Simd.Mem.equal_operation l r && equal_addressing_mode_without_displ al ar
515524
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
516525
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
517-
Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ ->
526+
Ipause | Isimd _ | Isimd_mem _ | Icldemote _ | Iprefetch _), _ ->
518527
false

backend/amd64/arch.mli

+3-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
# 2 "asmcomp/amd64/arch.mli"
21
(**************************************************************************)
32
(* *)
43
(* OCaml *)
@@ -86,6 +85,9 @@ type specific_operation =
8685
| Imfence (* memory fence *)
8786
| Ipause (* hint for spin-wait loops *)
8887
| Isimd of Simd.operation (* SIMD instruction set operations *)
88+
| Isimd_mem of Simd.Mem.operation * addressing_mode
89+
(* SIMD instruction set operations
90+
with memory args *)
8991
| Icldemote of addressing_mode (* hint to demote a cacheline to L3 *)
9092
| Iprefetch of (* memory prefetching hint *)
9193
{ is_write: bool;

backend/amd64/cfg_selection.ml

+16-3
Original file line number | Diff line number | Diff line change
@@ -32,12 +32,17 @@ let pseudoregs_for_operation op arg res =
3232
| Intop (Iadd | Isub | Imul | Iand | Ior | Ixor)
3333
| Floatop ((Float32 | Float64), (Iaddf | Isubf | Imulf | Idivf)) ->
3434
[| res.(0); arg.(1) |], res
35-
| Intop_atomic { op = Compare_and_swap; size = _; addr = _ } ->
35+
| Intop_atomic { op = Compare_set; size = _; addr = _ } ->
3636
(* first arg must be rax *)
3737
let arg = Array.copy arg in
3838
arg.(0) <- rax;
3939
arg, res
40-
| Intop_atomic { op = Fetch_and_add; size = _; addr = _ } ->
40+
| Intop_atomic { op = Compare_exchange; size = _; addr = _ } ->
41+
(* first arg must be rax, res.(0) must be rax. *)
42+
let arg = Array.copy arg in
43+
arg.(0) <- rax;
44+
arg, [| rax |]
45+
| Intop_atomic { op = Exchange | Fetch_and_add; size = _; addr = _ } ->
4146
(* first arg must be the same as res.(0) *)
4247
let arg = Array.copy arg in
4348
arg.(0) <- res.(0);
@@ -86,14 +91,22 @@ let pseudoregs_for_operation op arg res =
8691
edx (high) and eax (low). Make it simple and force the argument in rcx,
8792
and rax and rdx clobbered *)
8893
[| rcx |], res
89-
| Specific (Isimd op) -> Simd_selection.pseudoregs_for_operation op arg res
94+
| Specific (Isimd op) ->
95+
Simd_selection.pseudoregs_for_operation
96+
(Simd_proc.register_behavior op)
97+
arg res
98+
| Specific (Isimd_mem (op, _addr)) ->
99+
Simd_selection.pseudoregs_for_operation
100+
(Simd_proc.Mem.register_behavior op)
101+
arg res
90102
| Csel _ ->
91103
(* last arg must be the same as res.(0) *)
92104
let len = Array.length arg in
93105
let arg = Array.copy arg in
94106
arg.(len - 1) <- res.(0);
95107
arg, res
96108
(* Other instructions are regular *)
109+
| Intop_atomic { op = Add | Sub | Land | Lor | Lxor; _ }
97110
| Intop (Ipopcnt | Iclz _ | Ictz _ | Icomp _)
98111
| Intop_imm ((Imulh _ | Idiv | Imod | Icomp _ | Ipopcnt | Iclz _ | Ictz _), _)
99112
| Specific

0 commit comments

Comments
 (0)