Skip to content

Commit 09a3597

Browse files
authored
Externals Extraction from .cmt Files (#3699)
* code for extracting externals (not compiling) * license header * build externals with polling * fix extract externals code to build again * nit * message formatting * header for environments.ml * add support for -open and -H * add flag to envaux * use compiler envaux * enable formatting and remove outdated comments * add comment * comment * clean up printing * error handling * fix debugger flag for runtime 5 * Expand comment on the additional parameter of environment reconstruction * renaming Any -> Value + cleanup of description comments * rename shapes into value shapes * readme for the externals extraction
1 parent 29a2519 commit 09a3597

15 files changed

+1008
-24
lines changed

Makefile

+5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ minimizer: runtime-stdlib
3939
cp chamelon/dune.jst chamelon/dune
4040
RUNTIME_DIR=$(RUNTIME_DIR) $(dune) build $(ws_main) @chamelon/all
4141

42+
.PHONY: hacking-externals
43+
hacking-externals: _build/_bootinstall
44+
RUNTIME_DIR=$(RUNTIME_DIR) $(dune) build $(ws_boot) $(coverage_dune_flags) -w "extract_externals/extract_externals.exe"
45+
46+
4247
.PHONY: hacking-runtest
4348
hacking-runtest: _build/_bootinstall
4449
RUNTIME_DIR=$(RUNTIME_DIR) $(dune) build $(ws_boot) $(coverage_dune_flags) -w $(boot_targets) @runtest

debugger/command_line.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ let env_of_event =
506506
function
507507
None -> Env.empty
508508
| Some ev ->
509-
Envaux.env_from_summary ev.ev_ev.ev_typenv ev.ev_ev.ev_typsubst
509+
Envaux.env_from_summary ~allow_missing_modules:false ev.ev_ev.ev_typenv ev.ev_ev.ev_typsubst
510510

511511
let print_command depth ppf lexbuf =
512512
let exprs = expression_list_eol Lexer.lexeme lexbuf in

debugger4/command_line.ml

+1-1
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ let env_of_event =
504504
function
505505
None -> Env.empty
506506
| Some ev ->
507-
Envaux.env_from_summary ev.ev_ev.ev_typenv ev.ev_ev.ev_typsubst
507+
Envaux.env_from_summary ~allow_missing_modules:false ev.ev_ev.ev_typenv ev.ev_ev.ev_typsubst
508508

509509
let print_command depth ppf lexbuf =
510510
let exprs = expression_list_eol Lexer.lexeme lexbuf in

extract_externals/.ocamlformat

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Please make a pull request to change this file.
2+
disable=false
3+
# There is an .ocamlformat-enable file in this directory.
4+
# Keep the remainder of this file in sync with other .ocamlformat files in this repo.
5+
assignment-operator=begin-line
6+
cases-exp-indent=2
7+
doc-comments=before
8+
dock-collection-brackets=false
9+
if-then-else=keyword-first
10+
module-item-spacing=sparse
11+
parens-tuple=multi-line-only
12+
sequence-blank-line=compact
13+
space-around-lists=false
14+
space-around-variants=false
15+
type-decl=sparse
16+
version=0.24.1
17+
18+
# The existing comments are hand-formatted and lose a lot of readability
19+
# if we wrap them. We should either convert the comments we care about to
20+
# doc comments, or make this same setting change everywhere.
21+
wrap-comments=false

extract_externals/README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Externals Extraction
2+
3+
The externals extraction is a small tool to extract type information about external functions from `.cmt` files.
4+
Upon installation of the compiler, it is installed as the binary `extract_externals.opt`.
5+
6+
7+
## Extracting externals
8+
9+
After successfully building and installing the compiler, you can extract external declarations as follows:
10+
11+
```
12+
extract_externals.opt -I import/path/one -I import/path/two -H hidden/import/path -open Foo -output-file externals_out.txt file1.cmt file2.cmt
13+
```
14+
15+
In other words, you can provide import flags `-I`, hidden import flags `-H`, and open flags `-open` as for the compiler.
16+
In addition, you can provide an output file to store the externals in with `-output-file` and you should provide a list of at least one `.cmt` file. If the `-output-file` flag is not present, the result will be printed to stdout.
17+
18+
The output of the extraction is a sequence of external functions with their type information in [sexp format](https://github.com/janestreet/sexplib).
19+
20+
21+
## Shape information
22+
23+
To understand the shape information that is provided in the output, see the file `shapes.mli` in the `extract_externals` directory. In short, for arguments and return values in external declarations, the following shapes are currently supported:
24+
25+
```ocaml
26+
type value_shape =
27+
| Value (** anything of C type [value] *)
28+
| Imm (** immediate, tagged with a one at the end *)
29+
| Nativeint
30+
(** block of a native word integer, e.g., 64-bit integer on amd64 target *)
31+
| Double (** block of a native double *)
32+
| Int64 (** block of a 64-bit integer *)
33+
| Int32 (** block of a 32-bit integer *)
34+
| String
35+
(** block of a char pointer with a size, representing both Bytes.t and String.t *)
36+
| FloatArray (** block containing native doubles *)
37+
| Block of (int * value_shape list) option
38+
(** Block whose tag is below no-scan tag (i.e., a normal ocaml block value). If the
39+
argment is [None], then the block could have any tag and any elements. If the
40+
argument is [Some (t, shs)], then [t] is the tag of the block and [shs] contains the
41+
shapes of its fields. In the case of [Some (t, shs)], the number of
42+
fields is known statically (i.e., the length of the list [shs]).
43+
44+
To represent arrays (which are blocks with tag 0 at run time, but whose size is not
45+
statically known), there is a separate construtor, [Array sh], which keeps track of
46+
the shapes of the elements. *)
47+
| Array of value_shape
48+
(** Block with tag 0 and a fixed size (not known statically). The shape of the
49+
elements is given by the argument. *)
50+
| Closure (** Block with closure tag. *)
51+
| Obj (** Block with object tag. *)
52+
| Or of value_shape * value_shape
53+
(** Disjunction between two shapes for, e.g., variant types. *)
54+
```
55+
56+
Note that these shapes can be overlapping (e.g., `Value` covers all other shapes).

extract_externals/dune

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
;**************************************************************************)
2+
;* *)
3+
;* OCaml *)
4+
;* *)
5+
;* Copyright 2025 Jane Street Group LLC *)
6+
;* *)
7+
;* All rights reserved. This file is distributed under the terms of *)
8+
;* the GNU Lesser General Public License version 2.1, with the *)
9+
;* special exception on linking described in the file LICENSE. *)
10+
;* *)
11+
;**************************************************************************)
12+
13+
(executable
14+
(name extract_externals)
15+
(modes byte native)
16+
(modules extract_externals value_shapes traverse_typed_tree)
17+
(libraries ocamlcommon ocamlbytecomp ocamloptcomp))
18+
19+
(install
20+
(files
21+
(extract_externals.bc as extract_externals.byte)
22+
(extract_externals.exe as extract_externals.opt))
23+
(section bin)
24+
(package ocaml))
+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
(******************************************************************************
2+
* flambda-backend *
3+
* Simon Spies, Jane Street *
4+
* -------------------------------------------------------------------------- *
5+
* MIT License *
6+
* *
7+
* Copyright (c) 2025 Jane Street Group LLC *
8+
* opensource-contacts@janestreet.com *
9+
* *
10+
* Permission is hereby granted, free of charge, to any person obtaining a *
11+
* copy of this software and associated documentation files (the "Software"), *
12+
* to deal in the Software without restriction, including without limitation *
13+
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
14+
* and/or sell copies of the Software, and to permit persons to whom the *
15+
* Software is furnished to do so, subject to the following conditions: *
16+
* *
17+
* The above copyright notice and this permission notice shall be included *
18+
* in all copies or substantial portions of the Software. *
19+
* *
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
23+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *
25+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *
26+
* DEALINGS IN THE SOFTWARE. *
27+
******************************************************************************)
28+
29+
open! Value_shapes
30+
31+
(* Argument Parsing *)
32+
let easily_readable = ref false
33+
34+
let verbose = ref false
35+
36+
let output_file = ref None
37+
38+
let include_dirs = ref []
39+
40+
let hidden_include_dirs = ref []
41+
42+
let open_modules = ref []
43+
44+
let files = ref []
45+
46+
let spec_list =
47+
[ "-readable", Arg.Set easily_readable, "Output in easily readable format";
48+
"-verbose", Arg.Set verbose, "Print errors instead of failing silently";
49+
( "-output-file",
50+
Arg.String (fun s -> output_file := Some s),
51+
"Optional output file; prints to stdout if not present" );
52+
( "-I",
53+
Arg.String
54+
(fun s ->
55+
include_dirs
56+
:= List.rev_append (String.split_on_char ',' s) !include_dirs),
57+
"A directory with .cmi files to include for lookups" );
58+
( "-H",
59+
Arg.String
60+
(fun s ->
61+
hidden_include_dirs
62+
:= List.rev_append (String.split_on_char ',' s) !hidden_include_dirs),
63+
"Hidden includes" );
64+
( "-open",
65+
Arg.String
66+
(fun s ->
67+
open_modules
68+
:= List.rev_append (String.split_on_char ',' s) !open_modules),
69+
"Modules to open" ) ]
70+
71+
let parse_arguments () =
72+
Arg.parse spec_list
73+
(fun a -> files := !files @ [a])
74+
"Usage: externals.exe <options> <files>\nOptions are:"
75+
76+
(* Pretty Printing for Externals in Readable Format*)
77+
78+
let pp_ext_funs ~readable fmt extfuns =
79+
if readable
80+
then Value_shapes.print_extfuns_readable fmt extfuns
81+
else Value_shapes.print_extfuns fmt extfuns
82+
83+
let output_shapes ~output_file ~readable externals =
84+
match output_file with
85+
| None -> pp_ext_funs ~readable Format.std_formatter externals
86+
| Some file ->
87+
Out_channel.with_open_bin file (fun out ->
88+
let fmt = Format.formatter_of_out_channel out in
89+
pp_ext_funs ~readable fmt externals;
90+
Format.pp_print_newline fmt ();
91+
Out_channel.flush out)
92+
93+
(* Typed Extraction *)
94+
let extract_shapes_from_cmt ~verbose file =
95+
match Cmt_format.read_cmt file with
96+
| exception Sys_error s ->
97+
if verbose
98+
then Format.eprintf "Exception raised while reading .cmt file: %s\n" s;
99+
[]
100+
| exception _ ->
101+
if verbose
102+
then Format.eprintf "Exception raised while reading .cmt file %s\n" file;
103+
[]
104+
| { cmt_annots = Implementation tt; _ } ->
105+
Traverse_typed_tree.extract_from_typed_tree tt
106+
| _ -> assert false
107+
108+
let extract_shapes_from_cmts ~includes ~verbose files =
109+
Clflags.include_dirs := includes @ !Clflags.include_dirs;
110+
Clflags.open_modules := !open_modules @ !Clflags.open_modules;
111+
Clflags.hidden_include_dirs
112+
:= !hidden_include_dirs @ !Clflags.hidden_include_dirs;
113+
Compmisc.init_path ();
114+
List.iter
115+
(fun file ->
116+
if not (String.ends_with file ~suffix:".cmt")
117+
then Misc.fatal_errorf "File %s is not a .cmt file; aborting\n" file)
118+
files;
119+
List.concat_map (extract_shapes_from_cmt ~verbose) files
120+
121+
let externals_version = "v0.1"
122+
123+
let extract_and_output_from_cmts ~readable ~includes ~output_file ~verbose files
124+
=
125+
let externals = extract_shapes_from_cmts ~includes ~verbose files in
126+
output_shapes ~output_file ~readable
127+
{ version = externals_version; extfuns = externals }
128+
129+
let _ =
130+
parse_arguments ();
131+
extract_and_output_from_cmts ~readable:!easily_readable
132+
~includes:!include_dirs ~output_file:!output_file ~verbose:!verbose !files
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
(******************************************************************************
2+
* flambda-backend *
3+
* Simon Spies, Jane Street *
4+
* -------------------------------------------------------------------------- *
5+
* MIT License *
6+
* *
7+
* Copyright (c) 2025 Jane Street Group LLC *
8+
* opensource-contacts@janestreet.com *
9+
* *
10+
* Permission is hereby granted, free of charge, to any person obtaining a *
11+
* copy of this software and associated documentation files (the "Software"), *
12+
* to deal in the Software without restriction, including without limitation *
13+
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
14+
* and/or sell copies of the Software, and to permit persons to whom the *
15+
* Software is furnished to do so, subject to the following conditions: *
16+
* *
17+
* The above copyright notice and this permission notice shall be included *
18+
* in all copies or substantial portions of the Software. *
19+
* *
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
23+
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *
25+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *
26+
* DEALINGS IN THE SOFTWARE. *
27+
******************************************************************************)

0 commit comments

Comments
 (0)