forked from ocaml/ocaml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze_ocaml_cost.py
82 lines (69 loc) · 2.87 KB
/
analyze_ocaml_cost.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Script to extract ocaml's approximated code sizes and real code sizes for a module.
The compiler must have been build by dune beforehand.
"""
import sys
import subprocess
import re
import json
from collections import namedtuple
import matplotlib.pyplot as plt
import argparse
from math import log
parser = argparse.ArgumentParser(description="Plot the groundtruth code size vs flambda approximated one")
parser.add_argument("module",
help="the module name to compile (from flambdatest/mlexamples)")
parser.add_argument("--plot", dest="plot_path",
help="Plot the result to a file")
parser.add_argument("--dump", dest="dump_path",
help="Dump code sizes to a file")
args = parser.parse_args()
cmd = ["../../_build/default/optmain.exe", "-nostdlib", "-nopervasives", "-c", "-g", args.module + ".ml"]
r = subprocess.run(cmd, stdout=subprocess.PIPE, check=True, cwd="flambdatest/mlexamples", env={"PRINT_SIZES": "t"})
ocaml_symbol_to_size = {}
regex = re.compile(r"\x1b.+?m")
for l in r.stdout.decode().split("\n"):
l = re.sub(regex, "" , l)
l = l.strip().split()
if len(l) < 2:
continue
symbol = l[0]
code_size = int(l[1])
ocaml_symbol_to_size[symbol] = code_size
r = subprocess.run(["nm", "-S", "-f", "posix", args.module + ".o"], stdout=subprocess.PIPE, check=True, cwd="flambdatest/mlexamples")
groundtruth_symbol_to_size = {}
for l in r.stdout.decode().split("\n"):
l = l.strip().split()
# nm returns entry of the form SYMBOL_NAME TYPE OFFSET CODE_SIZE
# For some entries CODE_SIZE might be missing, so skip those.
if len(l) < 4:
continue
symbol = l[0]
type_ = l[1]
code_size = int(l[3], 16)
groundtruth_symbol_to_size[symbol] = code_size
common_symbols = set(groundtruth_symbol_to_size.keys()) & set(ocaml_symbol_to_size.keys())
Symbol = namedtuple("Symbol", ["name", "groundtruth_size", "ocaml_size"])
max_ocaml_size = max(ocaml_symbol_to_size.values())
max_groundtruth_size = max(groundtruth_symbol_to_size.values())
symbols = []
for name in common_symbols:
g = groundtruth_symbol_to_size[name]
o = ocaml_symbol_to_size[name]
symbols.append(Symbol(name, g, o))
# Create the plot
if args.plot_path is not None:
symbol_plot = sorted(symbols, key=lambda x: x.groundtruth_size)
ocaml_plot_size = [x.ocaml_size for x in symbol_plot]
groundtruth_plot_size = [x.groundtruth_size for x in symbol_plot]
_, ax = plt.subplots(1)
ax.scatter(list(map(log, ocaml_plot_size)), list(map(log, groundtruth_plot_size)))
ax.set_xlabel("Ocaml size")
ax.set_ylabel("Binary size")
ax.set_ylim(bottom=0)
ax.set_xlim(left=0)
ax.plot([0, 1], [0, 1], transform=ax.transAxes)
plt.title(f"{args.module}.ml")
plt.savefig(args.plot_path)
if args.dump_path is not None:
with open(args.dump_path, "w") as f:
json.dump([s._asdict() for s in symbols], f)