Skip to content

Commit 5abddac

Browse files
authored
Merge pull request #25 from rust-lang/make-benchmarks-reliable2
various cleanups
2 parents ec3755e + 6cfff74 commit 5abddac

File tree

13 files changed

+204
-77
lines changed

13 files changed

+204
-77
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Absolute path to the clang++ binary inside a local rust-middle LLVM build tree.
# Machine-specific: override it on the command line (make CLANG=/path/to/clang++) on other hosts.
CLANG := /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/clang++
2+
# Absolute path to the LLVM opt binary from the same local rust-middle LLVM build tree.
# Machine-specific: override it on the command line (make OPT=/path/to/opt) on other hosts.
OPT := /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/opt
3+
4+
PASSES1 := verify,annotation2metadata,forceattrs,inferattrs,coro-early,function<eager-inv>(ee-instrument<>,lower-expect,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sroa<modify-cfg>,early-cse<>,callsite-splitting),openmp-opt,ipsccp,called-value-propagation,globalopt,function<eager-inv>(mem2reg,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>),always-inline,require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;tr
ivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),deadargelim,coro-cleanup,globalopt,globaldce,rpo-function-attrs,recompute-globalsaa,function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simpl
ifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),globaldce,constmerge,function(annotation-remarks),canonicalize-aliases,name-anon-globals,verify
5+
6+
PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>,r
el-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals
7+
#PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>,
rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals,EnzymeNewPM
8+
9+
PASSES3 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,slp-vectorizer,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linka
ge-unit-visibility>,mergefunc,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals

enzyme/benchmarks/ReverseMode/adbench/ba.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ int main(const int argc, const char* argv[]) {
427427
}
428428
}
429429

430-
{
430+
for (int j=0;j<5;j++) {
431431

432432
struct BAInput input;
433433
read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,
@@ -659,7 +659,7 @@ int main(const int argc, const char* argv[]) {
659659
}
660660
}
661661

662-
{
662+
for(int j=0;j<5;j++){
663663

664664
struct BAInput input;
665665
read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,

enzyme/benchmarks/ReverseMode/adbench/gmm.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -213,17 +213,11 @@ int main(const int argc, const char* argv[]) {
213213

214214
std::vector<std::string> paths = { "10k/gmm_d10_K200.txt" };
215215

216-
//getTests(paths, "data/1k", "1k/");
217-
if (std::getenv("BENCH_LARGE")) {
218-
getTests(paths, "data/2.5k", "2.5k/");
219-
getTests(paths, "data/10k", "10k/");
220-
}
221-
222216
getTests(paths, "data/1k", "1k/");
223-
if (std::getenv("BENCH_LARGE")) {
217+
//if (std::getenv("BENCH_LARGE")) {
224218
getTests(paths, "data/2.5k", "2.5k/");
225219
getTests(paths, "data/10k", "10k/");
226-
}
220+
//}
227221

228222
std::ofstream jsonfile("results.json", std::ofstream::trunc);
229223
json test_results;
@@ -274,7 +268,7 @@ int main(const int argc, const char* argv[]) {
274268

275269
struct GMMOutput result = { 0, std::vector<double>(Jcols) };
276270

277-
//if (0) {
271+
if (0) {
278272
try {
279273
struct timeval start, end;
280274
gettimeofday(&start, NULL);
@@ -294,7 +288,7 @@ int main(const int argc, const char* argv[]) {
294288
} catch (std::bad_alloc) {
295289
printf("Adept combined 88888888 ooms\n");
296290
}
297-
//}
291+
}
298292
}
299293

300294
for (size_t i = 0; i < 5; i++)

enzyme/benchmarks/ReverseMode/adbench/lstm.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,8 @@ double calculate_safe_primal(struct LSTMInput &input) {
243243
int main(const int argc, const char* argv[]) {
244244
printf("starting main\n");
245245

246-
//std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
247-
std::vector<std::string> paths = { "lstm_l4_c4096.txt" };
246+
std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
247+
//std::vector<std::string> paths = { "lstm_l4_c4096.txt" };
248248

249249
std::ofstream jsonfile("results.json", std::ofstream::trunc);
250250
json test_results;
@@ -289,7 +289,7 @@ int main(const int argc, const char* argv[]) {
289289

290290
}
291291

292-
{
292+
if (0){
293293

294294
struct LSTMInput input = {};
295295

@@ -323,7 +323,7 @@ int main(const int argc, const char* argv[]) {
323323

324324
}
325325

326-
{
326+
for (int j=0; j<5; j++){
327327

328328
struct LSTMInput input = {};
329329

@@ -390,7 +390,7 @@ int main(const int argc, const char* argv[]) {
390390
}
391391
}
392392

393-
{
393+
for (int j=0; j<5; j++){
394394

395395
struct LSTMInput input = {};
396396

enzyme/benchmarks/ReverseMode/ba/Makefile.make

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,28 @@
44

55
dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
66

7+
include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
8+
9+
ifeq ($(strip $(CLANG)),)
10+
# CLANG is the compiler driver invoked by the recipes in this Makefile; abort early if it is empty.
$(error CLANG is not set)
11+
endif
12+
13+
ifeq ($(strip $(PASSES1)),)
14+
$(error PASSES1 is not set)
15+
endif
16+
17+
ifeq ($(strip $(PASSES2)),)
18+
$(error PASSES2 is not set)
19+
endif
20+
21+
ifeq ($(strip $(PASSES3)),)
22+
$(error PASSES3 is not set)
23+
endif
24+
25+
ifneq ($(strip $(PASSES4)),)
26+
$(error PASSES4 is set)
27+
endif
28+
729
clean:
830
rm -f *.ll *.o results.txt results.json
931
cargo +enzyme clean
@@ -12,16 +34,13 @@ $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a: src/lib.rs Cargo.toml
1234
RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm
1335

1436
%-unopt.ll: %.cpp
15-
clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
16-
17-
%-raw.ll: %-unopt.ll
18-
opt $^ $(LOAD) $(ENZYME) -o $@ -S
37+
$(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm
1938

20-
%-opt.ll: %-raw.ll
21-
opt $^ -o $@ -S
39+
%-opt.ll: %-unopt.ll
40+
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
2241

2342
ba.o: ba-opt.ll $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a
24-
clang++ $(BENCH) -pthread -O2 $^ -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
43+
$(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm
2544

2645
results.json: ba.o
2746
numactl -C 1 ./$^

enzyme/benchmarks/ReverseMode/ba/ba.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,15 @@ void radial_distort(double const* rad_params, double *proj)
115115
proj[1] = proj[1] * L;
116116
}
117117

118+
void radial_distort_restrict(double const *__restrict rad_params, double *__restrict proj)
119+
{
120+
double rsq, L;
121+
rsq = sqsum(2, proj);
122+
L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq;
123+
proj[0] = proj[0] * L;
124+
proj[1] = proj[1] * L;
125+
}
126+
118127
void project_restrict(double const *__restrict cam, double const *__restrict X,
119128
double *__restrict proj) {
120129
double const* C = &cam[3];
@@ -129,7 +138,7 @@ void project_restrict(double const *__restrict cam, double const *__restrict X,
129138
proj[0] = Xcam[0] / Xcam[2];
130139
proj[1] = Xcam[1] / Xcam[2];
131140

132-
radial_distort(&cam[9], proj);
141+
radial_distort_restrict(&cam[9], proj);
133142

134143
proj[0] = proj[0] * cam[6] + cam[7];
135144
proj[1] = proj[1] * cam[6] + cam[8];

enzyme/benchmarks/ReverseMode/ba/src/safe.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ fn rust_ba_objective(
182182

183183
#[no_mangle]
184184
extern "C" fn rust2_ba_objective(
185-
n: usize,
186-
m: usize,
187-
p: usize,
185+
n: i32,
186+
m: i32,
187+
p: i32,
188188
cams: *const f64,
189189
x: *const f64,
190190
w: *const f64,
@@ -193,6 +193,9 @@ extern "C" fn rust2_ba_objective(
193193
reproj_err: *mut f64,
194194
w_err: *mut f64,
195195
) {
196+
let n = n as usize;
197+
let m = m as usize;
198+
let p = p as usize;
196199
let cams = unsafe { std::slice::from_raw_parts(cams, n * 11) };
197200
let x = unsafe { std::slice::from_raw_parts(x, m * 3) };
198201
let w = unsafe { std::slice::from_raw_parts(w, p) };

enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ pub unsafe fn compute_reproj_error(
110110

111111
#[no_mangle]
112112
unsafe extern "C" fn rust2_unsafe_ba_objective(
113-
n: usize,
114-
m: usize,
115-
p: usize,
113+
n: i32,
114+
m: i32,
115+
p: i32,
116116
cams: *const f64,
117117
x: *const f64,
118118
w: *const f64,
@@ -121,6 +121,9 @@ unsafe extern "C" fn rust2_unsafe_ba_objective(
121121
reproj_err: *mut f64,
122122
w_err: *mut f64,
123123
) {
124+
let n = n as usize;
125+
let m = m as usize;
126+
let p = p as usize;
124127
for i in 0..p {
125128
let cam_idx = *obs.add(i * 2 + 0) as usize;
126129
let pt_idx = *obs.add(i * 2 + 1) as usize;

enzyme/benchmarks/ReverseMode/fft/Makefile.make

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,50 @@
44

55
dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
66

7+
include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
8+
9+
ifeq ($(strip $(CLANG)),)
10+
# CLANG is the compiler driver invoked by the recipes in this Makefile; abort early if it is empty.
$(error CLANG is not set)
11+
endif
12+
13+
ifeq ($(strip $(PASSES1)),)
14+
$(error PASSES1 is not set)
15+
endif
16+
17+
ifeq ($(strip $(PASSES2)),)
18+
$(error PASSES2 is not set)
19+
endif
20+
21+
ifeq ($(strip $(PASSES3)),)
22+
$(error PASSES3 is not set)
23+
endif
24+
25+
ifneq ($(strip $(PASSES4)),)
26+
$(error PASSES4 is set)
27+
endif
28+
729
clean:
830
rm -f *.ll *.o results.txt results.json
931

1032
$(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a: src/lib.rs Cargo.toml
1133
RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib
1234

1335
%-unopt.ll: %.cpp
14-
clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
36+
$(CLANG) $(BENCH) $^ -DCPP=1 -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit
37+
%-unoptr.ll: %.cpp
38+
$(CLANG) $(BENCH) $^ -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit
1539

16-
%-raw.ll: %-unopt.ll
17-
opt $^ $(LOAD) $(ENZYME) -o $@ -S
1840

19-
%-opt.ll: %-raw.ll
20-
opt $^ -o $@ -S
41+
%-opt.ll: %-unopt.ll
42+
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
43+
%-optr.ll: %-unoptr.ll
44+
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
2145

2246
fft.o: fft-opt.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
23-
clang++ $(BENCH) -pthread -O2 $^ -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
24-
#clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
47+
$(CLANG) -DCPP=1 -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm
48+
fftr.o: fft-optr.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
49+
$(CLANG) -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm
2550

26-
results.json: fft.o
27-
./$^ 1048576 | tee $@
51+
results.json: fftr.o fft.o
52+
numactl -C 1 ./fft.o 1048576 | tee results.json
53+
numactl -C 1 ./fftr.o 1048576 | tee resultsr.json

0 commit comments

Comments
 (0)