Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 6f07842

Browse files
committed
Optimize OpenMP device pass pipeline
1 parent fe17768 commit 6f07842

File tree

1 file changed

+20 -8 lines changed

1 file changed

+20 -8 lines changed

numba/openmp.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1942,9 +1942,13 @@ def call_conv(self):
19421942
# What to do here?
19431943
flags.forceinline = True
19441944
#flags.fastmath = True
1945+
flags.fastmath = state_copy.flags.fastmath
19451946
flags.release_gil = True
19461947
flags.nogil = True
19471948
flags.inline = "always"
1949+
# Giorgis: Is the following flag helping codegen optimization?
1950+
#if selected_device == 1:
1951+
# flags.compute_capability = (7, 0)
19481952
# Create a pipeline that only lowers the outlined target code. No need to
19491953
# compile because it has already gone through those passes.
19501954
if config.DEBUG_OPENMP >= 1:
@@ -2069,43 +2073,51 @@ def _get_target_image_in_memory(self, mod, filename_prefix):
20692073
# Lower openmp intrinsics.
20702074
with ll.create_module_pass_manager() as pm:
20712075
pm.add_intrinsics_openmp_pass()
2076+
pm.add_cfg_simplification_pass()
20722077
pm.run(mod)
20732078

20742079
if config.DEBUG_OPENMP_LLVM_PASS >= 1:
20752080
with open(filename_prefix + "-intrinsics_omp.ll", "w") as f:
20762081
f.write(str(mod))
20772082

2078-
mod.link_in(self.libs_mod)
2083+
mod.link_in(self.libs_mod, preserve=True)
20792084
# Internalize non-kernel function definitions.
20802085
for func in mod.functions:
2081-
if "__omp_offload_numba" in func.name:
2082-
continue
20832086
if func.is_declaration:
20842087
continue
2088+
if func.linkage != ll.Linkage.external:
2089+
continue
2090+
if "__omp_offload_numba" in func.name:
2091+
continue
20852092
func.linkage = "internal"
20862093

2094+
with ll.create_module_pass_manager() as pm:
2095+
self.tm.add_analysis_passes(pm)
2096+
pm.add_global_dce_pass()
2097+
pm.run(mod)
2098+
20872099
if config.DEBUG_OPENMP_LLVM_PASS >= 1:
20882100
with open(filename_prefix + "-intrinsics_omp-linked.ll", "w") as f:
20892101
f.write(str(mod))
20902102

20912103
# Run passes for optimization, including target-specific passes.
20922104
# Run function passes.
20932105
with ll.create_function_pass_manager(mod) as pm:
2094-
with create_pass_manager_builder(opt=3, loop_vectorize=True, slp_vectorize=True) as pmb:
2106+
self.tm.add_analysis_passes(pm)
2107+
with create_pass_manager_builder(opt=3, slp_vectorize=True, loop_vectorize=True) as pmb:
20952108
self.tm.adjust_pass_manager(pmb)
20962109
pmb.populate(pm)
2097-
self.tm.add_analysis_passes(pm)
20982110
for func in mod.functions:
20992111
pm.initialize()
21002112
pm.run(func)
21012113
pm.finalize()
21022114

21032115
# Run module passes.
21042116
with ll.create_module_pass_manager() as pm:
2105-
with create_pass_manager_builder(opt=3, loop_vectorize=True, slp_vectorize=True) as pmb:
2117+
self.tm.add_analysis_passes(pm)
2118+
with create_pass_manager_builder(opt=3, slp_vectorize=True, loop_vectorize=True) as pmb:
21062119
self.tm.adjust_pass_manager(pmb)
21072120
pmb.populate(pm)
2108-
self.tm.add_analysis_passes(pm)
21092121
pm.run(mod)
21102122

21112123
if config.DEBUG_OPENMP_LLVM_PASS >= 1:
@@ -2118,7 +2130,7 @@ def _get_target_image_in_memory(self, mod, filename_prefix):
21182130

21192131
if config.DEBUG_OPENMP_LLVM_PASS >= 1:
21202132
with open(filename_prefix + "-intrinsics_omp-linked-opt.s", "w") as f:
2121-
f.write(str(ptx))
2133+
f.write(ptx)
21222134

21232135
linker_kwargs = {}
21242136
for x in ompx_attrs:

0 commit comments

Comments
 (0)