@@ -1942,9 +1942,13 @@ def call_conv(self):
1942
1942
# What to do here?
1943
1943
flags .forceinline = True
1944
1944
#flags.fastmath = True
1945
+ flags .fastmath = state_copy .flags .fastmath
1945
1946
flags .release_gil = True
1946
1947
flags .nogil = True
1947
1948
flags .inline = "always"
1949
+ # Giorgis: Is the following flag helping codegen optimization?
1950
+ #if selected_device == 1:
1951
+ # flags.compute_capability = (7, 0)
1948
1952
# Create a pipeline that only lowers the outlined target code. No need to
1949
1953
# compile because it has already gone through those passes.
1950
1954
if config .DEBUG_OPENMP >= 1 :
@@ -2069,43 +2073,51 @@ def _get_target_image_in_memory(self, mod, filename_prefix):
2069
2073
# Lower openmp intrinsics.
2070
2074
with ll .create_module_pass_manager () as pm :
2071
2075
pm .add_intrinsics_openmp_pass ()
2076
+ pm .add_cfg_simplification_pass ()
2072
2077
pm .run (mod )
2073
2078
2074
2079
if config .DEBUG_OPENMP_LLVM_PASS >= 1 :
2075
2080
with open (filename_prefix + "-intrinsics_omp.ll" , "w" ) as f :
2076
2081
f .write (str (mod ))
2077
2082
2078
- mod .link_in (self .libs_mod )
2083
+ mod .link_in (self .libs_mod , preserve = True )
2079
2084
# Internalize non-kernel function definitions.
2080
2085
for func in mod .functions :
2081
- if "__omp_offload_numba" in func .name :
2082
- continue
2083
2086
if func .is_declaration :
2084
2087
continue
2088
+ if func .linkage != ll .Linkage .external :
2089
+ continue
2090
+ if "__omp_offload_numba" in func .name :
2091
+ continue
2085
2092
func .linkage = "internal"
2086
2093
2094
+ with ll .create_module_pass_manager () as pm :
2095
+ self .tm .add_analysis_passes (pm )
2096
+ pm .add_global_dce_pass ()
2097
+ pm .run (mod )
2098
+
2087
2099
if config .DEBUG_OPENMP_LLVM_PASS >= 1 :
2088
2100
with open (filename_prefix + "-intrinsics_omp-linked.ll" , "w" ) as f :
2089
2101
f .write (str (mod ))
2090
2102
2091
2103
# Run passes for optimization, including target-specific passes.
2092
2104
# Run function passes.
2093
2105
with ll .create_function_pass_manager (mod ) as pm :
2094
- with create_pass_manager_builder (opt = 3 , loop_vectorize = True , slp_vectorize = True ) as pmb :
2106
+ self .tm .add_analysis_passes (pm )
2107
+ with create_pass_manager_builder (opt = 3 , slp_vectorize = True , loop_vectorize = True ) as pmb :
2095
2108
self .tm .adjust_pass_manager (pmb )
2096
2109
pmb .populate (pm )
2097
- self .tm .add_analysis_passes (pm )
2098
2110
for func in mod .functions :
2099
2111
pm .initialize ()
2100
2112
pm .run (func )
2101
2113
pm .finalize ()
2102
2114
2103
2115
# Run module passes.
2104
2116
with ll .create_module_pass_manager () as pm :
2105
- with create_pass_manager_builder (opt = 3 , loop_vectorize = True , slp_vectorize = True ) as pmb :
2117
+ self .tm .add_analysis_passes (pm )
2118
+ with create_pass_manager_builder (opt = 3 , slp_vectorize = True , loop_vectorize = True ) as pmb :
2106
2119
self .tm .adjust_pass_manager (pmb )
2107
2120
pmb .populate (pm )
2108
- self .tm .add_analysis_passes (pm )
2109
2121
pm .run (mod )
2110
2122
2111
2123
if config .DEBUG_OPENMP_LLVM_PASS >= 1 :
@@ -2118,7 +2130,7 @@ def _get_target_image_in_memory(self, mod, filename_prefix):
2118
2130
2119
2131
if config .DEBUG_OPENMP_LLVM_PASS >= 1 :
2120
2132
with open (filename_prefix + "-intrinsics_omp-linked-opt.s" , "w" ) as f :
2121
- f .write (str ( ptx ) )
2133
+ f .write (ptx )
2122
2134
2123
2135
linker_kwargs = {}
2124
2136
for x in ompx_attrs :
0 commit comments