Skip to content
This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Commit 2893849

Browse files
committed
Latest version.
1 parent a31ce71 commit 2893849

File tree

5 files changed

+133
-37
lines changed

5 files changed

+133
-37
lines changed

numba/lowering.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def post_lower(self):
156156
"""
157157
# Add omp offloading metadata if present.
158158
if hasattr(self, 'omp_offload'):
159+
print("Has omp_offload")
159160
omp_offload_metadata = self.module.get_or_insert_named_metadata('omp_offload.info')
160161
for oi in self.omp_offload:
161162
omp_offload_metadata.add(oi)

numba/npyufunc/parfor.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,11 @@ def get_next_offload_number(lowerer):
112112
return cur
113113

114114
class openmp_region_start(ir.Stmt):
115-
def __init__(self, tags, loc):
115+
def __init__(self, tags, region_number, loc):
116116
self.tags = tags
117-
self.omp_region_var = None
117+
self.region_number = region_number
118118
self.loc = loc
119+
self.omp_region_var = None
119120
self.omp_metadata = None
120121

121122
def has_target(self):
@@ -132,6 +133,8 @@ def lower(self, lowerer):
132133
builder = lowerer.builder
133134
library = lowerer.library
134135

136+
builder.module.device_triples = "spir64"
137+
135138
llvm_token_t = lc.Type.token()
136139
fnty = lir.FunctionType(llvm_token_t, [])
137140
pre_fn = builder.module.declare_intrinsic('llvm.directive.region.entry', (), fnty)
@@ -143,7 +146,8 @@ def lower(self, lowerer):
143146
lir.IntType(32)(lb.getFileIdForFile(self.loc.filename)), # File ID of the file with the entry.
144147
lowerer.fndesc.mangled_name, # Mangled name of the function with the entry.
145148
lir.IntType(32)(self.loc.line), # Line in the source file with the entry.
146-
lir.IntType(32)(get_next_offload_number(lowerer)), # Order the entry was created.
149+
lir.IntType(32)(self.region_number), # Order the entry was created.
150+
#lir.IntType(32)(get_next_offload_number(lowerer)), # Order the entry was created.
147151
lir.IntType(32)(0) # Entry kind. Should always be 0 I think.
148152
])
149153
add_offload_info(lowerer, self.omp_metadata)
@@ -1433,7 +1437,7 @@ def _create_gufunc_for_parfor_body(
14331437
rhs = instr.value
14341438
if isinstance(rhs, ir.Expr) and rhs.op == 'call':
14351439
# print("definitions", gufunc_ir._definitions)
1436-
# pdb.set_trace()
1440+
#pdb.set_trace()
14371441
# func_def = get_definition(gufunc_ir, rhs.func)
14381442
callname = find_callname(gufunc_ir, rhs)
14391443
# print("func_def", func_def, type(func_def))

numba/parfor.py

Lines changed: 49 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2739,7 +2739,7 @@ def lower_parfor_sequential(typingctx, func_ir, typemap, calltypes, openmp):
27392739
new_blocks = {}
27402740
for (block_label, block) in func_ir.blocks.items():
27412741
block_label, parfor_found = _lower_parfor_sequential_block(
2742-
block_label, block, new_blocks, typemap, calltypes, parfor_found, openmp, typingctx)
2742+
block_label, block, new_blocks, typemap, calltypes, parfor_found, openmp, typingctx, func_ir)
27432743
# old block stays either way
27442744
new_blocks[block_label] = block
27452745
func_ir.blocks = new_blocks
@@ -2761,11 +2761,10 @@ def lower_parfor_sequential(typingctx, func_ir, typemap, calltypes, openmp):
27612761
next_region_end_index = 0
27622762
for i, inst in enumerate(block.body):
27632763
if inst in openmp_ends:
2764-
block.body[next_region_end_index=1:i+1] = block.body[next_region_end_index:i]
2764+
block.body[next_region_end_index+1:i+1] = block.body[next_region_end_index:i]
27652765
block.body[next_region_end_index] = inst
27662766
next_region_end_index += 1
27672767
dprint("Found openmp end")
2768-
break
27692768
dprint_func_ir(func_ir, "after elevating openmp end")
27702769
return
27712770

@@ -2914,6 +2913,14 @@ def add_prints(block, typemap, calltypes):
29142913

29152914
return new_block
29162915

2916+
def get_next_region_number(func_ir):
2917+
if hasattr(func_ir, 'offload_number'):
2918+
cur = func_ir.offload_number
2919+
else:
2920+
cur = 0
2921+
func_ir.offload_number = cur + 1
2922+
return cur
2923+
29172924
def _lower_parfor_openmp(
29182925
block_label,
29192926
block,
@@ -2922,7 +2929,8 @@ def _lower_parfor_openmp(
29222929
calltypes,
29232930
parfor_found,
29242931
openmp,
2925-
typingctx):
2932+
typingctx,
2933+
func_ir):
29262934
add_directives = True
29272935

29282936
global omp_count
@@ -3023,8 +3031,12 @@ def _lower_parfor_openmp(
30233031
ompubvar, ompubvar_assign = create_assign_var("ompub", types.uintp, subexpr, scope, loc, typemap)
30243032
inst.init_block.body.append(ompubvar_assign)
30253033

3034+
region_number = get_next_region_number(func_ir)
3035+
30263036
# ----------------------------------------------------------------
30273037

3038+
print("PyArg_UnpackTuple symbol", llvmlite.binding.address_of_symbol('PyArg_UnpackTuple'))
3039+
# if openmp == True or openmp == 'cpu' or openmp == 'target':
30283040
if openmp == True or openmp == 'cpu':
30293041
openmp_start_tags = [ openmp_tag("DIR.OMP.PARALLEL.LOOP") ]
30303042
openmp_end_tags = [ openmp_tag("DIR.OMP.END.PARALLEL.LOOP") ]
@@ -3046,7 +3058,7 @@ def _lower_parfor_openmp(
30463058
openmp_start_tags.append(openmp_tag("QUAL.OMP.NORMALIZED.UB", ompubvar))
30473059

30483060
# Add OpenMP LLVM directives.
3049-
or_start = numba.npyufunc.parfor.openmp_region_start(openmp_start_tags, loc)
3061+
or_start = numba.npyufunc.parfor.openmp_region_start(openmp_start_tags, region_number, loc)
30503062
# Append the OpenMP directive to the block right before the loop.
30513063

30523064
if omp_count != -1:
@@ -3067,12 +3079,24 @@ def _lower_parfor_openmp(
30673079
block.body.insert(0, or_end)
30683080
elif openmp == 'target':
30693081
openmp_target_start_tags = [ openmp_tag("DIR.OMP.TARGET"),
3070-
openmp_tag("QUAL.OMP.OFFLOAD.ENTRY.IDX", 0) ]
3082+
openmp_tag("QUAL.OMP.OFFLOAD.ENTRY.IDX", region_number),
3083+
openmp_tag("QUAL.OMP.PRIVATE", index_variable),
3084+
openmp_tag("QUAL.OMP.FIRSTPRIVATE", omplbvar),
3085+
openmp_tag("QUAL.OMP.FIRSTPRIVATE", ompubvar)]
30713086
openmp_target_end_tags = [ openmp_tag("DIR.OMP.END.TARGET") ]
3072-
openmp_teams_start_tags = [ openmp_tag("DIR.OMP.TEAMS") ]
3087+
3088+
openmp_teams_start_tags = [ openmp_tag("DIR.OMP.TEAMS"),
3089+
openmp_tag("QUAL.OMP.PRIVATE", index_variable),
3090+
openmp_tag("QUAL.OMP.SHARED", omplbvar),
3091+
openmp_tag("QUAL.OMP.SHARED", ompubvar)]
30733092
openmp_teams_end_tags = [ openmp_tag("DIR.OMP.END.TEAMS") ]
3093+
30743094
openmp_distparloop_start_tags = [ openmp_tag("DIR.OMP.DISTRIBUTE.PARLOOP"),
3075-
openmp_tag("QUAL.OMP.SCHEDULE.STATIC", 1) ]
3095+
openmp_tag("QUAL.OMP.SCHEDULE.STATIC", 1),
3096+
openmp_tag("QUAL.OMP.PRIVATE", index_variable),
3097+
openmp_tag("QUAL.OMP.FIRSTPRIVATE", omplbvar),
3098+
openmp_tag("QUAL.OMP.NORMALIZED.IV", index_variable),
3099+
openmp_tag("QUAL.OMP.NORMALIZED.UB", ompubvar)]
30763100
openmp_distparloop_end_tags = [ openmp_tag("DIR.OMP.END.DISTRIBUTE.PARLOOP") ]
30773101

30783102
# for i in range(ndims):
@@ -3097,35 +3121,28 @@ def _lower_parfor_openmp(
30973121
openmp_start_tags.append(openmp_tag("QUAL.OMP.COLLAPSE", ndims))
30983122

30993123
# Add OpenMP LLVM directives.
3100-
target_start = numba.npyufunc.parfor.openmp_region_start(openmp_target_start_tags, loc)
3101-
# Append OpenMP directive right to the block right before the loop.
3102-
if add_directives:
3103-
inst.init_block.body.append(target_start)
3124+
target_start = numba.npyufunc.parfor.openmp_region_start(openmp_target_start_tags, region_number, loc)
3125+
teams_start = numba.npyufunc.parfor.openmp_region_start(openmp_teams_start_tags, region_number, loc)
3126+
distparloop_start = numba.npyufunc.parfor.openmp_region_start(openmp_distparloop_start_tags, region_number, loc)
31043127

3105-
teams_start = numba.npyufunc.parfor.openmp_region_start(openmp_teams_start_tags, loc)
3106-
# Append OpenMP directive right to the block right before the loop.
3128+
# Append OpenMP directives to the block right before the loop.
31073129
if add_directives:
3130+
inst.init_block.body.append(target_start)
31083131
inst.init_block.body.append(teams_start)
3109-
3110-
distparloop_start = numba.npyufunc.parfor.openmp_region_start(openmp_distparloop_start_tags, loc)
3111-
# Append OpenMP directive right to the block right before the loop.
3112-
if add_directives:
31133132
inst.init_block.body.append(distparloop_start)
31143133

31153134
distparloop_end = numba.npyufunc.parfor.openmp_region_end(distparloop_start, openmp_distparloop_end_tags, loc)
3116-
# Prepend OpenMP directive right to the block right after the loop.
3117-
if add_directives:
3118-
block.body.insert(0, distparloop_end)
3119-
31203135
teams_end = numba.npyufunc.parfor.openmp_region_end(teams_start, openmp_teams_end_tags, loc)
3121-
# Prepend OpenMP directive right to the block right after the loop.
3122-
if add_directives:
3123-
block.body.insert(0, teams_end)
3124-
31253136
target_end = numba.npyufunc.parfor.openmp_region_end(target_start, openmp_target_end_tags, loc)
3126-
# Prepend OpenMP directive right to the block right after the loop.
3137+
3138+
# Prepend OpenMP directives to the block right after the loop.
31273139
if add_directives:
3140+
# Because we always insert at position 0, we insert in the reverse
3141+
# order we want them to be in the IR (which is itself the reverse
3142+
# order that the region.entry directives were inserted).
31283143
block.body.insert(0, target_end)
3144+
block.body.insert(0, teams_end)
3145+
block.body.insert(0, distparloop_end)
31293146

31303147
inst.init_block.body.append(pi_assign)
31313148
inst.init_block.body.append(omplbvar_assign)
@@ -3166,6 +3183,7 @@ def _lower_parfor_openmp(
31663183

31673184
# Make init block jump to top test.
31683185
inst.init_block.body.append(ir.Jump(top_test_label, loc))
3186+
#inst.init_block.body.append(ir.Jump(jump_to_body_block_label, loc))
31693187
if not collapse and ndims > 1:
31703188
parfor_copy = copy.copy(inst)
31713189
parfor_copy.init_block = ir.Block(scope, loc)
@@ -3191,7 +3209,7 @@ def _lower_parfor_openmp(
31913209
# Add parfor body to blocks
31923210
for (l, b) in inst.loop_body.items():
31933211
l, parfor_found = _lower_parfor_sequential_block(
3194-
l, b, new_blocks, typemap, calltypes, parfor_found, False, typingctx)
3212+
l, b, new_blocks, typemap, calltypes, parfor_found, False, typingctx, func_ir)
31953213
#new_blocks[l] = add_prints(b, typemap, calltypes)
31963214
new_blocks[l] = b
31973215

@@ -3296,10 +3314,11 @@ def _lower_parfor_sequential_block(
32963314
calltypes,
32973315
parfor_found,
32983316
openmp,
3299-
typingctx):
3317+
typingctx,
3318+
func_ir):
33003319
if openmp != False:
33013320
return _lower_parfor_openmp(block_label, block, new_blocks, typemap,
3302-
calltypes, parfor_found, openmp, typingctx)
3321+
calltypes, parfor_found, openmp, typingctx, func_ir)
33033322

33043323
scope = block.scope
33053324
i = _find_first_parfor(block.body)

numba/targets/codegen.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
'i886', 'i986'])
2323

2424

25+
Todd = True
26+
2527
def _is_x86(triple):
2628
arch = triple.split('-')[0]
2729
return arch in _x86arch
@@ -58,6 +60,32 @@ def display(self, filename=None, view=False):
5860
def __repr__(self):
5961
return self.dot
6062

63+
def test_link():
64+
ll.initialize()
65+
ll.initialize_all_targets()
66+
ll.initialize_native_asmprinter()
67+
68+
target = ll.Target.from_triple(ll.get_process_triple())
69+
tm = target.create_target_machine()
70+
71+
72+
llvm_module = ll.parse_assembly("""
73+
declare i32 @PyArg_UnpackTuple(i8*, i8*, i64, i64, ...)
74+
declare double @sin(double)
75+
define i64 @foo() {
76+
ret i64 ptrtoint (i32 (i8*, i8*, i64, i64, ...)* @PyArg_UnpackTuple to i64)
77+
}
78+
""")
79+
80+
engine = ll.create_mcjit_compiler(llvm_module, tm)
81+
addr = engine.get_function_address('PyArg_UnpackTuple')
82+
print('PyArg_UnpackTuple', addr) # printing 0x0
83+
84+
85+
addr = engine.get_function_address('foo')
86+
foo = ctypes.CFUNCTYPE(ctypes.c_int64)(addr)
87+
print('foo', addr)
88+
print("PyArg_UnpackTuple", foo()) # print non zero
6189

6290
class CodeLibrary(object):
6391
"""
@@ -190,17 +218,20 @@ def add_ir_module(self, ir_module):
190218
self.add_llvm_module(ll_module)
191219

192220
def add_llvm_module(self, ll_module):
221+
print("CodeLibrary::add_llvm_module", self._name)
193222
self._optimize_functions(ll_module)
194223
# TODO: we shouldn't need to recreate the LLVM module object
195224
ll_module = remove_redundant_nrt_refct(ll_module)
196225
self._final_module.link_in(ll_module)
226+
print("CodeLibrary::add_llvm_module end", self._name)
197227

198228
def finalize(self):
199229
"""
200230
Finalize the library. After this call, nothing can be added anymore.
201231
Finalization involves various stages of code optimization and
202232
linking.
203233
"""
234+
print("CodeLibrary::finalize", self._name)
204235
require_global_compiler_lock()
205236

206237
# Report any LLVM-related problems to the user
@@ -211,6 +242,7 @@ def finalize(self):
211242
if config.DUMP_FUNC_OPT:
212243
dump("FUNCTION OPTIMIZED DUMP %s" % self._name, self.get_llvm_str())
213244

245+
print("Before link_in")
214246
# Link libraries for shared code
215247
for library in self._linking_libraries:
216248
self._final_module.link_in(
@@ -222,8 +254,12 @@ def finalize(self):
222254

223255
self._final_module.verify()
224256
self._finalize_final_module()
257+
print("CodeLibrary::finalize end", self._name)
258+
if self._name == 'f1':
259+
import pdb
260+
#pdb.set_trace()
225261

226-
def _finalize_dyanmic_globals(self):
262+
def _finalize_dynamic_globals(self):
227263
# Scan for dynamic globals
228264
for gv in self._final_module.global_variables:
229265
if gv.name.startswith('numba.dynamic.globals'):
@@ -241,7 +277,11 @@ def _finalize_final_module(self):
241277
"""
242278
Make the underlying LLVM module ready to use.
243279
"""
244-
self._finalize_dyanmic_globals()
280+
if self._name == "f1":
281+
import pdb
282+
#pdb.set_trace()
283+
284+
self._finalize_dynamic_globals()
245285
self._verify_declare_only_symbols()
246286

247287
# Remember this on the module, for the object cache hooks
@@ -261,6 +301,7 @@ def _finalize_final_module(self):
261301
dump("OPTIMIZED DUMP %s" % self._name, self.get_llvm_str())
262302

263303
if config.DUMP_ASSEMBLY:
304+
test_link()
264305
# CUDA backend cannot return assembly this early, so don't
265306
# attempt to dump assembly if nothing is produced.
266307
asm = self.get_asm_str()
@@ -499,6 +540,8 @@ def scan_unresolved_symbols(self, module, engine):
499540
prefix = self.PREFIX
500541

501542
for gv in module.global_variables:
543+
if Todd:
544+
print("scan_unresolved_symbols", gv)
502545
if gv.name.startswith(prefix):
503546
sym = gv.name[len(prefix):]
504547
# Avoid remapping to existing GV
@@ -515,17 +558,23 @@ def scan_defined_symbols(self, module):
515558
Scan and track all defined symbols.
516559
"""
517560
for fn in module.functions:
561+
if Todd:
562+
print("scan_defined_symbols", fn)
518563
if not fn.is_declaration:
519564
self._defined.add(fn.name)
520565

521566
def resolve(self, engine):
522567
"""
523568
Fix unresolved symbols if they are defined.
524569
"""
570+
if Todd:
571+
print("RuntimeLinker resolve", self._unresolved, self._defined)
525572
# An iterator to get all unresolved but available symbols
526573
pending = [name for name in self._unresolved if name in self._defined]
527574
# Resolve pending symbols
528575
for name in pending:
576+
if Todd:
577+
print("name", name)
529578
# Get runtime address
530579
fnptr = engine.get_function_address(name)
531580
# Fix all usage
@@ -570,13 +619,17 @@ def _load_defined_symbols(self, mod):
570619
"""Extract symbols from the module
571620
"""
572621
for gsets in (mod.functions, mod.global_variables):
622+
if Todd:
623+
print("_load_defined_symbols", gsets, self._defined_symbols)
573624
self._defined_symbols |= {gv.name for gv in gsets
574625
if not gv.is_declaration}
575626

576627
def add_module(self, module):
577628
"""Override ExecutionEngine.add_module
578629
to keep info about defined symbols.
579630
"""
631+
if Todd:
632+
print("add_module", module)
580633
self._load_defined_symbols(module)
581634
return self._ee.add_module(module)
582635

0 commit comments

Comments
 (0)