Skip to content

Commit fa996e3

Browse files
(numba/dppl) Copy ir & funcdesc using same cache. Introduce new function to copy IR. Do not copy non-copyable objects (#78)
1 parent 0539b82 commit fa996e3

File tree

2 files changed

+199
-101
lines changed

2 files changed

+199
-101
lines changed

dppl_lowerer.py

Lines changed: 178 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -962,16 +962,189 @@ def __init__(self, *args, **kwargs):
962962
super().__init__(*args, **kwargs)
963963

964964

965+
def relatively_deep_copy(obj, memo):
966+
# WARNING: there are some issues with generators which were not investigated and whose root cause has not been found.
967+
# Though copied IR seems to work fine there are some extra references kept on generator objects which may result
968+
# in memory "leak"
969+
970+
obj_id = id(obj)
971+
if obj_id in memo:
972+
return memo[obj_id]
973+
974+
from numba.core.dispatcher import _DispatcherBase
975+
from numba.core.types.functions import Function, Dispatcher
976+
from numba.core.bytecode import FunctionIdentity
977+
from numba.core.typing.templates import Signature
978+
from numba.dppl.compiler import DPPLFunctionTemplate
979+
from numba.core.compiler import CompileResult
980+
from numba.np.ufunc.dufunc import DUFunc
981+
from ctypes import _CFuncPtr
982+
from cffi.api import FFI
983+
from types import ModuleType
984+
from numba.core.types.abstract import Type
985+
986+
# objects which shouldn't or can't be copied, and which it is ok not to copy.
987+
if isinstance(obj, (FunctionIdentity, _DispatcherBase, Function, Type, Dispatcher, ModuleType,
988+
Signature, DPPLFunctionTemplate, CompileResult,
989+
DUFunc, _CFuncPtr, FFI,
990+
type, str, bool, type(None))):
991+
return obj
992+
993+
from numba.core.ir import Global, FreeVar
994+
from numba.core.ir import FunctionIR
995+
from numba.core.postproc import PostProcessor
996+
from numba.core.funcdesc import FunctionDescriptor
997+
998+
if isinstance(obj, FunctionDescriptor):
999+
cpy = FunctionDescriptor(native=obj.native, modname=obj.modname, qualname=obj.qualname,
1000+
unique_name=obj.unique_name, doc=obj.doc,
1001+
typemap=relatively_deep_copy(obj.typemap, memo),
1002+
restype=obj.restype,
1003+
calltypes=relatively_deep_copy(obj.calltypes, memo),
1004+
args=obj.args, kws=obj.kws, mangler=None,
1005+
argtypes=relatively_deep_copy(obj.argtypes, memo),
1006+
inline=obj.inline, noalias=obj.noalias, env_name=obj.env_name,
1007+
global_dict=obj.global_dict)
1008+
# mangler parameter is not saved in FunctionDescriptor, but is used to generate the name.
1009+
# So pass None as mangler parameter and then copy mangled_name by hand
1010+
cpy.mangled_name = obj.mangled_name
1011+
1012+
memo[obj_id] = cpy
1013+
1014+
return cpy
1015+
1016+
if isinstance(obj, FunctionIR):
1017+
# PostProcessor does the following:
1018+
# 1. canonicalizes cfg, modifying IR
1019+
# 2. fills internal generators status
1020+
# 3. creates and fills VariableLifetime object
1021+
# We can't copy these objects. So in order to have copies of them we need to run PostProcessor on the copied IR.
1022+
# This means that if PostProcessor wasn't run on the original object, the copied object would differ.
1023+
# In order to avoid this we run PostProcessor on the original object first.
1024+
# This means that copy of IR actually has a side effect on it.
1025+
pp = PostProcessor(obj)
1026+
pp.run()
1027+
cpy = FunctionIR(blocks=relatively_deep_copy(obj.blocks, memo),
1028+
is_generator=relatively_deep_copy(obj.is_generator, memo),
1029+
func_id=relatively_deep_copy(obj.func_id, memo),
1030+
loc=obj.loc,
1031+
definitions=relatively_deep_copy(obj._definitions, memo),
1032+
arg_count=obj.arg_count,
1033+
arg_names=relatively_deep_copy(obj.arg_names, memo))
1034+
pp = PostProcessor(cpy)
1035+
pp.run()
1036+
1037+
memo[obj_id] = cpy
1038+
1039+
return cpy
1040+
1041+
if isinstance(obj, Global):
1042+
cpy = Global(name=obj.name, value=obj.value, loc=obj.loc)
1043+
memo[obj_id] = cpy
1044+
1045+
return cpy
1046+
1047+
if isinstance(obj, FreeVar):
1048+
cpy = FreeVar(index=obj.index, name=obj.name, value=obj.value, loc=obj.loc)
1049+
memo[obj_id] = cpy
1050+
1051+
return cpy
1052+
1053+
# for containers we need to copy container itself first. And then fill it with copied items.
1054+
if isinstance(obj, list):
1055+
cpy = copy.copy(obj)
1056+
cpy.clear()
1057+
for item in obj:
1058+
cpy.append(relatively_deep_copy(item, memo))
1059+
memo[obj_id] = cpy
1060+
return cpy
1061+
elif isinstance(obj, dict):
1062+
cpy = copy.copy(obj)
1063+
cpy.clear()
1064+
for key, item in obj.items():
1065+
cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(item, memo)
1066+
memo[obj_id] = cpy
1067+
return cpy
1068+
elif isinstance(obj, tuple):
1069+
# subclass constructors could have different parameters than superclass.
1070+
# e.g. tuple and namedtuple constructors accepts quite different parameters.
1071+
# it is better to have separate section for namedtuple
1072+
tpl = tuple([relatively_deep_copy(item, memo) for item in obj])
1073+
if type(obj) == tuple:
1074+
cpy = tpl
1075+
else:
1076+
cpy = type(obj)(*tpl)
1077+
memo[obj_id] = cpy
1078+
return cpy
1079+
elif isinstance(obj, set):
1080+
cpy = copy.copy(obj)
1081+
cpy.clear()
1082+
for item in obj:
1083+
cpy.add(relatively_deep_copy(item, memo))
1084+
memo[obj_id] = cpy
1085+
return cpy
1086+
1087+
# some python objects are not copyable. In such case exception would be raised
1088+
# it is just a convenient point to find such objects
1089+
try:
1090+
cpy = copy.copy(obj)
1091+
except Exception as e:
1092+
raise e
1093+
1094+
# __slots__ for subclass specify only members declared in subclass. So to get all members we need to go through
1095+
# all superclasses
1096+
def get_slots_members(obj):
1097+
keys = []
1098+
typ = obj
1099+
if not isinstance(typ, type):
1100+
typ = type(obj)
1101+
1102+
try:
1103+
if len(typ.__slots__):
1104+
keys.extend(typ.__slots__)
1105+
if len(typ.__bases__):
1106+
for base in typ.__bases__:
1107+
keys.extend(get_slots_members(base))
1108+
except:
1109+
pass
1110+
1111+
return keys
1112+
1113+
memo[obj_id] = cpy
1114+
keys = []
1115+
1116+
# Objects have either __dict__ or __slots__ or neither.
1117+
# If object has none of it and it is copyable we already made a copy, just return it
1118+
# If object is not copyable we shouldn't reach this point.
1119+
try:
1120+
keys = obj.__dict__.keys()
1121+
except:
1122+
try:
1123+
obj.__slots__
1124+
keys = get_slots_members(obj)
1125+
except:
1126+
return cpy
1127+
1128+
for key in keys:
1129+
attr = getattr(obj, key)
1130+
attr_cpy = relatively_deep_copy(attr, memo)
1131+
setattr(cpy, key, attr_cpy)
1132+
1133+
return cpy
1134+
1135+
9651136
class DPPLLower(Lower):
9661137
def __init__(self, context, library, fndesc, func_ir, metadata=None):
9671138
Lower.__init__(self, context, library, fndesc, func_ir, metadata)
968-
fndesc_cpu = copy.copy(fndesc)
969-
fndesc_cpu.calltypes = fndesc.calltypes.copy()
970-
fndesc_cpu.typemap = fndesc.typemap.copy()
1139+
memo = {}
1140+
1141+
fndesc_cpu = relatively_deep_copy(fndesc, memo)
1142+
func_ir_cpu = relatively_deep_copy(func_ir, memo)
1143+
9711144

9721145
cpu_context = context.cpu_context if isinstance(context, DPPLTargetContext) else context
973-
self.gpu_lower = Lower(context, library, fndesc, func_ir.copy(), metadata)
974-
self.cpu_lower = Lower(cpu_context, library, fndesc_cpu, func_ir.copy(), metadata)
1146+
self.gpu_lower = Lower(context, library, fndesc, func_ir, metadata)
1147+
self.cpu_lower = Lower(cpu_context, library, fndesc_cpu, func_ir_cpu, metadata)
9751148

9761149
def lower(self):
9771150
# Basically we are trying to lower on GPU first and if failed - try to lower on CPU.
@@ -991,11 +1164,9 @@ def lower(self):
9911164
# different solution should be used.
9921165

9931166
try:
994-
#lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
9951167
lowering.lower_extensions[parfor.Parfor].append(lower_parfor_rollback)
9961168
self.gpu_lower.lower()
9971169
self.base_lower = self.gpu_lower
998-
#lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
9991170
lowering.lower_extensions[parfor.Parfor].pop()
10001171
except Exception as e:
10011172
if numba.dppl.compiler.DEBUG:
@@ -1015,80 +1186,13 @@ def create_cpython_wrapper(self, release_gil=False):
10151186

10161187

10171188
def copy_block(block):
1018-
def relatively_deep_copy(obj, memo):
1019-
obj_id = id(obj)
1020-
if obj_id in memo:
1021-
return memo[obj_id]
1022-
1023-
from numba.core.dispatcher import Dispatcher
1024-
from numba.core.types.functions import Function
1025-
from types import ModuleType
1026-
1027-
if isinstance(obj, (Dispatcher, Function, ModuleType)):
1028-
return obj
1029-
1030-
if isinstance(obj, list):
1031-
cpy = copy.copy(obj)
1032-
cpy.clear()
1033-
for item in obj:
1034-
cpy.append(relatively_deep_copy(item, memo))
1035-
memo[obj_id] = cpy
1036-
return cpy
1037-
elif isinstance(obj, dict):
1038-
cpy = copy.copy(obj)
1039-
cpy.clear()
1040-
# do we need to copy keys?
1041-
for key, item in obj.items():
1042-
cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(item, memo)
1043-
memo[obj_id] = cpy
1044-
return cpy
1045-
elif isinstance(obj, tuple):
1046-
cpy = type(obj)([relatively_deep_copy(item, memo) for item in obj])
1047-
memo[obj_id] = cpy
1048-
return cpy
1049-
elif isinstance(obj, set):
1050-
cpy = copy.copy(obj)
1051-
cpy.clear()
1052-
for item in obj:
1053-
cpy.add(relatively_deep_copy(item, memo))
1054-
memo[obj_id] = cpy
1055-
return cpy
1056-
1057-
cpy = copy.copy(obj)
1058-
1059-
memo[obj_id] = cpy
1060-
keys = []
1061-
try:
1062-
keys = obj.__dict__.keys()
1063-
except:
1064-
try:
1065-
keys = obj.__slots__
1066-
except:
1067-
return cpy
1068-
1069-
for key in keys:
1070-
attr = getattr(obj, key)
1071-
attr_cpy = relatively_deep_copy(attr, memo)
1072-
setattr(cpy, key, attr_cpy)
1073-
1074-
return cpy
1075-
10761189
memo = {}
10771190
new_block = ir.Block(block.scope, block.loc)
10781191
new_block.body = [relatively_deep_copy(stmt, memo) for stmt in block.body]
10791192
return new_block
10801193

10811194

10821195
def lower_parfor_rollback(lowerer, parfor):
1083-
try:
1084-
cache_parfor_races = copy.copy(parfor.races)
1085-
cache_parfor_params = copy.copy(parfor.params)
1086-
cache_parfor_loop_body = {key: copy_block(block) for key, block in parfor.loop_body.items()}
1087-
cache_parfor_init_block = parfor.init_block.copy()
1088-
cache_parfor_loop_nests = parfor.loop_nests.copy()
1089-
except Exception as e:
1090-
raise CopyIRException("Failed to copy IR") from e
1091-
10921196
try:
10931197
_lower_parfor_gufunc(lowerer, parfor)
10941198
if numba.dppl.compiler.DEBUG:
@@ -1098,12 +1202,6 @@ def lower_parfor_rollback(lowerer, parfor):
10981202
msg = "Failed to lower parfor on DPPL-device.\nTo see details set environment variable NUMBA_DPPL_DEBUG=1"
10991203
warnings.warn(NumbaPerformanceWarning(msg, parfor.loc))
11001204
raise e
1101-
finally:
1102-
parfor.params = cache_parfor_params
1103-
parfor.loop_body = cache_parfor_loop_body
1104-
parfor.init_block = cache_parfor_init_block
1105-
parfor.loop_nests = cache_parfor_loop_nests
1106-
parfor.races = cache_parfor_races
11071205

11081206

11091207
def dppl_lower_array_expr(lowerer, expr):

tests/dppl/test_dppl_fallback.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,20 @@
66
from numba import dppl
77
from numba.dppl.testing import unittest
88
from numba.dppl.testing import DPPLTestCase
9+
from numba.tests.support import captured_stderr
910
import dpctl
1011
import sys
1112
import io
1213

1314

1415
@unittest.skipUnless(dpctl.has_gpu_queues(), 'test only on GPU system')
1516
class TestDPPLFallback(DPPLTestCase):
16-
17-
def capture_stderr(self, func):
18-
backup = sys.stderr
19-
sys.stderr = io.StringIO()
20-
result = func()
21-
out = sys.stderr.getvalue()
22-
sys.stderr.close()
23-
sys.stderr = backup
24-
25-
return out, result
26-
27-
def test_dppl_fallback(self):
28-
17+
def test_dppl_fallback_inner_call(self):
2918
@numba.jit
3019
def fill_value(i):
3120
return i
3221

33-
def np_rand_fallback():
22+
def inner_call_fallback():
3423
x = 10
3524
a = np.empty(shape=x, dtype=np.float32)
3625

@@ -39,17 +28,28 @@ def np_rand_fallback():
3928

4029
return a
4130

42-
def run_dppl():
43-
dppl = numba.njit(parallel={'offload':True})(np_rand_fallback)
44-
return dppl()
31+
with captured_stderr() as msg:
32+
dppl = numba.njit(parallel={'offload':True})(inner_call_fallback)
33+
dppl_result = dppl()
34+
35+
ref_result = inner_call_fallback()
36+
37+
np.testing.assert_array_equal(dppl_result, ref_result)
38+
self.assertTrue('Failed to lower parfor on DPPL-device' in msg.getvalue())
39+
40+
def test_dppl_fallback_reductions(self):
41+
def reduction(a):
42+
return np.amax(a)
4543

46-
ref = np_rand_fallback
44+
a = np.ones(10)
45+
with captured_stderr() as msg:
46+
dppl = numba.njit(parallel={'offload':True})(reduction)
47+
dppl_result = dppl(a)
4748

48-
err, dppl_result = self.capture_stderr(run_dppl)
49-
ref_result = ref()
49+
ref_result = reduction(a)
5050

5151
np.testing.assert_array_equal(dppl_result, ref_result)
52-
self.assertTrue('Failed to lower parfor on DPPL-device' in err)
52+
self.assertTrue('Failed to lower parfor on DPPL-device' in msg.getvalue())
5353

5454

5555
if __name__ == '__main__':

0 commit comments

Comments
 (0)