@@ -962,16 +962,189 @@ def __init__(self, *args, **kwargs):
962
962
super ().__init__ (* args , ** kwargs )
963
963
964
964
965
def _assigned_slot_names(obj):
    """Return the names of the ``__slots__`` members currently set on ``obj``.

    ``__slots__`` of a class lists only the members declared by that class
    itself, so every class in the MRO of ``type(obj)`` is inspected.

    The following names are left out because they cannot be read and written
    back through ``getattr``/``setattr`` under the declared name:

    * the bookkeeping slots ``__dict__`` and ``__weakref__``;
    * slots that were declared but never assigned;
    * name-mangled private slots (``__name``), which are not reachable under
      their declared name.
    """
    names = []
    seen = set()
    for klass in type(obj).__mro__:
        declared = vars(klass).get('__slots__', ())
        if isinstance(declared, str):
            # A bare string declares one slot, not one slot per character.
            declared = (declared,)
        for name in declared:
            if name in ('__dict__', '__weakref__') or name in seen:
                continue
            seen.add(name)
            if hasattr(obj, name):
                names.append(name)
    return names


def relatively_deep_copy(obj, memo):
    """Recursively copy ``obj``, sharing objects that must not be duplicated.

    Parameters
    ----------
    obj : object
        Object to copy.
    memo : dict
        Maps ``id(original)`` to the copy already made for it. Pass the same
        dict to several calls to preserve object identity across them.

    Returns
    -------
    object
        ``obj`` itself for the pass-through types listed below, otherwise a
        copy whose containers and attributes were copied recursively.

    Notes
    -----
    Copying a ``FunctionIR`` runs ``PostProcessor`` on the *original* IR as
    well as on the copy, so this function has a side effect on its input.

    WARNING: there are some issues with generators which were not investigated
    and whose root cause was not found. Though the copied IR seems to work
    fine, some extra references are kept on generator objects, which may
    result in a memory "leak".
    """
    obj_id = id(obj)
    if obj_id in memo:
        return memo[obj_id]

    from numba.core.dispatcher import _DispatcherBase
    from numba.core.types.functions import Function, Dispatcher
    from numba.core.bytecode import FunctionIdentity
    from numba.core.typing.templates import Signature
    from numba.dppl.compiler import DPPLFunctionTemplate
    from numba.core.compiler import CompileResult
    from numba.np.ufunc.dufunc import DUFunc
    from ctypes import _CFuncPtr
    from cffi.api import FFI
    from types import ModuleType
    from numba.core.types.abstract import Type

    # Objects which shouldn't or can't be copied, and for which sharing is OK.
    if isinstance(obj, (FunctionIdentity, _DispatcherBase, Function, Type, Dispatcher, ModuleType,
                        Signature, DPPLFunctionTemplate, CompileResult,
                        DUFunc, _CFuncPtr, FFI,
                        type, str, bool, type(None))):
        return obj

    from numba.core.ir import FreeVar, FunctionIR, Global
    from numba.core.postproc import PostProcessor
    from numba.core.funcdesc import FunctionDescriptor

    if isinstance(obj, FunctionDescriptor):
        cpy = FunctionDescriptor(native=obj.native, modname=obj.modname, qualname=obj.qualname,
                                 unique_name=obj.unique_name, doc=obj.doc,
                                 typemap=relatively_deep_copy(obj.typemap, memo),
                                 restype=obj.restype,
                                 calltypes=relatively_deep_copy(obj.calltypes, memo),
                                 args=obj.args, kws=obj.kws, mangler=None,
                                 argtypes=relatively_deep_copy(obj.argtypes, memo),
                                 inline=obj.inline, noalias=obj.noalias, env_name=obj.env_name,
                                 global_dict=obj.global_dict)
        # The mangler parameter is not saved in FunctionDescriptor, it is only
        # used to generate the name. So pass None as the mangler and copy
        # mangled_name by hand.
        cpy.mangled_name = obj.mangled_name

        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, FunctionIR):
        # PostProcessor does the following:
        # 1. canonicalizes the CFG, modifying the IR
        # 2. fills internal generator status
        # 3. creates and fills a VariableLifetime object
        # We can't copy these objects, so to get a copy of them we run
        # PostProcessor on the copied IR. If PostProcessor had not been run on
        # the original, the copy would differ from it; to avoid that,
        # PostProcessor is run on the original first.
        # This means that copying the IR has a side effect on the original.
        PostProcessor(obj).run()
        cpy = FunctionIR(blocks=relatively_deep_copy(obj.blocks, memo),
                         is_generator=relatively_deep_copy(obj.is_generator, memo),
                         func_id=relatively_deep_copy(obj.func_id, memo),
                         loc=obj.loc,
                         definitions=relatively_deep_copy(obj._definitions, memo),
                         arg_count=obj.arg_count,
                         arg_names=relatively_deep_copy(obj.arg_names, memo))
        PostProcessor(cpy).run()

        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, Global):
        cpy = Global(name=obj.name, value=obj.value, loc=obj.loc)
        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, FreeVar):
        cpy = FreeVar(index=obj.index, name=obj.name, value=obj.value, loc=obj.loc)
        memo[obj_id] = cpy
        return cpy

    # For mutable containers the (empty) copy is registered in memo *before*
    # its items are copied, so a container that refers back to itself resolves
    # to the copy instead of recursing forever.
    if isinstance(obj, list):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for item in obj:
            cpy.append(relatively_deep_copy(item, memo))
        return cpy

    if isinstance(obj, dict):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for key, item in obj.items():
            cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(item, memo)
        return cpy

    if isinstance(obj, tuple):
        items = tuple([relatively_deep_copy(item, memo) for item in obj])
        # A tuple cannot be registered before it is built. If copying the
        # items already produced a copy of this tuple (through a cycle), reuse
        # it so identity is preserved.
        if obj_id in memo:
            return memo[obj_id]
        # Subclass constructors can take different parameters than tuple's:
        # e.g. a namedtuple takes its fields as separate arguments.
        if type(obj) is tuple:
            cpy = items
        else:
            cpy = type(obj)(*items)
        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, set):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for item in obj:
            cpy.add(relatively_deep_copy(item, memo))
        return cpy

    # Some Python objects are not copyable; copy.copy raises for them, which
    # makes this a convenient point to find such objects.
    cpy = copy.copy(obj)
    memo[obj_id] = cpy

    # Objects have either __dict__ or __slots__ or neither. With neither, the
    # shallow copy above is already the final result (no keys to process).
    # NOTE: for an object that has both, only the __dict__ attributes are
    # copied recursively; slot members keep their shallow copy.
    instance_dict = getattr(obj, '__dict__', None)
    if instance_dict is not None:
        # Snapshot the keys so attribute access cannot change the dict while
        # it is being iterated.
        keys = list(instance_dict)
    else:
        keys = _assigned_slot_names(obj)

    for key in keys:
        attr = getattr(obj, key)
        setattr(cpy, key, relatively_deep_copy(attr, memo))

    return cpy
1134
+
1135
+
965
1136
class DPPLLower (Lower ):
966
1137
def __init__(self, context, library, fndesc, func_ir, metadata=None):
    """Create the GPU lowerer and a CPU lowerer working on copied inputs.

    ``gpu_lower`` receives the given ``fndesc`` and ``func_ir``;
    ``cpu_lower`` receives relatively deep copies of both. The CPU lowerer
    uses ``context.cpu_context`` when ``context`` is a ``DPPLTargetContext``,
    otherwise ``context`` itself.
    """
    Lower.__init__(self, context, library, fndesc, func_ir, metadata)

    # A single memo is shared by both copies, so an object referenced from
    # both the descriptor and the IR is copied only once.
    copy_memo = {}
    cpu_fndesc = relatively_deep_copy(fndesc, copy_memo)
    cpu_func_ir = relatively_deep_copy(func_ir, copy_memo)

    if isinstance(context, DPPLTargetContext):
        cpu_context = context.cpu_context
    else:
        cpu_context = context

    self.gpu_lower = Lower(context, library, fndesc, func_ir, metadata)
    self.cpu_lower = Lower(cpu_context, library, cpu_fndesc, cpu_func_ir, metadata)
975
1148
976
1149
def lower (self ):
977
1150
# Basically we are trying to lower on GPU first and if failed - try to lower on CPU.
@@ -991,11 +1164,9 @@ def lower(self):
991
1164
# different solution should be used.
992
1165
993
1166
try :
994
- #lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
995
1167
lowering .lower_extensions [parfor .Parfor ].append (lower_parfor_rollback )
996
1168
self .gpu_lower .lower ()
997
1169
self .base_lower = self .gpu_lower
998
- #lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
999
1170
lowering .lower_extensions [parfor .Parfor ].pop ()
1000
1171
except Exception as e :
1001
1172
if numba .dppl .compiler .DEBUG :
@@ -1015,80 +1186,13 @@ def create_cpython_wrapper(self, release_gil=False):
1015
1186
1016
1187
1017
1188
def copy_block(block):
    """Return a new ``ir.Block`` holding relatively deep copies of ``block.body``.

    The new block is created with the scope and location of ``block``. One
    memo is shared by all statements, so an object referenced from several
    statements is copied once.
    """
    memo = {}
    duplicate = ir.Block(block.scope, block.loc)

    copied_body = []
    for stmt in block.body:
        copied_body.append(relatively_deep_copy(stmt, memo))
    duplicate.body = copied_body

    return duplicate
1080
1193
1081
1194
1082
1195
def lower_parfor_rollback (lowerer , parfor ):
1083
- try :
1084
- cache_parfor_races = copy .copy (parfor .races )
1085
- cache_parfor_params = copy .copy (parfor .params )
1086
- cache_parfor_loop_body = {key : copy_block (block ) for key , block in parfor .loop_body .items ()}
1087
- cache_parfor_init_block = parfor .init_block .copy ()
1088
- cache_parfor_loop_nests = parfor .loop_nests .copy ()
1089
- except Exception as e :
1090
- raise CopyIRException ("Failed to copy IR" ) from e
1091
-
1092
1196
try :
1093
1197
_lower_parfor_gufunc (lowerer , parfor )
1094
1198
if numba .dppl .compiler .DEBUG :
@@ -1098,12 +1202,6 @@ def lower_parfor_rollback(lowerer, parfor):
1098
1202
msg = "Failed to lower parfor on DPPL-device.\n To see details set environment variable NUMBA_DPPL_DEBUG=1"
1099
1203
warnings .warn (NumbaPerformanceWarning (msg , parfor .loc ))
1100
1204
raise e
1101
- finally :
1102
- parfor .params = cache_parfor_params
1103
- parfor .loop_body = cache_parfor_loop_body
1104
- parfor .init_block = cache_parfor_init_block
1105
- parfor .loop_nests = cache_parfor_loop_nests
1106
- parfor .races = cache_parfor_races
1107
1205
1108
1206
1109
1207
def dppl_lower_array_expr (lowerer , expr ):
0 commit comments