Skip to content

Update to Numba 0.54 #457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ https://intelpython.github.io/dpnp/

## Dependencies

* numba 0.53.* (IntelPython/numba)
* numba 0.53.*, 0.54.* (IntelPython/numba)
* dpctl 0.8.*
* dpnp 0.6.* (optional)
* llvm-spirv (SPIRV generation from LLVM IR)
Expand Down
4 changes: 2 additions & 2 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ requirements:
- python
- setuptools
- cython
- numba 0.53*
    - numba >=0.53.0,<0.55
- dpctl 0.8.*
- dpnp >=0.6*,<0.7* # [linux]
- wheel
run:
- python
- numba 0.53*
    - numba >=0.53.0,<0.55
- dpctl 0.8.*
- spirv-tools
- llvm-spirv
Expand Down
2 changes: 1 addition & 1 deletion docs/user_guides/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Installation

Numba-dppy depends on following components:

* numba 0.53.* (`Intel Python Numba`_)
* numba 0.53.*, 0.54.* (`Intel Python Numba`_)
* dpctl 0.8.* (`Intel Python dpctl`_)
* dpnp >=0.6.* (optional, `Intel Python DPNP`_)
* `llvm-spirv`_ (SPIRV generation from LLVM IR)
Expand Down
6 changes: 3 additions & 3 deletions numba_dppy/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from llvmlite import binding as ll
from llvmlite.llvmpy import core as lc

from numba.core.codegen import BaseCPUCodegen, CodeLibrary
from numba.core.codegen import CPUCodegen, CPUCodeLibrary
from numba.core import utils

from numba_dppy import config
Expand All @@ -35,7 +35,7 @@
}


class SPIRVCodeLibrary(CodeLibrary):
class SPIRVCodeLibrary(CPUCodeLibrary):
def _optimize_functions(self, ll_module):
pass

Expand Down Expand Up @@ -67,7 +67,7 @@ def get_asm_str(self):
return None


class JITSPIRVCodegen(BaseCPUCodegen):
class JITSPIRVCodegen(CPUCodegen):
"""
This codegen implementation generates optimized SPIR 2.0
"""
Expand Down
8 changes: 5 additions & 3 deletions numba_dppy/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from numba.core.typing.templates import ConcreteTemplate
from numba.core import types, compiler, ir
from numba.core.typing.templates import AbstractTemplate
from numba.core.compiler_lock import global_compiler_lock
import ctypes
from types import FunctionType
from inspect import signature
Expand Down Expand Up @@ -89,6 +90,7 @@ def define_pipelines(self):
return pms


@global_compiler_lock
def compile_with_dppy(pyfunc, return_type, args, debug=None):
# First compilation will trigger the initialization of the OpenCL backend.
from .descriptor import dppy_target
Expand All @@ -99,9 +101,9 @@ def compile_with_dppy(pyfunc, return_type, args, debug=None):
flags = compiler.Flags()
# Do not compile (generate native code), just lower (to LLVM)
flags.debuginfo = config.DEBUGINFO_DEFAULT
flags.set("no_compile")
flags.set("no_cpython_wrapper")
flags.unset("nrt")
flags.no_compile = True
flags.no_cpython_wrapper = True
flags.nrt = False

if debug is not None:
flags.debuginfo = debug
Expand Down
6 changes: 2 additions & 4 deletions numba_dppy/descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@

class DPPYTarget(TargetDescriptor):
options = CPUTargetOptions
# typingctx = DPPYTypingContext()
# targetctx = DPPYTargetContext(typingctx)

@utils.cached_property
def _toplevel_target_context(self):
# Lazily-initialized top-level target context, for all threads
return DPPYTargetContext(self.typing_context)
return DPPYTargetContext(self.typing_context, self._target_name)

@utils.cached_property
def _toplevel_typing_context(self):
Expand All @@ -52,4 +50,4 @@ def typing_context(self):


# The global DPPY target
dppy_target = DPPYTarget()
dppy_target = DPPYTarget("SyclDevice")
2 changes: 1 addition & 1 deletion numba_dppy/dppy_lowerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def _lower_parfor_gufunc(lowerer, parfor):

# compile parfor body as a separate function to be used with GUFuncWrapper
flags = copy.copy(parfor.flags)
flags.set("error_model", "numpy")
flags.error_model = "numpy"

# Can't get here unless flags.set('auto_parallel', ParallelOptions(True))
index_var_typ = typemap[parfor.loop_nests[0].index_variable.name]
Expand Down
10 changes: 7 additions & 3 deletions numba_dppy/dppy_offload_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
# limitations under the License.

from numba.core import dispatcher, compiler
from numba.core.registry import cpu_target, dispatcher_registry
from numba.core.registry import cpu_target
from numba.core.target_extension import dispatcher_registry, target_registry
from numba_dppy import config
from numba_dppy.target import SyclDevice


class DppyOffloadDispatcher(dispatcher.Dispatcher):
Expand Down Expand Up @@ -60,5 +62,7 @@ def __init__(
)


dispatcher_registry["__dppy_offload_gpu__"] = DppyOffloadDispatcher
dispatcher_registry["__dppy_offload_cpu__"] = DppyOffloadDispatcher
target_registry['__dppy_offload_gpu__'] = SyclDevice
target_registry['__dppy_offload_cpu__'] = SyclDevice

dispatcher_registry[SyclDevice] = DppyOffloadDispatcher
6 changes: 4 additions & 2 deletions numba_dppy/dppy_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

from numba.parfors.parfor import (
PreParforPass as _parfor_PreParforPass,
replace_functions_map,
swap_functions_map,
)
from numba.parfors.parfor import ParforPass as _parfor_ParforPass
from numba.parfors.parfor import Parfor
Expand Down Expand Up @@ -173,7 +173,7 @@ def run_pass(self, state):

# Ensure we have an IR and type information.
assert state.func_ir
functions_map = replace_functions_map.copy()
functions_map = swap_functions_map.copy()
functions_map.pop(("dot", "numpy"), None)
functions_map.pop(("sum", "numpy"), None)
functions_map.pop(("prod", "numpy"), None)
Expand All @@ -188,6 +188,7 @@ def run_pass(self, state):
state.type_annotation.typemap,
state.type_annotation.calltypes,
state.typingctx,
state.targetctx,
state.flags.auto_parallel,
state.parfor_diagnostics.replaced_fns,
replace_functions_map=functions_map,
Expand Down Expand Up @@ -223,6 +224,7 @@ def run_pass(self, state):
state.type_annotation.calltypes,
state.return_type,
state.typingctx,
state.targetctx,
state.flags.auto_parallel,
state.flags,
state.metadata,
Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/driver/dpctl_capi_fn_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import llvmlite.llvmpy.core as lc
from llvmlite.ir import builder
from numba.core import types
from numba.core import types, cgutils

import numba_dppy.utils as utils

Expand Down Expand Up @@ -48,7 +48,7 @@ def _build_dpctl_function(builder, return_ty, arg_list, func_name):

"""
func_ty = lc.Type.function(return_ty, arg_list)
fn = builder.module.get_or_insert_function(func_ty, func_name)
fn = cgutils.get_or_insert_function(builder.module, func_ty, func_name)
return fn

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/numpy_usm_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ def allocator_UsmArray(context, builder, size, align):

# Get the Numba external allocator for USM memory.
ext_allocator_fnty = ir.FunctionType(cgutils.voidptr_t, [])
ext_allocator_fn = mod.get_or_insert_function(
ext_allocator_fn = cgutils.get_or_insert_function(mod,
ext_allocator_fnty, name="usmarray_get_ext_allocator"
)
ext_allocator = builder.call(ext_allocator_fn, [])
# Get the Numba function to allocate an aligned array with an external allocator.
fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32, cgutils.voidptr_t])
fn = mod.get_or_insert_function(
fn = cgutils.get_or_insert_function(mod,
fnty, name="NRT_MemInfo_alloc_safe_aligned_external"
)
fn.return_value.add_attribute("noalias")
Expand Down
6 changes: 3 additions & 3 deletions numba_dppy/ocl/oclimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _declare_function(context, builder, name, sig, cargs, mangler=mangle_c):
llargs = [context.get_value_type(t) for t in sig.args]
fnty = Type.function(llretty, llargs)
mangled = mangler(name, cargs)
fn = mod.get_or_insert_function(fnty, mangled)
fn = cgutils.get_or_insert_function(mod, fnty, mangled)
fn.calling_convention = target.CC_SPIR_FUNC
return fn

Expand Down Expand Up @@ -226,7 +226,7 @@ def insert_and_call_atomic_fn(
llargs = [ll_p, context.get_value_type(sig.args[2])]
fnty = ir.FunctionType(llretty, llargs)

fn = mod.get_or_insert_function(fnty, name)
fn = cgutils.get_or_insert_function(mod, fnty, name)
fn.calling_convention = target.CC_SPIR_FUNC

generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)
Expand Down Expand Up @@ -291,7 +291,7 @@ def native_atomic_add(context, builder, sig, args):
)

fnty = ir.FunctionType(retty, spirv_fn_arg_types)
fn = builder.module.get_or_insert_function(fnty, mangled_fn_name)
fn = cgutils.get_or_insert_function(builder.module, fnty, mangled_fn_name)
fn.calling_convention = target.CC_SPIR_FUNC

sycl_memory_order = atomic_helper.sycl_memory_order.relaxed
Expand Down
2 changes: 1 addition & 1 deletion numba_dppy/printimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
def declare_print(lmod):
voidptrty = lc.Type.pointer(lc.Type.int(8), addrspace=address_space.GENERIC)
printfty = lc.Type.function(lc.Type.int(), [voidptrty], var_arg=True)
printf = lmod.get_or_insert_function(printfty, "printf")
printf = cgutils.get_or_insert_function(lmod, printfty, "printf")
return printf


Expand Down
24 changes: 20 additions & 4 deletions numba_dppy/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from . import codegen
from numba_dppy.dppy_array_type import DPPYArray, DPPYArrayModel
from numba_dppy.utils import npytypes_array_to_dppy_array, address_space, calling_conv
from numba.core.target_extension import GPU, target_registry


CC_SPIR_KERNEL = "spir_kernel"
Expand Down Expand Up @@ -104,6 +105,13 @@ def _init_data_model_manager():

spirv_data_model_manager = _init_data_model_manager()

class SyclDevice(GPU):
"""Mark the hardware target as SYCL Device.
"""


target_registry['SyclDevice'] = SyclDevice


class DPPYTargetContext(BaseContext):
"""A numba_dppy-specific target context inheriting Numba's ``BaseContext``.
Expand Down Expand Up @@ -185,7 +193,7 @@ def _finalize_wrapper_module(self, fn):
fn.calling_convention = CC_SPIR_KERNEL

# Mark kernels
ocl_kernels = mod.get_or_insert_named_metadata("opencl.kernels")
ocl_kernels = cgutils.get_or_insert_named_metadata(mod, "opencl.kernels")
ocl_kernels.add(
lc.MetaData.get(
mod,
Expand All @@ -209,7 +217,7 @@ def _finalize_wrapper_module(self, fn):
]

for name in others:
nmd = mod.get_or_insert_named_metadata(name)
nmd = cgutils.get_or_insert_named_metadata(mod, name)
if not nmd.operands:
nmd.add(empty_md)

Expand Down Expand Up @@ -247,13 +255,17 @@ def _generate_kernel_wrapper(self, func, argtypes):
module.get_function(func.name).linkage = "internal"
return wrapper

def __init__(self, typingctx, target='SyclDevice'):
super().__init__(typingctx, target)

def init(self):
self._internal_codegen = codegen.JITSPIRVCodegen("numba_dppy.jit")
self._target_data = ll.create_target_data(
codegen.SPIR_DATA_LAYOUT[utils.MACHINE_BITS]
)
# Override data model manager to SPIR model
self.data_model_manager = spirv_data_model_manager
import numba.cpython.unicode
self.data_model_manager = _init_data_model_manager()
Comment on lines +267 to +268
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@reazulhoque
What is this code for?

self.extra_compile_options = dict()

from numba.np.ufunc_db import _lazy_init_db
Expand All @@ -265,6 +277,10 @@ def init(self):
self.ufunc_db = copy.deepcopy(ufunc_db)
self.cpu_context = cpu_target.target_context

# Overrides
def create_module(self, name):
return self._internal_codegen._create_empty_module(name)

def replace_numpy_ufunc_with_opencl_supported_functions(self):
from numba_dppy.ocl.mathimpl import lower_ocl_impl, sig_mapper

Expand Down Expand Up @@ -373,7 +389,7 @@ def declare_function(self, module, fndesc):

"""
fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
fn = module.get_or_insert_function(fnty, name=fndesc.mangled_name)
fn = cgutils.get_or_insert_function(module, fnty, name=fndesc.mangled_name)
if not self.enable_debuginfo:
fn.attributes.add("alwaysinline")
ret = super(DPPYTargetContext, self).declare_function(module, fndesc)
Expand Down
20 changes: 11 additions & 9 deletions numba_dppy/target_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from numba.core import registry, serialize, dispatcher
from numba import types
from numba.core.errors import UnsupportedError
from numba.core.target_extension import resolve_dispatcher_from_str, target_registry, dispatcher_registry
import dpctl
from numba.core.compiler_lock import global_compiler_lock

Expand Down Expand Up @@ -90,30 +91,31 @@ def get_current_disp(self):

if target is None:
if dpctl.get_current_device_type() == dpctl.device_type.gpu:
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_gpu
]
]]
elif dpctl.get_current_device_type() == dpctl.device_type.cpu:
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_cpu
]
]]
else:
if dpctl.is_in_device_context():
raise UnsupportedError("Unknown dppy device type")
if offload:
if dpctl.has_gpu_queues():
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_gpu
]
]]
elif dpctl.has_cpu_queues():
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_cpu
]
]]

if target is None:
target = "cpu"

return registry.dispatcher_registry[target]
return resolve_dispatcher_from_str(target)


def _reduce_states(self):
return dict(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def spirv_compile():
packages = find_packages(include=["numba_dppy", "numba_dppy.*"])
build_requires = ["cython"]
install_requires = [
"numba >={},<{}".format("0.53.1", "0.54"),
"numba >={},<{}".format("0.53.1", "0.55"),
"dpctl",
]

Expand Down