This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Semantics "with context" #57

Merged (4 commits, Oct 2, 2020)
51 changes: 29 additions & 22 deletions numba/core/decorators.py
@@ -148,12 +148,7 @@ def bar(x, y):
         target = options.pop('target')
         warnings.warn("The 'target' keyword argument is deprecated.", NumbaDeprecationWarning)
     else:
-        target = options.pop('_target', 'cpu')
-
-    parallel_option = options.get('parallel')
-    if isinstance(parallel_option, dict) and parallel_option.get('offload') is True:
-        from numba.dppl import dppl_offload_dispatcher
-        target = '__dppl_offload_gpu__'
+        target = options.pop('_target', None)

     options['boundscheck'] = boundscheck
@@ -187,22 +182,8 @@ def bar(x, y):


 def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args):
-    dispatcher = registry.dispatcher_registry[target]
-
-    def wrapper(func):
-        if extending.is_jitted(func):
-            raise TypeError(
-                "A jit decorator was called on an already jitted function "
-                f"{func}. If trying to access the original python "
-                f"function, use the {func}.py_func attribute."
-            )
-
-        if not inspect.isfunction(func):
-            raise TypeError(
-                "The decorated object is not a function (got type "
-                f"{type(func)})."
-            )

+    def wrapper(func, dispatcher):
         if config.ENABLE_CUDASIM and target == 'cuda':
             from numba import cuda
             return cuda.jit(func)
@@ -226,7 +207,33 @@ def wrapper(func):
             disp.disable_compile()
         return disp

-    return wrapper
+    def __wrapper(func):
+        if extending.is_jitted(func):
+            raise TypeError(
+                "A jit decorator was called on an already jitted function "
+                f"{func}. If trying to access the original python "
+                f"function, use the {func}.py_func attribute."
+            )
+
+        if not inspect.isfunction(func):
+            raise TypeError(
+                "The decorated object is not a function (got type "
+                f"{type(func)})."
+            )
+
+        if (target == 'npyufunc' or targetoptions.get('no_cpython_wrapper')
+                or sigs or config.DISABLE_JIT or not targetoptions.get('nopython')):
+            target_ = target
+            if target_ is None:
+                target_ = 'cpu'
+            disp = registry.dispatcher_registry[target_]
+            return wrapper(func, disp)
+
+        from numba.dppl.target_dispatcher import TargetDispatcher
+        disp = TargetDispatcher(func, wrapper, target, targetoptions.get('parallel'))
+        return disp
+
+    return __wrapper


 def generated_jit(function=None, target='cpu', cache=False,
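Note: with this change `_jit` no longer resolves a dispatcher eagerly. The new `__wrapper` keeps the old per-target lookup only for the eager cases (npyufunc, `no_cpython_wrapper`, explicit signatures, `DISABLE_JIT`, or object mode); a plain nopython function with no forced target is instead wrapped in a `TargetDispatcher` that picks the dispatcher at call time. A minimal, self-contained sketch of that routing rule (`pick_dispatcher`, `disable_jit`, and `lazy_cls` are illustrative names, not numba API):

def pick_dispatcher(target, sigs, targetoptions, disable_jit, registry, lazy_cls):
    # Eager paths keep the old behaviour: resolve a concrete dispatcher now.
    eager = (target == 'npyufunc'
             or targetoptions.get('no_cpython_wrapper')
             or bool(sigs)
             or disable_jit
             or not targetoptions.get('nopython'))
    if eager:
        return registry[target if target is not None else 'cpu']
    # Plain nopython with no forced target: defer the choice to call time.
    return lazy_cls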
9 changes: 8 additions & 1 deletion numba/core/dispatcher.py
@@ -673,7 +673,14 @@ def _set_uuid(self, u):
         self._recent.append(self)


-class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
+import abc
+
+class DispatcherMeta(abc.ABCMeta):
+    def __instancecheck__(self, other):
+        return type(type(other)) == DispatcherMeta
+
+
+class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase, metaclass=DispatcherMeta):
     """
     Implementation of user-facing dispatcher objects (i.e. created using
     the @jit decorator).
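The custom `__instancecheck__` makes `isinstance(x, Dispatcher)` succeed for any object whose class was also built by `DispatcherMeta`, so the new `TargetDispatcher` (which does not subclass `Dispatcher`) still passes the `isinstance` checks scattered through numba. A self-contained sketch of the trick, with hypothetical names:

import abc

class Meta(abc.ABCMeta):
    def __instancecheck__(self, other):
        # An object counts as an instance if its class was built by Meta.
        return type(type(other)) == Meta

class Base(metaclass=Meta):
    pass

class Proxy(metaclass=Meta):   # deliberately not a subclass of Base
    pass

assert isinstance(Proxy(), Base)      # True: both classes share the metaclass
assert not isinstance(42, Base)       # ordinary objects are rejected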
6 changes: 6 additions & 0 deletions numba/core/registry.py
@@ -2,6 +2,7 @@

 from numba.core.descriptors import TargetDescriptor
 from numba.core import utils, typing, dispatcher, cpu
+from numba.core.compiler_lock import global_compiler_lock

 # -----------------------------------------------------------------------------
 # Default CPU target descriptors
@@ -26,16 +27,19 @@ class CPUTarget(TargetDescriptor):
     _nested = _NestedContext()

     @utils.cached_property
+    @global_compiler_lock
     def _toplevel_target_context(self):
         # Lazily-initialized top-level target context, for all threads
         return cpu.CPUContext(self.typing_context)

     @utils.cached_property
+    @global_compiler_lock
     def _toplevel_typing_context(self):
         # Lazily-initialized top-level typing context, for all threads
         return typing.Context()

     @property
+    @global_compiler_lock
     def target_context(self):
         """
         The target context for CPU targets.
@@ -47,6 +51,7 @@ def target_context(self):
         return self._toplevel_target_context

     @property
+    @global_compiler_lock
     def typing_context(self):
         """
         The typing context for CPU targets.
@@ -57,6 +62,7 @@ def typing_context(self):
         else:
             return self._toplevel_typing_context

+    @global_compiler_lock
     def nested_context(self, typing_context, target_context):
         """
         A context manager temporarily replacing the contexts with the
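`global_compiler_lock` guards the lazily created contexts: without it, two threads racing through a `cached_property` could each build a context. In numba the lock works both as a decorator and as a context manager; a simplified, hypothetical sketch of such a dual-use lock (not numba's exact implementation):

import functools
import threading

class CompilerLock:
    def __init__(self):
        self._lock = threading.RLock()   # reentrant: locked code may re-acquire

    def __enter__(self):
        self._lock.acquire()

    def __exit__(self, *exc):
        self._lock.release()

    def __call__(self, func):
        # Decorator form reuses the context-manager protocol above.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with self:
                return func(*args, **kwargs)
        return wrapper

global_lock = CompilerLock()

@global_lock
def init_context():
    pass   # runs under the lock, so lazy initialization happens only once per race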
45 changes: 23 additions & 22 deletions numba/dppl/dppl_offload_dispatcher.py
@@ -1,22 +1,23 @@
 from numba.core import dispatcher, compiler
 from numba.core.registry import cpu_target, dispatcher_registry
 import numba.dppl_config as dppl_config


 class DpplOffloadDispatcher(dispatcher.Dispatcher):
     targetdescr = cpu_target

     def __init__(self, py_func, locals={}, targetoptions={}, impl_kind='direct', pipeline_class=compiler.Compiler):
         if dppl_config.dppl_present:
             from numba.dppl.compiler import DPPLCompiler
             targetoptions['parallel'] = True
             dispatcher.Dispatcher.__init__(self, py_func, locals=locals,
                 targetoptions=targetoptions, impl_kind=impl_kind, pipeline_class=DPPLCompiler)
         else:
             print("---------------------------------------------------------------------")
             print("WARNING : DPPL pipeline ignored. Ensure OpenCL drivers are installed.")
             print("---------------------------------------------------------------------")
             dispatcher.Dispatcher.__init__(self, py_func, locals=locals,
                 targetoptions=targetoptions, impl_kind=impl_kind, pipeline_class=pipeline_class)

 dispatcher_registry['__dppl_offload_gpu__'] = DpplOffloadDispatcher
+dispatcher_registry['__dppl_offload_cpu__'] = DpplOffloadDispatcher
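Registering the dispatcher under '__dppl_offload_cpu__' as well lets `TargetDispatcher` route to a CPU queue from a `with` context. The registration pattern generalizes; a hypothetical sketch of wiring up a custom target (`MyDispatcher` and '__my_target__' are illustrative names, not part of numba):

from numba.core import dispatcher
from numba.core.registry import cpu_target, dispatcher_registry

class MyDispatcher(dispatcher.Dispatcher):
    targetdescr = cpu_target          # reuse the CPU target description

dispatcher_registry['__my_target__'] = MyDispatcher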
34 changes: 34 additions & 0 deletions numba/dppl/examples/dppl_with_context.py
@@ -0,0 +1,34 @@
+import numpy as np
+from numba import dppl, njit, prange
+import dpctl
+
+
+@njit
+def g(a):
+    return a + 1
+
+
+@njit
+def f(a, b, c, N):
+    for i in prange(N):
+        a[i] = b[i] + g(c[i])
+
+
+def main():
+    N = 10
+    a = np.ones(N)
+    b = np.ones(N)
+    c = np.ones(N)
+
+    if dpctl.has_gpu_queues():
+        with dpctl.device_context(dpctl.device_type.gpu):
+            f(a, b, c, N)
+    elif dpctl.has_cpu_queues():
+        with dpctl.device_context(dpctl.device_type.cpu):
+            f(a, b, c, N)
+    else:
+        print("No device found")
+
+
+if __name__ == '__main__':
+    main()
98 changes: 98 additions & 0 deletions numba/dppl/target_dispatcher.py
@@ -0,0 +1,98 @@
+from numba.core import registry, serialize, dispatcher
+from numba import types
+from numba.core.errors import UnsupportedError
+import dpctl
+import dpctl.ocldrv as ocldr
+from numba.core.compiler_lock import global_compiler_lock
+
+
+class TargetDispatcher(serialize.ReduceMixin, metaclass=dispatcher.DispatcherMeta):
+    __numba__ = 'py_func'
+
+    target_offload_gpu = '__dppl_offload_gpu__'
+    target_offload_cpu = '__dppl_offload_cpu__'
+    target_dppl = 'dppl'
+
+    def __init__(self, py_func, wrapper, target, parallel_options, compiled=None):
+
+        self.__py_func = py_func
+        self.__target = target
+        self.__wrapper = wrapper
+        self.__compiled = compiled if compiled is not None else {}
+        self.__parallel = parallel_options
+        self.__doc__ = py_func.__doc__
+        self.__name__ = py_func.__name__
+        self.__module__ = py_func.__module__
+
+    def __call__(self, *args, **kwargs):
+        return self.get_compiled()(*args, **kwargs)
+
+    def __getattr__(self, name):
+        return getattr(self.get_compiled(), name)
+
+    def __get__(self, obj, objtype=None):
+        return self.get_compiled().__get__(obj, objtype)
+
+    def __repr__(self):
+        return self.get_compiled().__repr__()
+
+    @classmethod
+    def _rebuild(cls, py_func, wrapper, target, parallel, compiled):
+        self = cls(py_func, wrapper, target, parallel, compiled)
+        return self
+
+    def get_compiled(self, target=None):
+        if target is None:
+            target = self.__target
+
+        disp = self.get_current_disp()
+        if not disp in self.__compiled.keys():
+            with global_compiler_lock:
+                if not disp in self.__compiled.keys():
+                    self.__compiled[disp] = self.__wrapper(self.__py_func, disp)
+
+        return self.__compiled[disp]
+
+    def __is_with_context_target(self, target):
+        return target is None or target == TargetDispatcher.target_dppl
+
+    def get_current_disp(self):
+        target = self.__target
+        parallel = self.__parallel
+        offload = isinstance(parallel, dict) and parallel.get('offload') is True
+
+        if (dpctl.is_in_device_context() or offload):
+            if not self.__is_with_context_target(target):
+                raise UnsupportedError(f"Can't use 'with' context with explicitly specified target '{target}'")
+            if parallel is False or (isinstance(parallel, dict) and parallel.get('offload') is False):
+                raise UnsupportedError(f"Can't use 'with' context with parallel option '{parallel}'")
+
+            from numba.dppl import dppl_offload_dispatcher
+
+            if target is None:
+                if dpctl.get_current_device_type() == dpctl.device_type.gpu:
+                    return registry.dispatcher_registry[TargetDispatcher.target_offload_gpu]
+                elif dpctl.get_current_device_type() == dpctl.device_type.cpu:
+                    return registry.dispatcher_registry[TargetDispatcher.target_offload_cpu]
+                else:
+                    if dpctl.is_in_device_context():
+                        raise UnsupportedError('Unknown dppl device type')
+                    if offload:
+                        if dpctl.has_gpu_queues():
+                            return registry.dispatcher_registry[TargetDispatcher.target_offload_gpu]
+                        elif dpctl.has_cpu_queues():
+                            return registry.dispatcher_registry[TargetDispatcher.target_offload_cpu]
+
+        if target is None:
+            target = 'cpu'
+
+        return registry.dispatcher_registry[target]
+
+    def _reduce_states(self):
+        return dict(
+            py_func=self.__py_func,
+            wrapper=self.__wrapper,
+            target=self.__target,
+            parallel=self.__parallel,
+            compiled=self.__compiled
+        )
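`get_compiled` uses double-checked locking: it probes the cache without the lock, then re-checks under `global_compiler_lock`, so concurrent first calls compile a function only once. A self-contained sketch of the pattern (`compile_fn` is a stand-in for the wrapped compilation step):

import threading

_lock = threading.Lock()
_cache = {}

def get_compiled(key, compile_fn):
    if key not in _cache:            # fast path: no lock taken on cache hits
        with _lock:
            if key not in _cache:    # re-check: another thread may have won the race
                _cache[key] = compile_fn(key)
    return _cache[key]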