IntelPython · PokhodenkoSA · Mar 4, 2021 · Mar 4, 2021 · Mar 4, 2021 · Mar 4, 2021
diff --git a/numba_dppy/context.py b/numba_dppy/context.py
@@ -0,0 +1,77 @@
+# Copyright 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from contextlib import contextmanager
+
+import dpctl
+from numba._dispatcher import set_use_tls_target_stack
+from numba.core.dispatcher import TargetConfig
+
+from numba_dppy.dppy_offload_dispatcher import DppyOffloadDispatcher
+
+
+@contextmanager
+def switch_target(retarget):
+    """Temporarily install ``retarget`` on a ``TargetConfig``.
+
+    On entry, ``retarget`` is pushed onto a new ``TargetConfig`` instance and
+    ``set_use_tls_target_stack(True)`` is called. On exit the entry is popped
+    and ``set_use_tls_target_stack(False)`` is called; this cleanup runs even
+    when the body of the ``with`` statement raises.
+
+    Args:
+        retarget: Object handed to ``TargetConfig.push``.
+            ``device_context`` passes ``retarget_to_gpu`` here.
+    """
+    tc = TargetConfig()
+    tc.push(retarget)
+    set_use_tls_target_stack(True)
+    try:
+        yield
+    finally:
+        # Without try/finally an exception raised inside the ``with`` body
+        # would skip the cleanup and leave the retarget entry pushed.
+        tc.pop()
+        set_use_tls_target_stack(False)
+
+
+def retarget_to_gpu(cpu_disp):
+    """Build an offload callable for the function behind ``cpu_disp``.
+
+    Args:
+        cpu_disp: Object exposing the original Python function as
+            ``py_func``.
+
+    Returns:
+        A callable that forwards all positional and keyword arguments to a
+        ``DppyOffloadDispatcher`` created from ``cpu_disp.py_func``.
+    """
+    # NOTE(review): a new DppyOffloadDispatcher is constructed on every
+    # call; confirm whether reusing one per ``cpu_disp`` is intended.
+    dispatcher = DppyOffloadDispatcher(cpu_disp.py_func)
+    return lambda *args, **kwargs: dispatcher(*args, **kwargs)
+
+
+@contextmanager
+def device_context(*args, **kwargs):
+    """Enter ``dpctl.device_context`` with ``retarget_to_gpu`` installed.
+
+    All arguments are forwarded unchanged to ``dpctl.device_context``.
+
+    Yields:
+        The value produced by ``dpctl.device_context(*args, **kwargs)``.
+    """
+    with switch_target(retarget_to_gpu):
+        with dpctl.device_context(*args, **kwargs) as queue:
+            yield queue
diff --git a/numba_dppy/dispatcher.py b/numba_dppy/dispatcher.py
@@ -25,7 +25,7 @@
 # from numba.npyufunc.deviceufunc import (UFuncMechanism, GenerializedUFunc,
 #                                       GUFuncCallSteps)
 
-from .. import dispatcher, utils, typing
+from numba.core import dispatcher, utils, typing
 from .compiler import DPPYCompiler
 
 

diff --git a/numba_dppy/dppy_passes.py b/numba_dppy/dppy_passes.py
@@ -226,6 +226,7 @@ def run_pass(self, state):
             state.typingctx,
             state.flags.auto_parallel,
             state.flags,
+            state.metadata,
             state.parfor_diagnostics,
         )
 

diff --git a/numba_dppy/tests/kernel_tests/test_arg_accessor.py b/numba_dppy/tests/kernel_tests/test_arg_accessor.py
@@ -15,7 +15,7 @@
 import numpy as np
 import numba_dppy as dppy
 import pytest
-import dpctl
+from numba_dppy.context import device_context
 from numba_dppy.tests.skip_tests import skip_test
 
 
@@ -77,6 +77,6 @@ def test_kernel_arg_accessor(filter_str, input_arrays, kernel):
 
     a, b, actual = input_arrays
     expected = a + b
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         call_kernel(global_size, local_size, a, b, actual, kernel)
     np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
diff --git a/numba_dppy/tests/kernel_tests/test_arg_types.py b/numba_dppy/tests/kernel_tests/test_arg_types.py
@@ -15,7 +15,7 @@
 import numpy as np
 import numba_dppy as dppy
 import pytest
-import dpctl
+from numba_dppy.context import device_context
 from numba_dppy.tests.skip_tests import skip_test
 
 global_size = 1054
@@ -63,7 +63,7 @@ def test_kernel_arg_types(filter_str, input_arrays):
     kernel = dppy.kernel(mul_kernel)
     a, actual, c = input_arrays
     expected = a * c
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         kernel[global_size, local_size](a, actual, c)
     np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
 
@@ -82,7 +82,7 @@ def test_bool_type(filter_str):
     kernel = dppy.kernel(check_bool_kernel)
     a = np.array([2], np.int64)
 
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
         assert a[0] == 111
         kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)

diff --git a/numba_dppy/tests/kernel_tests/test_atomic_op.py b/numba_dppy/tests/kernel_tests/test_atomic_op.py
@@ -17,7 +17,8 @@
 
 import numba_dppy as dppy
 import pytest
-import dpctl
+
+from numba_dppy.context import device_context
 
 
 global_size = 100
@@ -94,7 +95,7 @@ def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):
 
     a, dtype = input_arrays
     kernel, expected = kernel_result_pair
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
     assert a[0] == expected
 
@@ -120,7 +121,7 @@ def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
     a, dtype = input_arrays
     op_type, expected = return_list_of_op
     kernel = get_kernel_local(op_type, dtype)
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         kernel[global_size, global_size](a)
     assert a[0] == expected
 
@@ -162,6 +163,6 @@ def test_kernel_atomic_multi_dim(
     dim = return_list_of_dim
     kernel = get_kernel_multi_dim(op_type, len(dim))
     a = np.zeros(dim, return_dtype)
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
     assert a[0] == expected
diff --git a/numba_dppy/tests/kernel_tests/test_barrier.py b/numba_dppy/tests/kernel_tests/test_barrier.py
@@ -15,7 +15,7 @@
 import numpy as np
 import numba_dppy as dppy
 import pytest
-import dpctl
+from numba_dppy.context import device_context
 from numba_dppy.tests.skip_tests import skip_test
 
 
@@ -46,7 +46,7 @@ def twice(A):
     arr = np.random.random(N).astype(np.float32)
     orig = arr.copy()
 
-    with dpctl.device_context(filter_str) as gpu_queue:
+    with device_context(filter_str) as gpu_queue:
         twice[N, N // 2](arr)
 
     # The computation is correct?
@@ -69,7 +69,7 @@ def twice(A):
     arr = np.random.random(N).astype(np.float32)
     orig = arr.copy()
 
-    with dpctl.device_context(filter_str) as gpu_queue:
+    with device_context(filter_str) as gpu_queue:
         twice[N, dppy.DEFAULT_LOCAL_SIZE](arr)
 
     # The computation is correct?
@@ -97,7 +97,7 @@ def reverse_array(A):
     arr = np.arange(blocksize).astype(np.float32)
     orig = arr.copy()
 
-    with dpctl.device_context(filter_str) as gpu_queue:
+    with device_context(filter_str) as gpu_queue:
         reverse_array[blocksize, blocksize](arr)
 
     expected = orig[::-1] + orig

diff --git a/numba_dppy/tests/kernel_tests/test_caching.py b/numba_dppy/tests/kernel_tests/test_caching.py
@@ -16,7 +16,7 @@
 import numpy as np
 import numba_dppy as dppy
 import pytest
-import dpctl
+from numba_dppy.context import device_context
 from numba_dppy.tests.skip_tests import skip_test
 
 list_of_filter_strs = [
@@ -46,7 +46,7 @@ def test_caching_kernel(filter_str):
     b = np.array(np.random.random(N), dtype=np.float32)
     c = np.ones_like(a)
 
-    with dpctl.device_context(filter_str) as gpu_queue:
+    with device_context(filter_str) as gpu_queue:
         func = dppy.kernel(data_parallel_sum)
         caching_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(a, b, c)
 

diff --git a/numba_dppy/tests/kernel_tests/test_math_functions.py b/numba_dppy/tests/kernel_tests/test_math_functions.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import dpctl
+from numba_dppy.context import device_context
 import numba_dppy as dppy
 import numpy as np
 import pytest
@@ -67,7 +67,7 @@ def f(a, b):
         i = dppy.get_global_id(0)
         b[i] = uop(a[i])
 
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         f[a.size, dppy.DEFAULT_LOCAL_SIZE](a, actual)
 
     expected = np_uop(a)

diff --git a/numba_dppy/tests/kernel_tests/test_print.py b/numba_dppy/tests/kernel_tests/test_print.py
@@ -15,7 +15,7 @@
 import numpy as np
 import numba_dppy as dppy
 import pytest
-import dpctl
+from numba_dppy.context import device_context
 from numba_dppy.tests.skip_tests import skip_test
 
 list_of_filter_strs = [
@@ -31,7 +31,7 @@ def filter_str(request):
 @pytest.mark.xfail
 def test_print_only_str(filter_str):
     try:
-        with dpctl.device_context(filter_str):
+        with device_context(filter_str):
             pass
     except Exception:
         pytest.skip()
@@ -45,7 +45,7 @@ def f():
     # replaced by a puts() which fails due to lack of addrspace in the
     # puts function signature right now, and would fail in general due
     # to lack of support for puts() in OpenCL.
-    with dpctl.device_context(filter_str), captured_stdout() as stdout:
+    with device_context(filter_str), captured_stdout() as stdout:
         f[3, dppy.DEFAULT_LOCAL_SIZE]()
 
 
@@ -75,7 +75,7 @@ def f(a):
     a = input_arrays
     global_size = 3
 
-    with dpctl.device_context(filter_str):
+    with device_context(filter_str):
         f[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
         captured = capfd.readouterr()
         assert "test" in captured.out
diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_creation.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_creation.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 ################################################################################
 
-import dpctl
+from numba_dppy.context import device_context
 import numpy as np
 from numba import njit
 import pytest
@@ -106,7 +106,7 @@ def test_unary_ops(filter_str, unary_op, input_array, capfd):
     expected = np.empty(shape=a.shape, dtype=a.dtype)
 
     f = njit(fn)
-    with dpctl.device_context(filter_str), dpnp_debug():
+    with device_context(filter_str), dpnp_debug():
         actual = f(a)
         captured = capfd.readouterr()
         assert "dpnp implementation" in captured.out
@@ -130,7 +130,7 @@ def test_binary_op(filter_str, binary_op, input_array, dtype, get_shape, capfd):
     expected = np.empty(shape=a.shape, dtype=a.dtype)
 
     f = njit(fn)
-    with dpctl.device_context(filter_str), dpnp_debug():
+    with device_context(filter_str), dpnp_debug():
         actual = f(a, dtype)
         captured = capfd.readouterr()
         assert "dpnp implementation" in captured.out
@@ -159,7 +159,7 @@ def test_full(filter_str, full_name, input_array, get_shape, capfd):
     expected = np.empty(shape=a.shape, dtype=a.dtype)
 
     f = njit(fn)
-    with dpctl.device_context(filter_str), dpnp_debug():
+    with device_context(filter_str), dpnp_debug():
         actual = f(a, np.array([2]))
         captured = capfd.readouterr()
         assert "dpnp implementation" in captured.out

diff --git a/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py b/numba_dppy/tests/njit_tests/dpnp/test_numpy_array_ops.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 ################################################################################
 
-import dpctl
+from numba_dppy.context import device_context
 import numpy as np
 from numba import njit
 import pytest
@@ -103,7 +103,7 @@ def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd):
     expected = np.empty(shape=a.shape, dtype=a.dtype)
 
     f = njit(op)
-    with dpctl.device_context(filter_str), dpnp_debug():
+    with device_context(filter_str), dpnp_debug():
         actual = f(a)
         captured = capfd.readouterr()
         assert "dpnp implementation" in captured.out
@@ -142,7 +142,7 @@ def test_take(filter_str, input_arrays, indices, capfd):
     expected = np.empty(shape=a.shape, dtype=a.dtype)
 
     f = njit(fn)
-    with dpctl.device_context(filter_str), dpnp_debug():
+    with device_context(filter_str), dpnp_debug():
         actual = f(a, indices)
         captured = capfd.readouterr()
         assert "dpnp implementation" in captured.out