IntelPython · diptorupd · Dec 9, 2020 · Nov 19, 2020 · Nov 23, 2020 · Dec 2, 2020
diff --git a/numba_dppy/device_init.py b/numba_dppy/device_init.py
@@ -18,6 +18,14 @@
     CLK_GLOBAL_MEM_FENCE,
 )
 
+"""
+The dpnp stub module is imported here so that Numba recognizes the
+module when NumPy function names are rewritten.
+"""
+from .dpnp_glue.stubs import (
+    dpnp
+)
+
 DEFAULT_LOCAL_SIZE = []
 
 from . import initialize
@@ -35,9 +43,4 @@ def is_available():
     return dpctl.has_gpu_queues()
 
 
-#def ocl_error():
-#    """Returns None or an exception if the OpenCL driver fails to initialize.
-#    """
-#    return driver.driver.initialization_error
-
 initialize.initialize_all()
diff --git a/numba_dppy/dpctl_functions.py b/numba_dppy/dpctl_functions.py
@@ -0,0 +1,30 @@
+from numba import types
+from numba.core.typing import signature
+
+
+class _DPCTL_FUNCTIONS:
+    """Factories for Numba ``ExternalFunction`` types naming dpctl C symbols."""
+
+    @classmethod
+    def dpctl_get_current_queue(cls):
+        """``DPCTLQueueMgr_GetCurrentQueue() -> voidptr``."""
+        queue_sig = signature(types.voidptr)
+        return types.ExternalFunction("DPCTLQueueMgr_GetCurrentQueue", queue_sig)
+
+    @classmethod
+    def dpctl_malloc_shared(cls):
+        """``DPCTLmalloc_shared(int64, voidptr) -> voidptr``."""
+        malloc_sig = signature(types.voidptr, types.int64, types.voidptr)
+        return types.ExternalFunction("DPCTLmalloc_shared", malloc_sig)
+
+    @classmethod
+    def dpctl_queue_memcpy(cls):
+        """``DPCTLQueue_Memcpy(voidptr, voidptr, voidptr, int64) -> void``."""
+        args = (types.voidptr,) * 3 + (types.int64,)
+        return types.ExternalFunction("DPCTLQueue_Memcpy", signature(types.void, *args))
+
+    @classmethod
+    def dpctl_free_with_queue(cls):
+        """``DPCTLfree_with_queue(voidptr, voidptr) -> void``."""
+        free_sig = signature(types.void, types.voidptr, types.voidptr)
+        return types.ExternalFunction("DPCTLfree_with_queue", free_sig)
diff --git a/numba_dppy/dpnp_glue/__init__.py b/numba_dppy/dpnp_glue/__init__.py
diff --git a/numba_dppy/dpnp_glue/dpnp_fptr_interface.pyx b/numba_dppy/dpnp_glue/dpnp_fptr_interface.pyx
@@ -8,6 +8,7 @@ cdef extern from "backend_iface_fptr.hpp" namespace "DPNPFuncName":  # need this
     cdef enum DPNPFuncName "DPNPFuncName":
         DPNP_FN_ABSOLUTE
         DPNP_FN_ADD
+        DPNP_FN_ARANGE
         DPNP_FN_ARCCOS
         DPNP_FN_ARCCOSH
         DPNP_FN_ARCSIN
@@ -18,40 +19,77 @@ cdef extern from "backend_iface_fptr.hpp" namespace "DPNPFuncName":  # need this
         DPNP_FN_ARGMAX
         DPNP_FN_ARGMIN
         DPNP_FN_ARGSORT
+        DPNP_FN_BITWISE_AND
+        DPNP_FN_BITWISE_OR
+        DPNP_FN_BITWISE_XOR
         DPNP_FN_CBRT
         DPNP_FN_CEIL
+        DPNP_FN_CHOLESKY
+        DPNP_FN_COPYSIGN
+        DPNP_FN_CORRELATE
         DPNP_FN_COS
         DPNP_FN_COSH
         DPNP_FN_COV
         DPNP_FN_DEGREES
+        DPNP_FN_DET
         DPNP_FN_DIVIDE
         DPNP_FN_DOT
         DPNP_FN_EIG
+        DPNP_FN_EIGVALS
         DPNP_FN_EXP
         DPNP_FN_EXP2
         DPNP_FN_EXPM1
         DPNP_FN_FABS
+        DPNP_FN_FFT_FFT
         DPNP_FN_FLOOR
+        DPNP_FN_FLOOR_DIVIDE
         DPNP_FN_FMOD
-        DPNP_FN_GAUSSIAN
         DPNP_FN_HYPOT
+        DPNP_FN_INVERT
+        DPNP_FN_LEFT_SHIFT
         DPNP_FN_LOG
         DPNP_FN_LOG10
         DPNP_FN_LOG1P
         DPNP_FN_LOG2
         DPNP_FN_MATMUL
+        DPNP_FN_MATRIX_RANK
         DPNP_FN_MAX
         DPNP_FN_MAXIMUM
         DPNP_FN_MEAN
         DPNP_FN_MEDIAN
         DPNP_FN_MIN
         DPNP_FN_MINIMUM
+        DPNP_FN_MODF
         DPNP_FN_MULTIPLY
         DPNP_FN_POWER
         DPNP_FN_PROD
-        DPNP_FN_UNIFORM
         DPNP_FN_RADIANS
+        DPNP_FN_REMAINDER
         DPNP_FN_RECIP
+        DPNP_FN_RIGHT_SHIFT
+        DPNP_FN_RNG_BETA
+        DPNP_FN_RNG_BINOMIAL
+        DPNP_FN_RNG_CHISQUARE
+        DPNP_FN_RNG_EXPONENTIAL
+        DPNP_FN_RNG_GAMMA
+        DPNP_FN_RNG_GAUSSIAN
+        DPNP_FN_RNG_GEOMETRIC
+        DPNP_FN_RNG_GUMBEL
+        DPNP_FN_RNG_HYPERGEOMETRIC
+        DPNP_FN_RNG_LAPLACE
+        DPNP_FN_RNG_LOGNORMAL
+        DPNP_FN_RNG_MULTINOMIAL
+        DPNP_FN_RNG_MULTIVARIATE_NORMAL
+        DPNP_FN_RNG_NEGATIVE_BINOMIAL
+        DPNP_FN_RNG_NORMAL
+        DPNP_FN_RNG_POISSON
+        DPNP_FN_RNG_RAYLEIGH
+        DPNP_FN_RNG_STANDARD_CAUCHY
+        DPNP_FN_RNG_STANDARD_EXPONENTIAL
+        DPNP_FN_RNG_STANDARD_GAMMA
+        DPNP_FN_RNG_STANDARD_NORMAL
+        DPNP_FN_RNG_UNIFORM
+        DPNP_FN_RNG_WEIBULL
         DPNP_FN_SIGN
         DPNP_FN_SIN
         DPNP_FN_SINH
@@ -109,6 +147,8 @@ cdef DPNPFuncName get_DPNPFuncName_from_str(name):
         return DPNPFuncName.DPNP_FN_ARGSORT
     elif name == "dpnp_cov":
         return DPNPFuncName.DPNP_FN_COV
+    elif name == "dpnp_eig":
+        return DPNPFuncName.DPNP_FN_EIG
     else:
         return  DPNPFuncName.DPNP_FN_DOT
 

diff --git a/numba_dppy/dpnp_glue/dpnpdecl.py b/numba_dppy/dpnp_glue/dpnpdecl.py
@@ -0,0 +1,10 @@
+from numba.core.typing.templates import (AttributeTemplate, infer_getattr)
+import numba_dppy
+from numba import types
+
+@infer_getattr
+class DppyDpnpTemplate(AttributeTemplate):
+    key = types.Module(numba_dppy)  # template is keyed on the numba_dppy module type
+
+    def resolve_dpnp(self, mod):  # yields the module type of ``numba_dppy.dpnp``
+        return types.Module(numba_dppy.dpnp)
diff --git a/numba_dppy/dpnp_glue/dpnpimpl.py b/numba_dppy/dpnp_glue/dpnpimpl.py
@@ -0,0 +1,89 @@
+from numba.core.imputils import lower_builtin
+import numba_dppy.experimental_numpy_lowering_overload as dpnp_lowering
+from numba import types
+from numba.core.typing import signature
+from numba.core.extending import overload, register_jitable
+from . import stubs
+import numpy as np
+from numba_dppy.dpctl_functions import _DPCTL_FUNCTIONS
+
+
+def get_dpnp_fptr(fn_name, type_names):
+    """Return ``dpnp_fptr_interface.get_dpnp_fn_ptr(fn_name, type_names)``."""
+    from . import dpnp_fptr_interface as fptr_interface
+
+    return fptr_interface.get_dpnp_fn_ptr(fn_name, type_names)
+
+
+@register_jitable
+def _check_finite_matrix(a):  # raises LinAlgError when an element of ``a`` is not finite
+    for v in np.nditer(a):
+        if not np.isfinite(v.item()):
+            raise np.linalg.LinAlgError("Array must not contain infs or NaNs.")
+
+
+@register_jitable
+def _dummy_liveness_func(a):
+    """Return ``a[0]``; pass a list of variables to keep through dead code elimination."""
+    return a[0]
+
+
+class RetrieveDpnpFnPtr(types.ExternalFunctionPointer):
+    def __init__(self, fn_name, type_names, sig, get_pointer):
+        """Store ``fn_name`` and ``type_names``, then initialise the base pointer type."""
+        self.fn_name, self.type_names = fn_name, type_names
+        super().__init__(sig, get_pointer)
+
+
+class _DPNP_EXTENSION:
+    """Builds Numba ``ExternalFunctionPointer`` types for dpnp functions."""
+
+    def __init__(self, name):
+        # NOTE(review): presumably verifies dpnp support for ``name`` -- confirm.
+        dpnp_lowering.ensure_dpnp(name)
+
+    @classmethod
+    def dpnp_sum(cls, fn_name, type_names):
+        """Return a ``(voidptr, voidptr, int64) -> void`` function pointer type."""
+        sum_sig = signature(types.void, types.voidptr, types.voidptr, types.int64)
+        address = get_dpnp_fptr(fn_name, type_names)
+
+        return types.ExternalFunctionPointer(sum_sig, get_pointer=lambda obj: address)
+
+
+@overload(stubs.dpnp.sum)
+def dpnp_sum_impl(a):
+    dpnp_extension = _DPNP_EXTENSION("sum")
+    dpctl_functions = _DPCTL_FUNCTIONS()
+    # Function pointer type for "dpnp_sum", looked up with the input dtype name.
+    dpnp_sum = dpnp_extension.dpnp_sum("dpnp_sum", [a.dtype.name, "NONE"])
+
+    get_sycl_queue = dpctl_functions.dpctl_get_current_queue()
+    allocate_usm_shared = dpctl_functions.dpctl_malloc_shared()
+    copy_usm = dpctl_functions.dpctl_queue_memcpy()
+    free_usm = dpctl_functions.dpctl_free_with_queue()
+
+    def dpnp_sum_impl(a):
+        if a.size == 0:
+            raise ValueError("Passed Empty array")
+        # Stage the input: allocate a.size * a.itemsize bytes and copy ``a`` into it.
+        sycl_queue = get_sycl_queue()
+        a_usm = allocate_usm_shared(a.size * a.itemsize, sycl_queue)
+        copy_usm(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
+        # One-element result buffer; copy_usm args look like (queue, dst, src, nbytes).
+        out_usm = allocate_usm_shared(a.itemsize, sycl_queue)
+
+        dpnp_sum(a_usm, out_usm, a.size)
+
+        out = np.empty(1, dtype=a.dtype)
+        copy_usm(sycl_queue, out.ctypes, out_usm, out.size * out.itemsize)
+
+        free_usm(a_usm, sycl_queue)
+        free_usm(out_usm, sycl_queue)
+
+        # Reference ``out`` again so it is preserved through dead code elimination.
+        _dummy_liveness_func([out.size])
+
+        return out[0]
+
+    return dpnp_sum_impl
diff --git a/numba_dppy/dpnp_glue/stubs.py b/numba_dppy/dpnp_glue/stubs.py
@@ -0,0 +1,9 @@
+from numba_dppy.ocl.stubs import Stub
+
+class dpnp(Stub):
+    """dpnp namespace: stub attributes that Numba overloads are registered against."""
+    _description_ = '<dpnp>'
+
+    class sum(Stub):
+        """Stub for ``dpnp.sum``; implemented via ``@overload(stubs.dpnp.sum)``."""
+        pass
diff --git a/numba_dppy/dppy_passbuilder.py b/numba_dppy/dppy_passbuilder.py
@@ -27,6 +27,8 @@
         DPPYNoPythonBackend
         )
 
+from .rename_numpy_functions_pass import DPPYRewriteOverloadedFunctions
+
 class DPPYPassBuilder(object):
     """
     This is the DPPY pass builder to run Intel GPU/CPU specific
@@ -44,6 +46,11 @@ def default_numba_nopython_pipeline(state, pm):
         pm.add_pass(IRProcessing, "processing IR")
         pm.add_pass(WithLifting, "Handle with contexts")
 
+        # this pass rewrites name of NumPy functions we intend to overload
+        pm.add_pass(DPPYRewriteOverloadedFunctions,
+                "Rewrite name of Numpy functions to overload already overloaded function",
+        )
+
         # this pass adds required logic to overload default implementation of
         # Numpy functions
         pm.add_pass(DPPYAddNumpyOverloadPass, "dppy add typing template for Numpy functions")

diff --git a/numba_dppy/dppy_passes.py b/numba_dppy/dppy_passes.py
@@ -3,6 +3,7 @@
 import warnings
 
 import numpy as np
+import numba
 from numba.core import ir
 import weakref
 from collections import namedtuple, deque
@@ -49,7 +50,7 @@ def __init__(self):
     def run_pass(self, state):
         if dpnp_available():
             typingctx = state.typingctx
-            from numba.core.typing.templates import builtin_registry as reg, infer_global
+            from numba.core.typing.templates import (builtin_registry as reg, infer_global)
             from numba.core.typing.templates import (AbstractTemplate, CallableTemplate, signature)
             from numba.core.typing.npydecl import MatMulTyperMixin