Skip to content

Update to Numba 0.54 #457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ https://intelpython.github.io/dpnp/

## Dependencies

* numba 0.53.* (IntelPython/numba)
* numba 0.53.*, 0.54.* (IntelPython/numba)
* dpctl 0.8.*
* dpnp 0.6.* (optional)
* llvm-spirv (SPIRV generation from LLVM IR)
Expand Down
4 changes: 2 additions & 2 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ requirements:
- python
- setuptools
- cython
- numba 0.53*
    - numba >=0.53.0,<0.55
- dpctl 0.8.*
- dpnp >=0.6*,<0.7* # [linux]
- wheel
run:
- python
- numba 0.53*
    - numba >=0.53.0,<0.55
- dpctl 0.8.*
- spirv-tools
- llvm-spirv
Expand Down
2 changes: 1 addition & 1 deletion docs/user_guides/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Installation

Numba-dppy depends on following components:

* numba 0.53.* (`Intel Python Numba`_)
* numba 0.53.*, 0.54.* (`Intel Python Numba`_)
* dpctl 0.8.* (`Intel Python dpctl`_)
* dpnp >=0.6.* (optional, `Intel Python DPNP`_)
* `llvm-spirv`_ (SPIRV generation from LLVM IR)
Expand Down
6 changes: 3 additions & 3 deletions numba_dppy/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from llvmlite import binding as ll
from llvmlite.llvmpy import core as lc

from numba.core.codegen import BaseCPUCodegen, CodeLibrary
from numba.core.codegen import CPUCodegen, CPUCodeLibrary
from numba.core import utils

from numba_dppy import config
Expand All @@ -35,7 +35,7 @@
}


class SPIRVCodeLibrary(CodeLibrary):
class SPIRVCodeLibrary(CPUCodeLibrary):
def _optimize_functions(self, ll_module):
pass

Expand Down Expand Up @@ -67,7 +67,7 @@ def get_asm_str(self):
return None


class JITSPIRVCodegen(BaseCPUCodegen):
class JITSPIRVCodegen(CPUCodegen):
"""
This codegen implementation generates optimized SPIR 2.0
"""
Expand Down
8 changes: 5 additions & 3 deletions numba_dppy/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from numba.core.typing.templates import ConcreteTemplate
from numba.core import types, compiler, ir
from numba.core.typing.templates import AbstractTemplate
from numba.core.compiler_lock import global_compiler_lock
import ctypes
from types import FunctionType
from inspect import signature
Expand Down Expand Up @@ -89,6 +90,7 @@ def define_pipelines(self):
return pms


@global_compiler_lock
def compile_with_dppy(pyfunc, return_type, args, debug=None):
# First compilation will trigger the initialization of the OpenCL backend.
from .descriptor import dppy_target
Expand All @@ -99,9 +101,9 @@ def compile_with_dppy(pyfunc, return_type, args, debug=None):
flags = compiler.Flags()
# Do not compile (generate native code), just lower (to LLVM)
flags.debuginfo = config.DEBUGINFO_DEFAULT
flags.set("no_compile")
flags.set("no_cpython_wrapper")
flags.unset("nrt")
flags.no_compile = True
flags.no_cpython_wrapper = True
flags.nrt = False

if debug is not None:
flags.debuginfo = debug
Expand Down
6 changes: 2 additions & 4 deletions numba_dppy/descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@

class DPPYTarget(TargetDescriptor):
options = CPUTargetOptions
# typingctx = DPPYTypingContext()
# targetctx = DPPYTargetContext(typingctx)

@utils.cached_property
def _toplevel_target_context(self):
# Lazily-initialized top-level target context, for all threads
return DPPYTargetContext(self.typing_context)
return DPPYTargetContext(self.typing_context, self._target_name)

@utils.cached_property
def _toplevel_typing_context(self):
Expand All @@ -52,4 +50,4 @@ def typing_context(self):


# The global DPPY target
dppy_target = DPPYTarget()
dppy_target = DPPYTarget("SyclDevice")
2 changes: 1 addition & 1 deletion numba_dppy/dppy_lowerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ def _lower_parfor_gufunc(lowerer, parfor):

# compile parfor body as a separate function to be used with GUFuncWrapper
flags = copy.copy(parfor.flags)
flags.set("error_model", "numpy")
flags.error_model = "numpy"

# Can't get here unless flags.set('auto_parallel', ParallelOptions(True))
index_var_typ = typemap[parfor.loop_nests[0].index_variable.name]
Expand Down
10 changes: 7 additions & 3 deletions numba_dppy/dppy_offload_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
# limitations under the License.

from numba.core import dispatcher, compiler
from numba.core.registry import cpu_target, dispatcher_registry
from numba.core.registry import cpu_target
from numba.core.target_extension import dispatcher_registry, target_registry
from numba_dppy import config
from numba_dppy.target import SyclDevice


class DppyOffloadDispatcher(dispatcher.Dispatcher):
Expand Down Expand Up @@ -60,5 +62,7 @@ def __init__(
)


dispatcher_registry["__dppy_offload_gpu__"] = DppyOffloadDispatcher
dispatcher_registry["__dppy_offload_cpu__"] = DppyOffloadDispatcher
target_registry['__dppy_offload_gpu__'] = SyclDevice
target_registry['__dppy_offload_cpu__'] = SyclDevice

dispatcher_registry[SyclDevice] = DppyOffloadDispatcher
6 changes: 4 additions & 2 deletions numba_dppy/dppy_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

from numba.parfors.parfor import (
PreParforPass as _parfor_PreParforPass,
replace_functions_map,
swap_functions_map,
)
from numba.parfors.parfor import ParforPass as _parfor_ParforPass
from numba.parfors.parfor import Parfor
Expand Down Expand Up @@ -173,7 +173,7 @@ def run_pass(self, state):

# Ensure we have an IR and type information.
assert state.func_ir
functions_map = replace_functions_map.copy()
functions_map = swap_functions_map.copy()
functions_map.pop(("dot", "numpy"), None)
functions_map.pop(("sum", "numpy"), None)
functions_map.pop(("prod", "numpy"), None)
Expand All @@ -188,6 +188,7 @@ def run_pass(self, state):
state.type_annotation.typemap,
state.type_annotation.calltypes,
state.typingctx,
state.targetctx,
state.flags.auto_parallel,
state.parfor_diagnostics.replaced_fns,
replace_functions_map=functions_map,
Expand Down Expand Up @@ -223,6 +224,7 @@ def run_pass(self, state):
state.type_annotation.calltypes,
state.return_type,
state.typingctx,
state.targetctx,
state.flags.auto_parallel,
state.flags,
state.metadata,
Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/driver/dpctl_capi_fn_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import llvmlite.llvmpy.core as lc
from llvmlite.ir import builder
from numba.core import types
from numba.core import types, cgutils

import numba_dppy.utils as utils

Expand Down Expand Up @@ -48,7 +48,7 @@ def _build_dpctl_function(builder, return_ty, arg_list, func_name):

"""
func_ty = lc.Type.function(return_ty, arg_list)
fn = builder.module.get_or_insert_function(func_ty, func_name)
fn = cgutils.get_or_insert_function(builder.module, func_ty, func_name)
return fn

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions numba_dppy/numpy_usm_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ def allocator_UsmArray(context, builder, size, align):

# Get the Numba external allocator for USM memory.
ext_allocator_fnty = ir.FunctionType(cgutils.voidptr_t, [])
ext_allocator_fn = mod.get_or_insert_function(
ext_allocator_fn = cgutils.get_or_insert_function(mod,
ext_allocator_fnty, name="usmarray_get_ext_allocator"
)
ext_allocator = builder.call(ext_allocator_fn, [])
# Get the Numba function to allocate an aligned array with an external allocator.
fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32, cgutils.voidptr_t])
fn = mod.get_or_insert_function(
fn = cgutils.get_or_insert_function(mod,
fnty, name="NRT_MemInfo_alloc_safe_aligned_external"
)
fn.return_value.add_attribute("noalias")
Expand Down
6 changes: 3 additions & 3 deletions numba_dppy/ocl/oclimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _declare_function(context, builder, name, sig, cargs, mangler=mangle_c):
llargs = [context.get_value_type(t) for t in sig.args]
fnty = Type.function(llretty, llargs)
mangled = mangler(name, cargs)
fn = mod.get_or_insert_function(fnty, mangled)
fn = cgutils.get_or_insert_function(mod, fnty, mangled)
fn.calling_convention = target.CC_SPIR_FUNC
return fn

Expand Down Expand Up @@ -226,7 +226,7 @@ def insert_and_call_atomic_fn(
llargs = [ll_p, context.get_value_type(sig.args[2])]
fnty = ir.FunctionType(llretty, llargs)

fn = mod.get_or_insert_function(fnty, name)
fn = cgutils.get_or_insert_function(mod, fnty, name)
fn.calling_convention = target.CC_SPIR_FUNC

generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)
Expand Down Expand Up @@ -291,7 +291,7 @@ def native_atomic_add(context, builder, sig, args):
)

fnty = ir.FunctionType(retty, spirv_fn_arg_types)
fn = builder.module.get_or_insert_function(fnty, mangled_fn_name)
fn = cgutils.get_or_insert_function(builder.module, fnty, mangled_fn_name)
fn.calling_convention = target.CC_SPIR_FUNC

sycl_memory_order = atomic_helper.sycl_memory_order.relaxed
Expand Down
2 changes: 1 addition & 1 deletion numba_dppy/printimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
def declare_print(lmod):
voidptrty = lc.Type.pointer(lc.Type.int(8), addrspace=address_space.GENERIC)
printfty = lc.Type.function(lc.Type.int(), [voidptrty], var_arg=True)
printf = lmod.get_or_insert_function(printfty, "printf")
printf = cgutils.get_or_insert_function(lmod, printfty, "printf")
return printf


Expand Down
24 changes: 20 additions & 4 deletions numba_dppy/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from . import codegen
from numba_dppy.dppy_array_type import DPPYArray, DPPYArrayModel
from numba_dppy.utils import npytypes_array_to_dppy_array, address_space, calling_conv
from numba.core.target_extension import GPU, target_registry


CC_SPIR_KERNEL = "spir_kernel"
Expand Down Expand Up @@ -104,6 +105,13 @@ def _init_data_model_manager():

spirv_data_model_manager = _init_data_model_manager()

class SyclDevice(GPU):
"""Mark the hardware target as SYCL Device.
"""


target_registry['SyclDevice'] = SyclDevice


class DPPYTargetContext(BaseContext):
"""A numba_dppy-specific target context inheriting Numba's ``BaseContext``.
Expand Down Expand Up @@ -185,7 +193,7 @@ def _finalize_wrapper_module(self, fn):
fn.calling_convention = CC_SPIR_KERNEL

# Mark kernels
ocl_kernels = mod.get_or_insert_named_metadata("opencl.kernels")
ocl_kernels = cgutils.get_or_insert_named_metadata(mod, "opencl.kernels")
ocl_kernels.add(
lc.MetaData.get(
mod,
Expand All @@ -209,7 +217,7 @@ def _finalize_wrapper_module(self, fn):
]

for name in others:
nmd = mod.get_or_insert_named_metadata(name)
nmd = cgutils.get_or_insert_named_metadata(mod, name)
if not nmd.operands:
nmd.add(empty_md)

Expand Down Expand Up @@ -247,13 +255,17 @@ def _generate_kernel_wrapper(self, func, argtypes):
module.get_function(func.name).linkage = "internal"
return wrapper

def __init__(self, typingctx, target='SyclDevice'):
super().__init__(typingctx, target)

def init(self):
self._internal_codegen = codegen.JITSPIRVCodegen("numba_dppy.jit")
self._target_data = ll.create_target_data(
codegen.SPIR_DATA_LAYOUT[utils.MACHINE_BITS]
)
# Override data model manager to SPIR model
self.data_model_manager = spirv_data_model_manager
import numba.cpython.unicode
self.data_model_manager = _init_data_model_manager()
Comment on lines +267 to +268
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@reazulhoque
What is this code for?

self.extra_compile_options = dict()

from numba.np.ufunc_db import _lazy_init_db
Expand All @@ -265,6 +277,10 @@ def init(self):
self.ufunc_db = copy.deepcopy(ufunc_db)
self.cpu_context = cpu_target.target_context

# Overrides
def create_module(self, name):
return self._internal_codegen._create_empty_module(name)

def replace_numpy_ufunc_with_opencl_supported_functions(self):
from numba_dppy.ocl.mathimpl import lower_ocl_impl, sig_mapper

Expand Down Expand Up @@ -373,7 +389,7 @@ def declare_function(self, module, fndesc):

"""
fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
fn = module.get_or_insert_function(fnty, name=fndesc.mangled_name)
fn = cgutils.get_or_insert_function(module, fnty, name=fndesc.mangled_name)
if not self.enable_debuginfo:
fn.attributes.add("alwaysinline")
ret = super(DPPYTargetContext, self).declare_function(module, fndesc)
Expand Down
20 changes: 11 additions & 9 deletions numba_dppy/target_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from numba.core import registry, serialize, dispatcher
from numba import types
from numba.core.errors import UnsupportedError
from numba.core.target_extension import resolve_dispatcher_from_str, target_registry, dispatcher_registry
import dpctl
from numba.core.compiler_lock import global_compiler_lock

Expand Down Expand Up @@ -90,30 +91,31 @@ def get_current_disp(self):

if target is None:
if dpctl.get_current_device_type() == dpctl.device_type.gpu:
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_gpu
]
]]
elif dpctl.get_current_device_type() == dpctl.device_type.cpu:
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_cpu
]
]]
else:
if dpctl.is_in_device_context():
raise UnsupportedError("Unknown dppy device type")
if offload:
if dpctl.has_gpu_queues():
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_gpu
]
]]
elif dpctl.has_cpu_queues():
return registry.dispatcher_registry[
return dispatcher_registry[target_registry[
TargetDispatcher.target_offload_cpu
]
]]

if target is None:
target = "cpu"

return registry.dispatcher_registry[target]
return resolve_dispatcher_from_str(target)


def _reduce_states(self):
return dict(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def spirv_compile():
packages = find_packages(include=["numba_dppy", "numba_dppy.*"])
build_requires = ["cython"]
install_requires = [
"numba >={},<{}".format("0.53.1", "0.54"),
"numba >={},<{}".format("0.53.1", "0.55"),
"dpctl",
]

Expand Down