Skip to content
This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Initial fallback implementation #8

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 65 additions & 2 deletions numba/dppl/dppl_lowerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@
from numba.core.typing import signature

import warnings
from numba.core.errors import NumbaParallelSafetyWarning
from numba.core.errors import NumbaParallelSafetyWarning, NumbaPerformanceWarning

from .target import SPIR_GENERIC_ADDRSPACE
from .dufunc_inliner import dufunc_inliner
from . import dppl_host_fn_call_gen as dppl_call_gen
import dppl.ocldrv as driver
from numba.dppl.target import DPPLTargetContext


def _print_block(block):
Expand Down Expand Up @@ -956,10 +957,72 @@ def load_range(v):

from numba.core.lowering import Lower


class DPPLLower(Lower):
    """Lowering pass that tries GPU lowering first and falls back to CPU.

    Holds two independent ``Lower`` instances over the same library: one
    with the (GPU) target context given by the caller and one with the
    plain CPU context.  ``lower()`` runs the GPU lowerer and, if that
    raises, retries with the CPU lowerer.  Whichever succeeds becomes
    ``self.base_lower`` and is delegated to afterwards.
    """

    def __init__(self, context, library, fndesc, func_ir, metadata=None):
        Lower.__init__(self, context, library, fndesc, func_ir, metadata)
        lowering.lower_extensions[parfor.Parfor] = _lower_parfor_gufunc

        # Shallow-copy the descriptor but give the CPU lowerer its own
        # calltypes/typemap so the GPU attempt cannot mutate them.
        fndesc_cpu = copy.copy(fndesc)
        fndesc_cpu.calltypes = fndesc.calltypes.copy()
        fndesc_cpu.typemap = fndesc.typemap.copy()

        cpu_context = (context.cpu_context
                       if isinstance(context, DPPLTargetContext) else context)
        self.gpu_lower = Lower(context, library, fndesc, func_ir.copy(), metadata)
        self.cpu_lower = Lower(cpu_context, library, fndesc_cpu, func_ir.copy(), metadata)

    def lower(self):
        # Basically we are trying to lower on GPU first and if that failed - try to
        # lower on CPU. This happens in the following order:
        # 1. Start lowering of parent function
        # 2. Try to lower parfor on GPU
        #    2.a. enter lower_parfor_rollback and prepare function to lower on GPU -
        #         insert get_global_id.
        #         2.a.a. starting lower parfor body - enter this point
        #                (DPPLLower.lower()) a second time.
        #         2.a.b. If lowering on GPU failed - try on CPU.
        #         2.a.c. Since get_global_id is NOT supported with CPU context - fail
        #                and throw exception
        #    2.b. in lower_parfor_rollback catch exception and restore parfor body
        #         and other to its initial state
        #    2.c. in lower_parfor_rollback throw exception to catch it here
        #         (DPPLLower.lower())
        # 3. Catch exception and start parfor lowering with CPU context.

        # WARNING: this approach only works in case no device specific modifications
        # were added to the parent function (function with parfor). In case the
        # parent function was patched with device specific code a different solution
        # should be used.
        try:
            lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
            self.gpu_lower.lower()
            self.base_lower = self.gpu_lower
            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
        except Exception:
            # Narrowed from a bare ``except``: KeyboardInterrupt/SystemExit now
            # propagate instead of silently triggering the CPU fallback.
            # Restore the default parfor hook BEFORE the CPU retry so the CPU
            # lowerer does not re-enter lower_parfor_rollback.
            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
            self.cpu_lower.lower()
            self.base_lower = self.cpu_lower

        self.env = self.base_lower.env
        self.call_helper = self.base_lower.call_helper

    def create_cpython_wrapper(self, release_gil=False):
        """Delegate wrapper creation to whichever lowerer succeeded."""
        return self.base_lower.create_cpython_wrapper(release_gil)


def lower_parfor_rollback(lowerer, parfor):
    """Attempt GPU lowering of *parfor*, restoring its IR on any outcome.

    GPU lowering mutates the parfor in place, so the mutable pieces
    (params, loop body, init block, loop nests, races) are snapshotted up
    front and unconditionally restored in ``finally`` so that a subsequent
    CPU retry sees the original IR.  On failure a
    ``NumbaPerformanceWarning`` is emitted and the exception is re-raised
    for ``DPPLLower.lower()`` to catch.
    """
    cache_parfor_races = copy.copy(parfor.races)
    cache_parfor_params = copy.copy(parfor.params)
    # loop_body contains nested IR blocks - a deep copy is required.
    cache_parfor_loop_body = copy.deepcopy(parfor.loop_body)
    cache_parfor_init_block = parfor.init_block.copy()
    cache_parfor_loop_nests = parfor.loop_nests.copy()

    try:
        _lower_parfor_gufunc(lowerer, parfor)
    except Exception:
        msg = "Failed to lower parfor on GPU"
        warnings.warn(NumbaPerformanceWarning(msg, parfor.loc))
        # Bare ``raise`` preserves the original traceback (``raise e`` would
        # restart it from here).
        raise
    finally:
        parfor.params = cache_parfor_params
        parfor.loop_body = cache_parfor_loop_body
        parfor.init_block = cache_parfor_init_block
        parfor.loop_nests = cache_parfor_loop_nests
        parfor.races = cache_parfor_races


def dppl_lower_array_expr(lowerer, expr):
    """Array-expression lowering is unsupported on this path; always raises."""
    unsupported = NotImplementedError(expr)
    raise unsupported
7 changes: 7 additions & 0 deletions numba/dppl/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from numba.core.utils import cached_property
from numba.core import datamodel
from numba.core.base import BaseContext
from numba.core.registry import cpu_target
from numba.core.callconv import MinimalCallConv
from . import codegen

Expand Down Expand Up @@ -105,6 +106,12 @@ def init(self):
from numba.np.ufunc_db import _ufunc_db as ufunc_db
self.ufunc_db = copy.deepcopy(ufunc_db)

from numba.core.cpu import CPUContext
from numba.core.typing import Context as TypingContext

self.cpu_context = cpu_target.target_context



def replace_numpy_ufunc_with_opencl_supported_functions(self):
from numba.dppl.ocl.mathimpl import lower_ocl_impl, sig_mapper
Expand Down
56 changes: 56 additions & 0 deletions numba/dppl/tests/dppl/test_dppl_fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from __future__ import print_function, division, absolute_import

import numpy as np

import numba
from numba import dppl
from numba.dppl.testing import unittest
from numba.dppl.testing import DPPLTestCase
import dppl.ocldrv as ocldrv
import sys
import io


@unittest.skipUnless(ocldrv.has_gpu_device, 'test only on GPU system')
class TestDPPLFallback(DPPLTestCase):
    """Verify that an offloadable parfor that cannot be lowered on GPU
    falls back to CPU execution and emits the fallback warning."""

    def capture_stderr(self, func):
        """Run *func* with stderr redirected; return (captured_text, result).

        The original ``sys.stderr`` is restored in a ``finally`` block so
        that an exception inside *func* cannot leave the process with a
        replaced/closed stderr.
        """
        backup = sys.stderr
        sys.stderr = io.StringIO()
        try:
            result = func()
            out = sys.stderr.getvalue()
        finally:
            sys.stderr.close()
            sys.stderr = backup

        return out, result

    def test_dppl_fallback(self):

        @numba.jit
        def fill_value(i):
            return i

        def np_rand_fallback():
            x = 10
            a = np.empty(shape=x, dtype=np.float32)

            # Calling an object-mode jit function inside prange is not
            # supported on the GPU, which forces the CPU fallback.
            for i in numba.prange(x):
                a[i] = fill_value(i)

            return a

        def run_dppl():
            dppl = numba.njit(parallel={'offload':True})(np_rand_fallback)
            return dppl()

        ref = np_rand_fallback

        err, dppl_result = self.capture_stderr(run_dppl)
        ref_result = ref()

        # Fallback must produce the same values as the pure-Python reference
        # and must have warned about the failed GPU lowering.
        np.testing.assert_array_equal(dppl_result, ref_result)
        self.assertTrue('Failed to lower parfor on GPU' in err)


# Allow running this test module directly (outside a test runner).
if __name__ == '__main__':
    unittest.main()