This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Commit 45b3a24

Merge pull request #8 from AlexanderKalistratov/akalist/cpu_fallback
Initial fallback implementation
2 parents: 5f0a8cc + bffd454

File tree: 3 files changed (+128 −2 lines)


numba/dppl/dppl_lowerer.py

Lines changed: 65 additions & 2 deletions
@@ -35,12 +35,13 @@
 from numba.core.typing import signature
 
 import warnings
-from numba.core.errors import NumbaParallelSafetyWarning
+from numba.core.errors import NumbaParallelSafetyWarning, NumbaPerformanceWarning
 
 from .target import SPIR_GENERIC_ADDRSPACE
 from .dufunc_inliner import dufunc_inliner
 from . import dppl_host_fn_call_gen as dppl_call_gen
 import dppl.ocldrv as driver
+from numba.dppl.target import DPPLTargetContext
 
 
 def _print_block(block):
@@ -956,10 +957,72 @@ def load_range(v):
 
 from numba.core.lowering import Lower
 
+
 class DPPLLower(Lower):
     def __init__(self, context, library, fndesc, func_ir, metadata=None):
         Lower.__init__(self, context, library, fndesc, func_ir, metadata)
-        lowering.lower_extensions[parfor.Parfor] = _lower_parfor_gufunc
+
+        fndesc_cpu = copy.copy(fndesc)
+        fndesc_cpu.calltypes = fndesc.calltypes.copy()
+        fndesc_cpu.typemap = fndesc.typemap.copy()
+
+        cpu_context = context.cpu_context if isinstance(context, DPPLTargetContext) else context
+        self.gpu_lower = Lower(context, library, fndesc, func_ir.copy(), metadata)
+        self.cpu_lower = Lower(cpu_context, library, fndesc_cpu, func_ir.copy(), metadata)
+
+    def lower(self):
+        # We try to lower on the GPU first and, if that fails, lower on the CPU instead.
+        # This happens in the following order:
+        # 1. Start lowering the parent function.
+        # 2. Try to lower the parfor on the GPU.
+        # 2.a. Enter lower_parfor_rollback and prepare the function for GPU lowering - insert get_global_id.
+        # 2.a.a. Start lowering the parfor body - this point (DPPLLower.lower()) is entered a second time.
+        # 2.a.b. If lowering on the GPU fails - try the CPU.
+        # 2.a.c. Since get_global_id is NOT supported with the CPU context - fail and raise an exception.
+        # 2.b. In lower_parfor_rollback, catch the exception and restore the parfor body and related state to their initial form.
+        # 2.c. In lower_parfor_rollback, re-raise the exception so it is caught here (DPPLLower.lower()).
+        # 3. Catch the exception and restart parfor lowering with the CPU context.
+
+        # WARNING: this approach only works when no device-specific modifications were made to
+        # the parent function (the function containing the parfor). If the parent function was
+        # patched with device-specific code, a different solution is needed.
+        try:
+            lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
+            self.gpu_lower.lower()
+            self.base_lower = self.gpu_lower
+            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
+        except:
+            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
+            self.cpu_lower.lower()
+            self.base_lower = self.cpu_lower
+
+        self.env = self.base_lower.env
+        self.call_helper = self.base_lower.call_helper
+
+    def create_cpython_wrapper(self, release_gil=False):
+        return self.base_lower.create_cpython_wrapper(release_gil)
+
+
+def lower_parfor_rollback(lowerer, parfor):
+    cache_parfor_races = copy.copy(parfor.races)
+    cache_parfor_params = copy.copy(parfor.params)
+    cache_parfor_loop_body = copy.deepcopy(parfor.loop_body)
+    cache_parfor_init_block = parfor.init_block.copy()
+    cache_parfor_loop_nests = parfor.loop_nests.copy()
+
+    try:
+        _lower_parfor_gufunc(lowerer, parfor)
+    except Exception as e:
+        msg = "Failed to lower parfor on GPU"
+        warnings.warn(NumbaPerformanceWarning(msg, parfor.loc))
+        raise e
+    finally:
+        parfor.params = cache_parfor_params
+        parfor.loop_body = cache_parfor_loop_body
+        parfor.init_block = cache_parfor_init_block
+        parfor.loop_nests = cache_parfor_loop_nests
+        parfor.races = cache_parfor_races
+
 
 def dppl_lower_array_expr(lowerer, expr):
     raise NotImplementedError(expr)
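
A note on the pattern above: lowering.lower_extensions is a process-global registry, so DPPLLower.lower() must swap the parfor entry before each attempt and restore the default on both the success and failure paths. A minimal, self-contained sketch of the same control flow, using plain-Python stand-ins rather than Numba's actual API:

# Hypothetical stand-ins: `lower_extensions` models the global registry,
# and the two backends are ordinary callables. Only the control flow
# mirrors DPPLLower.lower() above.
lower_extensions = {}

def gpu_backend():
    # Mimic GPU lowering failing on an unsupported construct.
    raise RuntimeError("get_global_id unsupported in this context")

def cpu_backend():
    return "lowered on CPU"

def lower_with_fallback():
    try:
        lower_extensions["parfor"] = "gpu_rollback_path"
        result = gpu_backend()
        lower_extensions["parfor"] = "default_cpu_path"
    except Exception:
        # Restore the default entry before retrying on the CPU.
        lower_extensions["parfor"] = "default_cpu_path"
        result = cpu_backend()
    return result

assert lower_with_fallback() == "lowered on CPU"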

numba/dppl/target.py

Lines changed: 7 additions & 0 deletions
@@ -11,6 +11,7 @@
 from numba.core.utils import cached_property
 from numba.core import datamodel
 from numba.core.base import BaseContext
+from numba.core.registry import cpu_target
 from numba.core.callconv import MinimalCallConv
 from . import codegen
 
@@ -105,6 +106,12 @@ def init(self):
         from numba.np.ufunc_db import _ufunc_db as ufunc_db
         self.ufunc_db = copy.deepcopy(ufunc_db)
 
+        from numba.core.cpu import CPUContext
+        from numba.core.typing import Context as TypingContext
+
+        self.cpu_context = cpu_target.target_context
+
+
 
     def replace_numpy_ufunc_with_opencl_supported_functions(self):
        from numba.dppl.ocl.mathimpl import lower_ocl_impl, sig_mapper
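
The only piece of Numba core this change relies on is the process-wide CPU target kept in numba.core.registry: its target_context property lazily builds and caches a CPUContext that DPPLTargetContext can stash for the fallback path (the CPUContext/TypingContext imports in the hunk above are otherwise unused). A small probe, assuming a Numba build contemporary with this commit:

# Assumes numba is importable; cpu_target.target_context is expected to
# return the same cached CPUContext on every access.
from numba.core.registry import cpu_target

ctx_a = cpu_target.target_context
ctx_b = cpu_target.target_context
assert ctx_a is ctx_b  # one shared context, safe to reuse on another target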
New test file

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+from __future__ import print_function, division, absolute_import
+
+import numpy as np
+
+import numba
+from numba import dppl
+from numba.dppl.testing import unittest
+from numba.dppl.testing import DPPLTestCase
+import dppl.ocldrv as ocldrv
+import sys
+import io
+
+
+@unittest.skipUnless(ocldrv.has_gpu_device, 'test only on GPU system')
+class TestDPPLFallback(DPPLTestCase):
+
+    def capture_stderr(self, func):
+        backup = sys.stderr
+        sys.stderr = io.StringIO()
+        result = func()
+        out = sys.stderr.getvalue()
+        sys.stderr.close()
+        sys.stderr = backup
+
+        return out, result
+
+    def test_dppl_fallback(self):
+
+        @numba.jit
+        def fill_value(i):
+            return i
+
+        def np_rand_fallback():
+            x = 10
+            a = np.empty(shape=x, dtype=np.float32)
+
+            for i in numba.prange(x):
+                a[i] = fill_value(i)
+
+            return a
+
+        def run_dppl():
+            dppl = numba.njit(parallel={'offload':True})(np_rand_fallback)
+            return dppl()
+
+        ref = np_rand_fallback
+
+        err, dppl_result = self.capture_stderr(run_dppl)
+        ref_result = ref()
+
+        np.testing.assert_array_equal(dppl_result, ref_result)
+        self.assertTrue('Failed to lower parfor on GPU' in err)
+
+
+if __name__ == '__main__':
+    unittest.main()
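
The hand-rolled capture_stderr helper swaps sys.stderr by hand; the standard library's contextlib.redirect_stderr expresses the same contract more safely, since the stream is restored even if func raises. A minimal equivalent, shown only for comparison and not part of the commit:

import io
from contextlib import redirect_stderr

def capture_stderr(func):
    # Same contract as the test helper: return (stderr_text, result).
    buf = io.StringIO()
    with redirect_stderr(buf):
        result = func()
    return buf.getvalue(), result

err, value = capture_stderr(lambda: 42)
assert value == 42 and err == ""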
