35 | 35 | from numba.core.typing import signature
36 | 36 |
37 | 37 | import warnings
38 | | -from numba.core.errors import NumbaParallelSafetyWarning
| 38 | +from numba.core.errors import NumbaParallelSafetyWarning, NumbaPerformanceWarning
39 | 39 |
40 | 40 | from .target import SPIR_GENERIC_ADDRSPACE
41 | 41 | from .dufunc_inliner import dufunc_inliner
42 | 42 | from . import dppl_host_fn_call_gen as dppl_call_gen
43 | 43 | import dppl.ocldrv as driver
| 44 | +from numba.dppl.target import DPPLTargetContext
44 | 45 |
45 | 46 |
46 | 47 | def _print_block(block):
@@ -956,10 +957,72 @@ def load_range(v):
956 | 957 |
957 | 958 | from numba.core.lowering import Lower
958 | 959 |
| 960 | +
959 | 961 | class DPPLLower(Lower):
960 | 962 |     def __init__(self, context, library, fndesc, func_ir, metadata=None):
961 | 963 |         Lower.__init__(self, context, library, fndesc, func_ir, metadata)
962 | | -        lowering.lower_extensions[parfor.Parfor] = _lower_parfor_gufunc
| 964 | +
| 965 | +        fndesc_cpu = copy.copy(fndesc)
| 966 | +        fndesc_cpu.calltypes = fndesc.calltypes.copy()
| 967 | +        fndesc_cpu.typemap = fndesc.typemap.copy()
| 968 | +
| 969 | +        cpu_context = context.cpu_context if isinstance(context, DPPLTargetContext) else context
| 970 | +        self.gpu_lower = Lower(context, library, fndesc, func_ir.copy(), metadata)
| 971 | +        self.cpu_lower = Lower(cpu_context, library, fndesc_cpu, func_ir.copy(), metadata)
| 972 | +
| 973 | +    def lower(self):
| 974 | +        # We try to lower on GPU first and, if that fails, fall back to lowering on CPU.
| 975 | +        # This happens in the following order:
| 976 | +        # 1. Start lowering the parent function.
| 977 | +        # 2. Try to lower the parfor on GPU.
| 978 | +        # 2.a. Enter lower_parfor_rollback and prepare the function for GPU lowering - insert get_global_id.
| 979 | +        # 2.a.a. While lowering the parfor body, this point (DPPLLower.lower()) is entered a second time.
| 980 | +        # 2.a.b. If lowering on GPU fails, try on CPU.
| 981 | +        # 2.a.c. Since get_global_id is NOT supported in the CPU context, this fails and raises an exception.
| 982 | +        # 2.b. lower_parfor_rollback catches the exception and restores the parfor body and related state to their initial form.
| 983 | +        # 2.c. lower_parfor_rollback re-raises the exception so it is caught here (DPPLLower.lower()).
| 984 | +        # 3. Catch the exception and start parfor lowering with the CPU context.
| 985 | +
| 986 | +        # WARNING: this approach only works if no device-specific modifications were added to the
| 987 | +        # parent function (the function containing the parfor). If the parent function was patched
| 988 | +        # with device-specific code, a different solution is needed.
| 989 | +        try:
| 990 | +            lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
| 991 | +            self.gpu_lower.lower()
| 992 | +            self.base_lower = self.gpu_lower
| 993 | +            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
| 994 | +        except:
| 995 | +            lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
| 996 | +            self.cpu_lower.lower()
| 997 | +            self.base_lower = self.cpu_lower
| 998 | +
| 999 | +        self.env = self.base_lower.env
| 1000 | +        self.call_helper = self.base_lower.call_helper
| 1001 | +
| 1002 | +    def create_cpython_wrapper(self, release_gil=False):
| 1003 | +        return self.base_lower.create_cpython_wrapper(release_gil)
| 1004 | +
| 1005 | +
| 1006 | +def lower_parfor_rollback(lowerer, parfor):
| 1007 | +    cache_parfor_races = copy.copy(parfor.races)
| 1008 | +    cache_parfor_params = copy.copy(parfor.params)
| 1009 | +    cache_parfor_loop_body = copy.deepcopy(parfor.loop_body)
| 1010 | +    cache_parfor_init_block = parfor.init_block.copy()
| 1011 | +    cache_parfor_loop_nests = parfor.loop_nests.copy()
| 1012 | +
| 1013 | +    try:
| 1014 | +        _lower_parfor_gufunc(lowerer, parfor)
| 1015 | +    except Exception as e:
| 1016 | +        msg = ("Failed to lower parfor on GPU")
| 1017 | +        warnings.warn(NumbaPerformanceWarning(msg, parfor.loc))
| 1018 | +        raise e
| 1019 | +    finally:
| 1020 | +        parfor.params = cache_parfor_params
| 1021 | +        parfor.loop_body = cache_parfor_loop_body
| 1022 | +        parfor.init_block = cache_parfor_init_block
| 1023 | +        parfor.loop_nests = cache_parfor_loop_nests
| 1024 | +        parfor.races = cache_parfor_races
| 1025 | +
963 | 1026 |
964 | 1027 | def dppl_lower_array_expr(lowerer, expr):
965 | 1028 |     raise NotImplementedError(expr)
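
The patch combines two ideas: lower the whole function with the GPU target first and fall back to the CPU target on any failure, and snapshot/restore the mutable parfor state around the attempt so the fallback starts from unmodified IR. Below is a minimal standalone sketch of that control flow; the names LoweringError, lower_on_gpu, lower_on_cpu, and compile_with_fallback are invented for illustration and are not numba or numba-dppl APIs, and the "IR" is just a dict standing in for the real parfor state.

import copy
import warnings


class LoweringError(RuntimeError):
    """Raised by a backend that cannot lower the given IR (illustrative only)."""


def lower_on_gpu(ir_state):
    # Hypothetical GPU path: mutates the IR for the device, then succeeds or fails.
    ir_state["body"].append("get_global_id")   # device-specific modification
    if "unsupported_op" in ir_state["body"]:
        raise LoweringError("op not supported on GPU")
    return "gpu-code"


def lower_on_cpu(ir_state):
    # Hypothetical CPU path: expects the IR in its original, unmodified form.
    return "cpu-code"


def compile_with_fallback(ir_state):
    """Try the GPU backend first; on failure restore the IR and use the CPU backend.

    Mirrors the DPPLLower.lower() / lower_parfor_rollback flow: snapshot the
    mutable state (like the cache_parfor_* copies), attempt device lowering,
    warn and roll back on failure, then retry with the CPU context.
    """
    snapshot = copy.deepcopy(ir_state)
    try:
        return lower_on_gpu(ir_state)
    except LoweringError as exc:
        warnings.warn(f"Failed to lower on GPU, falling back to CPU: {exc}")
        ir_state.clear()
        ir_state.update(snapshot)               # roll back device-specific edits
        return lower_on_cpu(ir_state)


if __name__ == "__main__":
    ok = {"body": ["load", "add", "store"]}
    bad = {"body": ["load", "unsupported_op", "store"]}
    print(compile_with_fallback(ok))    # "gpu-code"
    print(compile_with_fallback(bad))   # "cpu-code", after a warning and rollback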