Open
Description
I'm running on Gen9 and dppy 17.4 and have sporadic inaccurate results relative to numpy for the following code
import argparse
import math
import time
import dpctl
import numba
import numpy as np
import numpy.random as rnd
import numba_dppy as dppy
import numba_dppy
from numba_dppy import kernel, atomic, DEFAULT_LOCAL_SIZE
atomic_add = atomic.add
SEED = 777777
DTYPE = np.float32
@kernel(access_types={"read_only": ["a", "b"], "write_only": ["c"]})
def l2_distance_kernel(a, b, c):
i = numba_dppy.get_global_id(0)
j = numba_dppy.get_global_id(1)
sub = a[i, j] - b[i, j]
sq = sub**2
atomic_add(c, 0, sq)
def gen_data(nopt, dims, dtype=DTYPE):
rnd.seed(SEED)
return rnd.random((nopt, dims)).astype(dtype), rnd.random((nopt, dims)).astype(dtype)
def l2_distance_python(a, b):
return np.linalg.norm(a - b)
def run(sizes=3, step=2, nopt=2**20):
parser = argparse.ArgumentParser(description="Black-Scholes")
parser.add_argument("--iter", dest="iter", type=int, default=10)
args = parser.parse_args()
dims = 1
for _ in range(sizes):
# Use the environment variable SYCL_DEVICE_FILTER to change the default device.
# See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
device = dpctl.select_default_device()
print("Using device ...", device)
device.print_device_info()
X, Y = gen_data(nopt, dims, np.float32)
distance = np.asarray([0.0]).astype(np.float32)
p_dis = l2_distance_python(X, Y)
n_dis = 0
with dpctl.device_context(device):
l2_distance_kernel[(X.shape[0], X.shape[1]), DEFAULT_LOCAL_SIZE](X, Y, distance)
if int(distance) >= 0:
n_dis = math.sqrt(distance)
if np.allclose(n_dis, p_dis, rtol=1e-05 * np.sqrt(nopt)):
print("Test succeeded for size", nopt, ". Python dis: ", p_dis, " Numba dis: ", n_dis, "\n")
else:
print("Test failed for size", nopt, ". Python dis: ", p_dis, " Numba dis: ", n_dis, "\n")
nopt *= step
print("Done...")
if __name__ == "__main__":
run()
the results is the following
(dppy_bench) geexie@geek-box:~/code/dpbench$ IGC_ShaderDumpEnable=1 IGC_DumpToCurrentDir=1 ICG_DumpCompilerStats=1 NUMBA_DPPY_OFFLOAD_DIAGNOSTICS=1 NUMBA_DPPY_SAVE_IR_FILES=1 NUMBA_DPPY_FALLBACK_ON_CPU=0 python l2_distance.py
Using device ... <dpctl.SyclDevice [backend_type.level_zero, device_type.gpu, Intel(R) UHD Graphics [0x9bca]] at 0x7f9484631bb0>
Name Intel(R) UHD Graphics [0x9bca]
Driver version 1.2.21786
Vendor Intel(R) Corporation
Profile FULL_PROFILE
Filter string level_zero:gpu:0
================================================================================
Parallel Accelerator Optimizing: Function l2_distance_kernel,
/localdisk/dpbench/l2_distance.py (31)
================================================================================
No source available
------------------------------ After Optimisation ------------------------------
Parallel structure is already optimal.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
------------------------------- Auto-offloading --------------------------------
Parallel structure is already optimal.
Device - 'level_zero:gpu:0'
--------------------------------------------------------------------------------
-------------------------------Generated LLVM IR--------------------------------
generated_llvm.ir
================================================================================
-----------------------------Generated LLVM Bitcode-----------------------------
generated_llvm.bc
================================================================================
--------------------------------Generated SPIRV---------------------------------
generated_spirv.spir
================================================================================
Test succeeded for size 1048576 . Python dis: 417.9472 Numba dis: 417.8261263671768
Using device ... <dpctl.SyclDevice [backend_type.level_zero, device_type.gpu, Intel(R) UHD Graphics [0x9bca]] at 0x7f9484631470>
Name Intel(R) UHD Graphics [0x9bca]
Driver version 1.2.21786
Vendor Intel(R) Corporation
Profile FULL_PROFILE
Filter string level_zero:gpu:0
Test failed for size 2097152 . Python dis: 591.58044 Numba dis: 723.4867612817804
Using device ... <dpctl.SyclDevice [backend_type.level_zero, device_type.gpu, Intel(R) UHD Graphics [0x9bca]] at 0x7f948459e5b0>
Name Intel(R) UHD Graphics [0x9bca]
Driver version 1.2.21786
Vendor Intel(R) Corporation
Profile FULL_PROFILE
Filter string level_zero:gpu:0
Test failed for size 4194304 . Python dis: 835.9003 Numba dis: 1100.8355917211252
Done...
Full code of the benchmark you can find here