Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Add reductions and tests #37

Merged
merged 2 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion numba/openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5401,12 +5401,26 @@ def PLUS(self, args):
print("visit PLUS", args, type(args))
return "+"

def MINUS(self, args):
    """Visit a MINUS token and return the operator string ``"-"``.

    When ``config.DEBUG_OPENMP`` is at least 1 the visit is traced to
    stdout together with the received ``args`` and their type.
    """
    debug_enabled = config.DEBUG_OPENMP >= 1
    if debug_enabled:
        print("visit MINUS", args, type(args))
    return "-"

def STAR(self, args):
    """Visit a STAR token and return the operator string ``"*"``.

    When ``config.DEBUG_OPENMP`` is at least 1 the visit is traced to
    stdout together with the received ``args`` and their type.
    """
    debug_enabled = config.DEBUG_OPENMP >= 1
    if debug_enabled:
        print("visit STAR", args, type(args))
    return "*"

def reduction_operator(self, args):
    """Map a visited reduction operator to its reduction kind name.

    Args:
        args: Visited children of the ``reduction_operator`` rule; only
            ``args[0]`` is inspected.

    Returns:
        ``"ADD"`` for ``"+"``, ``"SUB"`` for ``"-"`` and ``"MUL"`` for
        ``"*"``.

    Raises:
        AssertionError: If ``args[0]`` is any other value, i.e. an operator
            this method does not translate.
    """
    arg = args[0]
    if config.DEBUG_OPENMP >= 1:
        print("visit reduction_operator", args, type(args), arg, type(arg))
    if arg == "+":
        return "ADD"
    elif arg == "-":
        return "SUB"
    elif arg == "*":
        return "MUL"
    # Raise explicitly instead of ``assert(0)``: assert statements are removed
    # under ``python -O``, which would make this method silently return None.
    # AssertionError is kept so the exception type seen by callers is unchanged.
    raise AssertionError("Unsupported reduction operator: %r" % (arg,))

def threadprivate_directive(self, args):
Expand Down Expand Up @@ -6150,7 +6164,9 @@ def NUMBER(self, args):
var_list: name_slice | var_list "," name_slice
number_list: NUMBER | number_list "," NUMBER
PLUS: "+"
reduction_operator: PLUS | "\\" | "*" | "-" | "&" | "^" | "|" | "&&" | "||"
MINUS: "-"
STAR: "*"
reduction_operator: PLUS | "\\" | STAR | MINUS | "&" | "^" | "|" | "&&" | "||"
threadprivate_directive: "threadprivate" "(" var_list ")"
cancellation_point_directive: "cancellation point" construct_type_clause
construct_type_clause: PARALLEL
Expand Down
338 changes: 338 additions & 0 deletions numba/tests/test_openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,345 @@ def test_impl():
return a
self.check(test_impl)

class TestReductions(TestOpenmpBase):
    """Tests for the OpenMP ``reduction`` clause with ``+``, ``-`` and ``*``.

    Two families of tests are covered, each for int, float64 and float32:

    * ``parallel reduction(op:redux)`` -- every thread assigns a constant to
      ``redux`` and the result is checked against the number of threads
      reported by thread 0.
    * ``parallel for reduction(op:redux)`` -- a 10-iteration loop updates
      ``redux`` and the result is checked against the sequential answer.

    The ``-`` tests assert the same results as the ``+`` tests.  The
    ``*_10`` variants start ``redux`` at 10 and check that the initial value
    is included in the final result.
    """

    def __init__(self, *args):
        TestOpenmpBase.__init__(self, *args)

    # ---- parallel region, reduction variable starts at the identity ----

    def test_parallel_reduction_add_int(self):
        @njit
        def test_impl():
            redux = 0
            nthreads = 0
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = 1
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, nthreads)

    def test_parallel_reduction_sub_int(self):
        @njit
        def test_impl():
            redux = 0
            nthreads = 0
            with openmp("parallel reduction(-:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = 1
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, nthreads)

    def test_parallel_reduction_mul_int(self):
        @njit
        def test_impl():
            redux = 1
            nthreads = 0
            with openmp("parallel reduction(*:redux) num_threads(8)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = 2
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 2**nthreads)

    def test_parallel_reduction_add_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(0.0)
            nthreads = np.float64(0.0)
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float64(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads)

    def test_parallel_reduction_sub_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(0.0)
            nthreads = np.float64(0.0)
            with openmp("parallel reduction(-:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float64(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads)

    def test_parallel_reduction_mul_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(1.0)
            nthreads = np.float64(0.0)
            with openmp("parallel reduction(*:redux) num_threads(8)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float64(2.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 2.0**nthreads)

    def test_parallel_reduction_add_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(0.0)
            nthreads = np.float32(0.0)
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float32(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads)

    def test_parallel_reduction_sub_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(0.0)
            nthreads = np.float32(0.0)
            with openmp("parallel reduction(-:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float32(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads)

    def test_parallel_reduction_mul_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(1.0)
            nthreads = np.float32(0.0)
            with openmp("parallel reduction(*:redux) num_threads(8)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float32(2.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 2.0**nthreads)

    # ---- parallel for, reduction variable starts at the identity ----

    def test_parallel_for_reduction_add_int(self):
        @njit
        def test_impl():
            redux = 0
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += 1
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10)

    def test_parallel_for_reduction_sub_int(self):
        @njit
        def test_impl():
            redux = 0
            with openmp("parallel for reduction(-:redux)"):
                for i in range(10):
                    redux += 1
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10)

    def test_parallel_for_reduction_mul_int(self):
        @njit
        def test_impl():
            redux = 1
            with openmp("parallel for reduction(*:redux)"):
                for i in range(10):
                    redux *= 2
            return redux

        redux = test_impl()
        self.assertEqual(redux, 2**10)

    def test_parallel_for_reduction_add_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(0.0)
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += np.float64(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0)

    def test_parallel_for_reduction_sub_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(0.0)
            with openmp("parallel for reduction(-:redux)"):
                for i in range(10):
                    redux += np.float64(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0)

    def test_parallel_for_reduction_mul_fp64(self):
        @njit
        def test_impl():
            redux = np.float64(1.0)
            with openmp("parallel for reduction(*:redux)"):
                for i in range(10):
                    redux *= np.float64(2.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 2.0**10)

    def test_parallel_for_reduction_add_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(0.0)
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += np.float32(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0)

    def test_parallel_for_reduction_sub_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(0.0)
            with openmp("parallel for reduction(-:redux)"):
                for i in range(10):
                    redux += np.float32(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0)

    def test_parallel_for_reduction_mul_fp32(self):
        @njit
        def test_impl():
            redux = np.float32(1.0)
            with openmp("parallel for reduction(*:redux)"):
                for i in range(10):
                    redux *= np.float32(2.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 2.0**10)

    # ---- reduction variable starts at 10: initial value must be kept ----

    def test_parallel_reduction_add_int_10(self):
        @njit
        def test_impl():
            redux = 10
            nthreads = 0
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = 1
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, nthreads+10)

    def test_parallel_reduction_add_fp32_10(self):
        @njit
        def test_impl():
            redux = np.float32(10.0)
            nthreads = np.float32(0.0)
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float32(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads+10.0)

    def test_parallel_reduction_add_fp64_10(self):
        @njit
        def test_impl():
            redux = np.float64(10.0)
            nthreads = np.float64(0.0)
            with openmp("parallel reduction(+:redux)"):
                thread_id = omp_get_thread_num()
                if thread_id == 0:
                    nthreads = omp_get_num_threads()
                redux = np.float64(1.0)
            return redux, nthreads

        redux, nthreads = test_impl()
        self.assertGreater(nthreads, 1)
        self.assertEqual(redux, 1.0*nthreads+10.0)

    def test_parallel_for_reduction_add_int_10(self):
        @njit
        def test_impl():
            redux = 10
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += 1
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10+10)

    # This method was previously a second definition of
    # ``test_parallel_for_reduction_add_fp32`` (zero initial value), which
    # replaced the earlier method of the same name in the class namespace.
    # It is now the float32 counterpart of the neighbouring ``*_10`` tests.
    def test_parallel_for_reduction_add_fp32_10(self):
        @njit
        def test_impl():
            redux = np.float32(10.0)
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += np.float32(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0+10.0)

    def test_parallel_for_reduction_add_fp64_10(self):
        @njit
        def test_impl():
            redux = np.float64(10.0)
            with openmp("parallel for reduction(+:redux)"):
                for i in range(10):
                    redux += np.float64(1.0)
            return redux

        redux = test_impl()
        self.assertEqual(redux, 10.0+10.0)

class TestOpenmpDataClauses(TestOpenmpBase):

Expand Down