Merge pull request #37 from Python-for-HPC/add-reductions-and-tests

DrTodd13 · web-flow · commit 1fa0b32f132f · 2024-09-27T11:56:36.000-07:00
Add reductions and tests
diff --git a/numba/openmp.py b/numba/openmp.py
@@ -5401,12 +5401,26 @@ def PLUS(self, args):
             print("visit PLUS", args, type(args))
         return "+"
 
+    def MINUS(self, args):  # presumably invoked for the MINUS grammar terminal ("-") - confirm against the parser setup
+        if config.DEBUG_OPENMP >= 1:  # trace output only when OpenMP debugging is enabled
+            print("visit MINUS", args, type(args))
+        return "-"  # the string that reduction_operator compares against to pick "SUB"
+
+    def STAR(self, args):  # presumably invoked for the STAR grammar terminal ("*") - confirm against the parser setup
+        if config.DEBUG_OPENMP >= 1:  # trace output only when OpenMP debugging is enabled
+            print("visit STAR", args, type(args))
+        return "*"  # the string that reduction_operator compares against to pick "MUL"
+
     def reduction_operator(self, args):
         arg = args[0]
         if config.DEBUG_OPENMP >= 1:
             print("visit reduction_operator", args, type(args), arg, type(arg))
         if arg == "+":
             return "ADD"
+        elif arg == "-":  # "-" is the value returned by MINUS
+            return "SUB"
+        elif arg == "*":  # "*" is the value returned by STAR
+            return "MUL"
-        assert(0)
+        raise AssertionError(f"unsupported reduction operator: {arg!r}")  # explicit raise: a bare assert is stripped under python -O
 
     def threadprivate_directive(self, args):
@@ -6150,7 +6164,9 @@ def NUMBER(self, args):
     var_list: name_slice | var_list "," name_slice
     number_list: NUMBER | number_list "," NUMBER
     PLUS: "+"
-    reduction_operator: PLUS | "\\" | "*" | "-" | "&" | "^" | "|" | "&&" | "||"
+    MINUS: "-"
+    STAR: "*"
+    reduction_operator: PLUS | "\\" | STAR | MINUS | "&" | "^" | "|" | "&&" | "||"
     threadprivate_directive: "threadprivate" "(" var_list ")"
     cancellation_point_directive: "cancellation point" construct_type_clause
     construct_type_clause: PARALLEL
diff --git a/numba/tests/test_openmp.py b/numba/tests/test_openmp.py
@@ -996,7 +996,345 @@ def test_impl():
             return a
         self.check(test_impl)
 
+class TestReductions(TestOpenmpBase):
+    def __init__(self, *args):  # adds no state of its own; only delegates to the base class
+        TestOpenmpBase.__init__(self, *args)  # forwards all positional args unchanged
+
+    def test_parallel_reduction_add_int(self):  # "+" reduction of an int across a parallel region
+        @njit
+        def test_impl():
+            redux = 0
+            nthreads = 0
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = 1  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, nthreads)  # expects a contribution of 1 per thread
+
+    def test_parallel_reduction_sub_int(self):  # "-" reduction of an int across a parallel region
+        @njit
+        def test_impl():
+            redux = 0
+            nthreads = 0
+            with openmp("parallel reduction(-:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = 1  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, nthreads)  # expects +nthreads, i.e. the same total as the "+" reduction test
+
+    def test_parallel_reduction_mul_int(self):  # "*" reduction of an int across a parallel region
+        @njit
+        def test_impl():
+            redux = 1
+            nthreads = 0
+            with openmp("parallel reduction(*:redux) num_threads(8)"):  # explicitly requests 8 threads
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = 2  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 2**nthreads)  # expects one factor of 2 per thread
+
+    def test_parallel_reduction_add_fp64(self):  # "+" reduction of a float64 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float64(0.0)
+            nthreads = np.float64(0.0)  # starts as a float64, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float64(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads)  # expects a contribution of 1.0 per thread
+
+    def test_parallel_reduction_sub_fp64(self):  # "-" reduction of a float64 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float64(0.0)
+            nthreads = np.float64(0.0)  # starts as a float64, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(-:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float64(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads)  # expects +nthreads, i.e. the same total as the "+" reduction test
+
+    def test_parallel_reduction_mul_fp64(self):  # "*" reduction of a float64 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float64(1.0)
+            nthreads = np.float64(0.0)  # starts as a float64, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(*:redux) num_threads(8)"):  # explicitly requests 8 threads
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float64(2.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 2.0**nthreads)  # expects one factor of 2.0 per thread
+
+    def test_parallel_reduction_add_fp32(self):  # "+" reduction of a float32 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float32(0.0)
+            nthreads = np.float32(0.0)  # starts as a float32, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float32(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads)  # expects a contribution of 1.0 per thread
+
+    def test_parallel_reduction_sub_fp32(self):  # "-" reduction of a float32 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float32(0.0)
+            nthreads = np.float32(0.0)  # starts as a float32, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(-:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float32(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads)  # expects +nthreads, i.e. the same total as the "+" reduction test
+
+    def test_parallel_reduction_mul_fp32(self):  # "*" reduction of a float32 across a parallel region
+        @njit
+        def test_impl():
+            redux = np.float32(1.0)
+            nthreads = np.float32(0.0)  # starts as a float32, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(*:redux) num_threads(8)"):  # explicitly requests 8 threads
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float32(2.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 2.0**nthreads)  # expects one factor of 2.0 per thread
+
+    def test_parallel_for_reduction_add_int(self):  # "+" reduction of an int over a parallel for loop
+        @njit
+        def test_impl():
+            redux = 0
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1
+                    redux += 1
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10)  # total is independent of how many threads ran the loop
+
+    def test_parallel_for_reduction_sub_int(self):  # "-" reduction of an int over a parallel for loop
+        @njit
+        def test_impl():
+            redux = 0
+            with openmp("parallel for reduction(-:redux)"):
+                for i in range(10):  # 10 iterations; note the body still uses += under the "-" clause
+                    redux += 1
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10)  # expects +10, i.e. the same total as the "+" reduction test
+
+    def test_parallel_for_reduction_mul_int(self):  # "*" reduction of an int over a parallel for loop
+        @njit
+        def test_impl():
+            redux = 1
+            with openmp("parallel for reduction(*:redux)"):
+                for i in range(10):  # 10 iterations, each multiplying by 2
+                    redux *= 2
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 2**10)  # one factor of 2 per iteration
+
+    def test_parallel_for_reduction_add_fp64(self):  # "+" reduction of a float64 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float64(0.0)
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1.0
+                    redux += np.float64(1.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10.0)  # exact comparison; the expected value is a small whole number
+
+    def test_parallel_for_reduction_sub_fp64(self):  # "-" reduction of a float64 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float64(0.0)
+            with openmp("parallel for reduction(-:redux)"):
+                for i in range(10):  # 10 iterations; note the body still uses += under the "-" clause
+                    redux += np.float64(1.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10.0)  # expects +10.0, i.e. the same total as the "+" reduction test
+
+    def test_parallel_for_reduction_mul_fp64(self):  # "*" reduction of a float64 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float64(1.0)
+            with openmp("parallel for reduction(*:redux)"):
+                for i in range(10):  # 10 iterations, each multiplying by 2.0
+                    redux *= np.float64(2.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 2.0**10)  # one factor of 2.0 per iteration
+
+    def test_parallel_for_reduction_add_fp32(self):  # "+" reduction of a float32 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float32(0.0)
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1.0
+                    redux += np.float32(1.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10.0)  # exact comparison; the expected value is a small whole number
+
+    def test_parallel_for_reduction_sub_fp32(self):  # "-" reduction of a float32 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float32(0.0)
+            with openmp("parallel for reduction(-:redux)"):
+                for i in range(10):  # 10 iterations; note the body still uses += under the "-" clause
+                    redux += np.float32(1.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10.0)  # expects +10.0, i.e. the same total as the "+" reduction test
+
+    def test_parallel_for_reduction_mul_fp32(self):  # "*" reduction of a float32 over a parallel for loop
+        @njit
+        def test_impl():
+            redux = np.float32(1.0)
+            with openmp("parallel for reduction(*:redux)"):
+                for i in range(10):  # 10 iterations, each multiplying by 2.0
+                    redux *= np.float32(2.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 2.0**10)  # one factor of 2.0 per iteration
+
+    def test_parallel_reduction_add_int_10(self):  # "+" reduction of an int that starts at 10 instead of 0
+        @njit
+        def test_impl():
+            redux = 10  # non-zero value held before the region
+            nthreads = 0
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = 1  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, nthreads+10)  # expects the pre-region 10 plus 1 per thread
+
+    def test_parallel_reduction_add_fp32_10(self):  # "+" reduction of a float32 that starts at 10.0 instead of 0.0
+        @njit
+        def test_impl():
+            redux = np.float32(10.0)  # non-zero value held before the region
+            nthreads = np.float32(0.0)  # starts as a float32, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float32(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads+10.0)  # expects the pre-region 10.0 plus 1.0 per thread
+
+    def test_parallel_reduction_add_fp64_10(self):  # "+" reduction of a float64 that starts at 10.0 instead of 0.0
+        @njit
+        def test_impl():
+            redux = np.float64(10.0)  # non-zero value held before the region
+            nthreads = np.float64(0.0)  # starts as a float64, later assigned from omp_get_num_threads()
+            with openmp("parallel reduction(+:redux)"):
+                thread_id = omp_get_thread_num()
+                if thread_id == 0:  # only thread 0 records the team size
+                    nthreads = omp_get_num_threads()
+                redux = np.float64(1.0)  # assigned unconditionally inside the region
+            return redux, nthreads
+
+        redux, nthreads = test_impl()
+        self.assertGreater(nthreads, 1)  # fails if the region reported a single thread
+        self.assertEqual(redux, 1.0*nthreads+10.0)  # expects the pre-region 10.0 plus 1.0 per thread
+
+    def test_parallel_for_reduction_add_int_10(self):  # "+" parallel-for reduction of an int that starts at 10
+        @njit
+        def test_impl():
+            redux = 10  # non-zero value held before the loop
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1
+                    redux += 1
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10+10)  # pre-loop 10 plus 10 increments
+
+    def test_parallel_for_reduction_add_fp32_10(self):  # fp32 counterpart of the _int_10 / _fp64_10 parallel-for tests
+        @njit
+        def test_impl():
+            redux = np.float32(10.0)  # non-zero value held before the loop
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1.0
+                    redux += np.float32(1.0)
+            return redux
+
+        redux = test_impl()
+        self.assertEqual(redux, 10.0+10.0)  # pre-loop 10.0 plus 10 increments of 1.0
+
+    def test_parallel_for_reduction_add_fp64_10(self):  # "+" parallel-for reduction of a float64 that starts at 10.0
+        @njit
+        def test_impl():
+            redux = np.float64(10.0)  # non-zero value held before the loop
+            with openmp("parallel for reduction(+:redux)"):
+                for i in range(10):  # 10 iterations, each adding 1.0
+                    redux += np.float64(1.0)
+            return redux
 
+        redux = test_impl()
+        self.assertEqual(redux, 10.0+10.0)  # pre-loop 10.0 plus 10 increments of 1.0
 
 class TestOpenmpDataClauses(TestOpenmpBase):