test: Add more CUDA tests

AlexisPerry · Dec 18, 2018 · 369cc3e · 369cc3e
1 parent 3e1e640
commit 369cc3e
Show file tree

Hide file tree

Showing 5 changed files with 404 additions and 0 deletions.
diff --git a/language/tests/cuda/run_pass/forbid.rg b/language/tests/cuda/run_pass/forbid.rg
@@ -0,0 +1,52 @@
+-- Copyright 2018 Stanford University, NVIDIA Corporation
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- runs-with:
+-- [["-fcuda", "1", "-ll:gpu", "1" ]]
+
+import "regent"
+
+-- Sets every element reachable through partition `p` of `r` to 10.0, walking
+-- the subregions one color at a time. The task itself is annotated
+-- __demand(__cuda), while the outer loop over colors carries __forbid(__cuda).
+-- NOTE(review): presumably the point of this test is that a __forbid(__cuda)
+-- loop is accepted inside a __demand(__cuda) task -- confirm.
+__demand(__cuda)
+task init(r : region(ispace(int1d), double),
+          p : partition(disjoint, r, ispace(int1d)))
+where
+  reads writes(r)
+do
+  __forbid(__cuda)
+  for c in p.colors do
+    -- Subregion of `r` associated with color `c`.
+    var s = p[c]
+    for e in s do
+      @e = 10.0
+    end
+  end
+end
+
+-- Asserts that each element of `r` holds exactly 10.0, reporting
+-- "test failed" otherwise.
+task check(r : region(ispace(int1d), double))
+where
+  reads(r)
+do
+  for point in r do
+    var value = @point
+    regentlib.assert(value == 10.0, "test failed")
+  end
+end
+
+-- Builds a 100-element region, partitions it with `equal` into 10 colors,
+-- fills it through `init`, and verifies the contents with `check`.
+task main()
+  var points = ispace(int1d, 100)
+  var colors = ispace(int1d, 10)
+  var data = region(points, double)
+  var pieces = partition(equal, data, colors)
+
+  init(data, pieces)
+  check(data)
+end
+
+-- Use `main` as the top-level task.
+regentlib.start(main)
diff --git a/language/tests/cuda/run_pass/math.rg b/language/tests/cuda/run_pass/math.rg
@@ -0,0 +1,91 @@
+-- Copyright 2018 Stanford University
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- FIXME: Turn off this test until vectorizer supports scattered reads again.
+-- runs-with:
+-- []
+--
+
+import "regent"
+
+-- Per-element fields of the math test: one input value plus one output slot
+-- for each code path whose results are compared.
+fspace fs
+{
+  -- Input value, written by `init`.
+  v : double,
+  -- Result of the loop marked __forbid(__vectorize) in `f`.
+  v_cpu : double,
+  -- Result of the loop marked __demand(__vectorize) in `f`.
+  v_cpu_vec : double,
+  -- Result of the __demand(__cuda) task `g`.
+  v_gpu : double,
+}
+
+-- Double-precision math functions obtained from regentlib.
+-- NOTE(review): `ceil`, `trunc` and `floor` are bound here but are not
+-- referenced by any task in this file.
+local ceil = regentlib.ceil(double)
+local log2 = regentlib.log2(double)
+local log10 = regentlib.log10(double)
+local log = regentlib.log(double)
+local cos = regentlib.cos(double)
+local sin = regentlib.sin(double)
+local exp2 = regentlib.exp2(double)
+local fabs = regentlib.fabs(double)
+local trunc = regentlib.trunc(double)
+local floor = regentlib.floor(double)
+local sqrt = regentlib.sqrt(double)
+local exp = regentlib.exp(double)
+
+-- Seeds field `v` of each element with its own index converted to double.
+task init(r : region(ispace(int1d), fs))
+where
+  reads writes(r.v)
+do
+  for point in r do
+    var i = [int](point)
+    point.v = [double](i)
+  end
+end
+
+-- Evaluates the same chain of math functions on `v` twice: once in a loop
+-- where vectorization is forbidden (result in `v_cpu`) and once in a loop
+-- where vectorization is demanded (result in `v_cpu_vec`). `toplevel` then
+-- compares the two results.
+task f(r : region(ispace(int1d), fs))
+where
+  reads(r.v),
+  reads writes(r.{v_cpu, v_cpu_vec})
+do
+  -- Scalar reference loop.
+  __forbid(__vectorize)
+  for e in r do
+    e.v_cpu = exp2(exp(log10(fabs(log2(fabs(log(fabs(sin(cos(fabs(sqrt(e.v + 10))))))))))))
+  end
+  -- Same expression, with vectorization required.
+  __demand(__vectorize)
+  for e in r do
+    e.v_cpu_vec = exp2(exp(log10(fabs(log2(fabs(log(fabs(sin(cos(fabs(sqrt(e.v + 10))))))))))))
+  end
+end
+
+-- CUDA-demanded counterpart of `f`: applies the identical expression to `v`
+-- and stores the result in `v_gpu`, which `toplevel` compares against `v_cpu`.
+__demand(__cuda)
+task g(r : region(ispace(int1d), fs))
+where
+  reads(r.v),
+  reads writes(r.v_gpu)
+do
+  for e in r do
+    e.v_gpu = exp2(exp(log10(fabs(log2(fabs(log(fabs(sin(cos(fabs(sqrt(e.v + 10))))))))))))
+  end
+end
+
+-- Runs `init`, `f` and `g` on a 100-element region, then checks that the
+-- vectorized and CUDA results each agree with the scalar CPU result to
+-- within 1.0e-6.
+task toplevel()
+  var num_elements = 100
+  var data = region(ispace(int1d, num_elements), fs)
+
+  init(data)
+  f(data)
+  g(data)
+
+  for point in data do
+    var reference = point.v_cpu
+    regentlib.assert(fabs(reference - point.v_cpu_vec) < 1.0e-6, "test failed")
+    regentlib.assert(fabs(reference - point.v_gpu) < 1.0e-6, "test failed")
+  end
+end
+
+-- Use `toplevel` as the top-level task.
+regentlib.start(toplevel)
diff --git a/language/tests/cuda/run_pass/parallelize_tasks.rg b/language/tests/cuda/run_pass/parallelize_tasks.rg
@@ -0,0 +1,150 @@
+-- Copyright 2018 Stanford University
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- runs-with:
+-- [["-fcuda", "1", "-ll:gpu", "1" ]]
+
+import "regent"
+
+local c = regentlib.c
+
+-- Fields of the stencil test grid.
+fspace fs
+{
+  -- Stencil input.
+  f : double,
+  -- Output of the parallel stencils (`stencil1` then `stencil2`).
+  g : double,
+  -- Output of `stencil_serial`, compared against `g` in `check`.
+  h : double,
+}
+
+-- Sets `f` to a linear function of the point's coordinates,
+-- 0.3 * (x + 1) + 0.7 * (y + 1), and clears `g` and `h`.
+-- NOTE(review): nothing in this file calls `init`; `test` initializes the
+-- fields with `fill` instead -- confirm whether that is intentional.
+__demand(__parallel, __cuda)
+task init(r : region(ispace(int2d), fs))
+where reads writes(r)
+do
+  for e in r do e.f = 0.3 * (e.x + 1) + 0.7 * (e.y + 1) end
+  for e in r do e.g = 0 end
+  for e in r do e.h = 0 end
+end
+
+-- First parallel stencil. For every point of `interior` it stores in `g`
+-- 0.5 times the sum of `f` at the center and at offsets (-2, 0), (0, -1),
+-- (1, 0) and (0, 2), reading from `r`. Also prints the elapsed time in
+-- microseconds, labelled "parallel version".
+-- NOTE(review): the index variables are split between the loop body and a
+-- nested `do` block, presumably to exercise scoping in the parallelizer --
+-- confirm before restructuring.
+__demand(__parallel, __cuda)
+task stencil1(interior : region(ispace(int2d), fs),
+                    r : region(ispace(int2d), fs))
+where reads(r.f), reads writes(r.g), interior <= r
+do
+  var ts_start = c.legion_get_current_time_in_micros()
+  for e in interior do
+    var center = e
+    var idx1 = e + {-2,  0}
+    var idx2 = e + { 0, -1}
+    do
+      var idx3 = e + { 1,  0}
+      var idx4 = e + { 0,  2}
+      var v1 = r[idx1].f + r[idx2].f
+      var v2 = r[idx3].f
+      -- Overwrites g; stencil2 later accumulates on top of this value.
+      r[center].g = 0.5 * (r[center].f + v1 + v2 + r[idx4].f)
+    end
+  end
+  var ts_end = c.legion_get_current_time_in_micros()
+  c.printf("parallel version: %lu us\n", ts_end - ts_start)
+end
+
+-- Second parallel stencil. For every point of `interior` it adds to `g`
+-- 0.3 times the sum of `f` at the center and at offsets (-1, 0), (0, -1),
+-- (1, 0) and (0, 1), reading from `r`. Also prints the elapsed time in
+-- microseconds, labelled "parallel version".
+-- NOTE(review): as in stencil1, the index variables are split between the
+-- loop body and a nested `do` block, presumably on purpose -- confirm before
+-- restructuring.
+__demand(__parallel, __cuda)
+task stencil2(interior : region(ispace(int2d), fs),
+                    r : region(ispace(int2d), fs))
+where reads(r.f), reads writes(r.g), interior <= r
+do
+  var ts_start = c.legion_get_current_time_in_micros()
+  for e in interior do
+    var center = e
+    var idx1 = e + {-1,  0}
+    var idx4 = e + { 0,  1}
+    do
+      var idx2 = e + { 0, -1}
+      var idx3 = e + { 1,  0}
+      var v1 = r[center].f + r[idx1].f + r[idx2].f
+      var v2 = r[idx3].f + r[idx4].f
+      -- Accumulates onto the value already stored in g.
+      r[center].g += 0.3 * (v1 + v2)
+    end
+  end
+  var ts_end = c.legion_get_current_time_in_micros()
+  c.printf("parallel version: %lu us\n", ts_end - ts_start)
+end
+
+-- Serial reference for stencil1 followed by stencil2: for every point of
+-- `interior` it writes the combined result into `h`, then prints the elapsed
+-- time in microseconds, labelled "serial version".
+task stencil_serial(interior : region(ispace(int2d), fs),
+                           r : region(ispace(int2d), fs))
+where reads(r.f), reads writes(r.h), interior <= r
+do
+  var t0 = c.legion_get_current_time_in_micros()
+  for p in interior do
+    -- Both sums are accumulated left to right, starting from the center.
+    var wide = r[p].f +
+               r[p + {-2, 0}].f + r[p + {0, -1}].f +
+               r[p + { 1, 0}].f + r[p + {0,  2}].f
+    var near = r[p].f +
+               r[p + {-1, 0}].f + r[p + {0, -1}].f +
+               r[p + { 1, 0}].f + r[p + {0,  1}].f
+    r[p].h = 0.5 * wide
+    r[p].h += 0.3 * near
+  end
+  var t1 = c.legion_get_current_time_in_micros()
+  c.printf("serial version: %lu us\n", t1 - t0)
+end
+
+-- Updates `f` in place: the statement adds `e.f + c` to the existing value,
+-- so each element becomes 2 * f + c.
+-- NOTE(review): parameter `c` appears to shadow the file-level
+-- `c = regentlib.c` inside this task; harmless here because the body calls
+-- no C function.
+__demand(__parallel, __cuda)
+task increment(r : region(ispace(int2d), fs), c : double)
+where reads writes(r.f)
+do
+  for e in r do e.f += e.f + c end
+end
+
+local cmath = terralib.includec("math.h")
+
+-- Verifies that the parallel result `g` and the serial result `h` differ by
+-- less than 0.000001 at every point of `r`.
+task check(r : region(ispace(int2d), fs))
+where reads(r.{g, h})
+do
+  for point in r do
+    var delta = point.h - point.g
+    regentlib.assert(cmath.fabs(delta) < 0.000001, "test failed")
+  end
+end
+
+-- Drives the stencil comparison on a size x size grid.
+--
+-- All three fields start at 0.0. The interior region is the grid shrunk by
+-- {2, 1} at the low corner and {1, 2} at the high corner, which matches the
+-- furthest stencil offsets (-2 / +1 in the first coordinate, -1 / +2 in the
+-- second) so that no stencil reads outside `primary_region`. Each of the 4
+-- steps runs both parallel stencils and the serial reference, updates `f`
+-- through `increment`, and then checks `g` against `h`.
+task test(size : int)
+  -- NOTE(review): no random numbers are drawn anywhere in this file; the
+  -- seed call is kept as-is.
+  c.srand48(12345)
+  var is = ispace(int2d, {size, size})
+  var primary_region = region(is, fs)
+  fill(primary_region.{f, g, h}, 0.0)
+  var bounds = primary_region.bounds
+  -- Single-color coloring whose only subregion is the interior rectangle.
+  var coloring = c.legion_domain_point_coloring_create()
+  c.legion_domain_point_coloring_color_domain(coloring, [int1d](0),
+                                              rect2d { bounds.lo + {2, 1},
+                                                       bounds.hi - {1, 2} })
+  var interior_partition =
+    partition(disjoint, primary_region, coloring, ispace(int1d, 1))
+  c.legion_domain_point_coloring_destroy(coloring)
+  var interior_region = interior_partition[0]
+
+  var steps = 4
+  while steps > 0 do
+    -- NOTE(review): presumably a single iteration (upper bound exclusive, as
+    -- in Terra numeric loops) -- confirm.
+    for idx = 0, 1 do
+      stencil1(interior_region, primary_region)
+      stencil2(interior_region, primary_region)
+      stencil_serial(interior_region, primary_region)
+      increment(primary_region, 1)
+    end
+    increment(primary_region, 2)
+    check(primary_region)
+    steps -= 1
+  end
+end
+
+-- Entry task: runs the stencil test with size 100.
+task toplevel()
+  var grid_size = 100
+  test(grid_size)
+end
+
+-- Use `toplevel` as the top-level task.
+regentlib.start(toplevel)
diff --git a/language/tests/cuda/run_pass/scalar_reduce.rg b/language/tests/cuda/run_pass/scalar_reduce.rg
@@ -0,0 +1,52 @@
+-- Copyright 2018 Stanford University
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- runs-with:
+-- [["-fcuda", "1", "-ll:gpu", "1" ]]
+
+import "regent"
+
+-- Writes the scalar `v` into every element of `r`. The task is annotated
+-- __demand(__cuda).
+__demand(__cuda)
+task init(r : region(ispace(int2d), double),
+          v : double)
+where
+  reads writes(r)
+do
+  for e in r do
+    @e = v
+  end
+end
+
+-- Returns the sum of all elements of `r`, accumulated into a local scalar
+-- inside a loop of a __demand(__cuda) task.
+__demand(__cuda)
+task red(r : region(ispace(int2d), double))
+where
+  reads(r)
+do
+  -- Scalar accumulator that the loop below reduces into.
+  var sum : double = 0.0
+  for e in r do
+    sum += @e
+  end
+  return sum
+end
+
+-- Fills a 10 x 10 region with 2.0 through `init`, sums it with `red`, and
+-- checks that the sum equals the region's volume times the fill value.
+task main()
+  var side = 10
+  var value : double = 2.0
+  var grid = region(ispace(int2d, {side, side}), double)
+
+  init(grid, value)
+  var total = red(grid)
+  var expected = grid.volume * value
+  regentlib.assert(total == expected, "test failed")
+end
+
+-- Use `main` as the top-level task.
+regentlib.start(main)