Commit 4d4e19c

[TESTCASE] Add a mock test workflow of CUDA codegen (#19)
1 parent 110c9be commit 4d4e19c

4 files changed (+64, -14 lines)

4 files changed

+64
-14
lines changed

python/tvm/function.py

Lines changed: 16 additions & 11 deletions
@@ -117,11 +117,13 @@ def compute(shape, fcompute, name="compute"):
         The created tensor
     """
     shape = (shape,) if isinstance(shape, _expr.Expr) else shape
-
     ndim = len(shape)
     arg_names = fcompute.__code__.co_varnames
+
+    if fcompute.__code__.co_argcount == 0 and len(arg_names) == 1:
+        arg_names = ["i%d" % i for i in range(ndim)]
     if ndim != len(arg_names):
-        raise ValueError("fcompute do not match dimension")
+        raise ValueError("fcompute do not match dimension, ndim=%d" % ndim)

     dim_var = [IterVar((0, s), x) for x, s in zip(arg_names, shape)]
     body = fcompute(*[v.var for v in dim_var])
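
The new branch supports an fcompute written as a variadic lambda such as
"lambda *i: ...": such a function reports co_argcount == 0 with a single
varname, so one index name per dimension is synthesized. A minimal
standalone sketch of just this introspection logic (hypothetical
infer_arg_names helper; not the full tvm.compute):

# Sketch of the new argument-name inference (hypothetical helper).
def infer_arg_names(fcompute, shape):
    ndim = len(shape)
    arg_names = fcompute.__code__.co_varnames
    # a `lambda *i` has co_argcount == 0 and exactly one varname,
    # so synthesize one index name per dimension
    if fcompute.__code__.co_argcount == 0 and len(arg_names) == 1:
        arg_names = ["i%d" % i for i in range(ndim)]
    if ndim != len(arg_names):
        raise ValueError("fcompute do not match dimension, ndim=%d" % ndim)
    return list(arg_names)

print(infer_arg_names(lambda x, y: x + y, (4, 4)))  # ['x', 'y']
print(infer_arg_names(lambda *i: sum(i), (4, 4)))   # ['i0', 'i1']
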
@@ -170,7 +172,7 @@ def Buffer(shape, dtype=None,
         name, ptr, shape, strides, dtype)


-def IterVar(dom, name='iter', thread_tag=''):
+def IterVar(dom=None, name=None, thread_tag=''):
     """Create an iteration variable

     Parameters
@@ -189,14 +191,17 @@ def IterVar(dom, name='iter', thread_tag=''):
     iter_var : IterVar
         The result itervar
     """
-    if isinstance(dom, (list, tuple)):
-        if len(dom) != 2:
-            raise ValueError("need to list of ranges")
-        dom = Range(dom[0], dom[1])
-
-    if not isinstance(dom, _collections.Range):
-        raise ValueError("dom need to be Range")
-
+    if dom is not None:
+        if isinstance(dom, (list, tuple)):
+            if len(dom) != 2:
+                raise ValueError("need to list of ranges")
+            dom = Range(dom[0], dom[1])
+
+        if not isinstance(dom, _collections.Range):
+            raise ValueError("dom need to be Range")
+    if name is None:
+        name = thread_tag if thread_tag else name
+    name = name if name else 'iter'
     return _function_internal._IterVar(dom, name, thread_tag)
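
Both dom and name are now optional: an IterVar like the gridIdx.x one in
the new test can be created from a thread tag alone, leaving the domain
for bound inference to fill in. The name falls back to the thread tag,
then to 'iter'. A plain-Python sketch of just the defaulting rule
(hypothetical default_name helper):

# Sketch of the new name-defaulting rule (hypothetical helper).
def default_name(name=None, thread_tag=''):
    if name is None:
        name = thread_tag if thread_tag else name
    return name if name else 'iter'

print(default_name())                          # 'iter'
print(default_name(thread_tag='threadIdx.x'))  # 'threadIdx.x'
print(default_name(name='k'))                  # 'k'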

python/tvm/schedule.py

Lines changed: 0 additions & 2 deletions
@@ -56,8 +56,6 @@ def split(self, parent, factor=None, outer=None):
         if outer is not None:
             if outer.thread_tag == '':
                 raise ValueError("split by outer must have special thread_tag")
-            if outer.dom is None:
-                raise ValueError("split by outer must have specified domain")
             inner = _function_internal._StageSplitByOuter(self, parent, outer, factor)
         else:
             if factor is None:
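
With the domain check removed, split(parent, factor=..., outer=...) now
accepts an outer IterVar whose domain is unspecified; InferBound is
expected to derive the extent later. A sketch assuming the mock API
exercised by the new test below:

import tvm

# Assuming this commit's mock API: the outer IterVar may omit its
# domain, which InferBound later derives from the split factor.
n = tvm.Var('n')
A = tvm.placeholder((n,), name='A')
C = tvm.compute(A.shape, lambda *i: A(*i) + A(*i), name='C')
s = tvm.Schedule(C.op)
grid_x = tvm.IterVar(thread_tag="gridIdx.x")  # no dom, no name
_, x = s[C].split(C.op.axis[0], factor=256, outer=grid_x)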

src/schedule/bound.cc

Lines changed: 2 additions & 1 deletion
@@ -5,6 +5,7 @@
  */
 #include <tvm/ir.h>
 #include <tvm/ir_visitor.h>
+#include <tvm/ir_pass.h>
 #include <tvm/schedule_pass.h>
 #include "./int_set.h"
 #include "./graph.h"
@@ -14,7 +15,7 @@ namespace schedule {

 // result = ceil(a / b); both a and b are positive integers
 inline Expr DivCeil(Expr a, Expr b) {
-  return (a + b - 1) / b;
+  return ir::Simplify((a + b - 1) / b);
 }

 // Downward message passing algorithm on stage schedule s,
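
For positive integers, (a + b - 1) / b equals ceil(a / b) in integer
arithmetic; wrapping it in ir::Simplify folds the expression to a
constant when a and b are known. A quick plain-Python check of the
identity (not TVM code):

import math

# Verify (a + b - 1) // b == ceil(a / b) for positive integers.
for a in range(1, 100):
    for b in range(1, 100):
        assert (a + b - 1) // b == math.ceil(a / b)
print("ok")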

tests/python/test_codegen_cuda.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+import tvm
+import numpy as np
+
+def mock_test_add():
+    """Not yet working, mock design"""
+    n = tvm.Var('n')
+    A = tvm.placeholder((n,), name='A')
+    B = tvm.placeholder((n,), name='B')
+    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
+    s = tvm.Schedule(C.op)
+
+    # a GPU schedule has to split the axis by gridIdx and threadIdx
+    num_thread = 256
+    grid_x = tvm.IterVar(thread_tag="gridIdx.x")
+    thread_x = tvm.IterVar((0, num_thread), thread_tag="threadIdx.x")
+    _, x = s[C].split(C.op.axis[0], factor=num_thread, outer=grid_x)
+    _, x = s[C].split(x, outer=thread_x)
+    # compile to IR
+    bounds = tvm.schedule.InferBound(s)
+    stmt = tvm.ir_pass.ScheduleOps(s, bounds)
+
+    Ab = tvm.Buffer(A.shape, A.dtype, name='A')
+    Bb = tvm.Buffer(B.shape, B.dtype, name='B')
+    Cb = tvm.Buffer(C.shape, C.dtype, name='C')
+
+    def codegen():
+        # generate host/device code
+        host_code, device_code = tvm.codegen.GenCUDA(
+            s,
+            inputs={A: Ab, B: Bb},
+            outputs={C: Cb},
+            args=[A, B, C])
+        # build a callable function from the generated code
+        f = tvm.cuda.build_function(host_code, device_code)
+        # create arrays
+        a = tvm.nd.array(np.ones(10), ctx=tvm.gpu(0))
+        b = tvm.nd.array(np.ones(10), ctx=tvm.gpu(0))
+        c = tvm.nd.array(np.zeros(10), ctx=tvm.gpu(0))
+        # call the generated code
+        f(a, b, c)
+        # sync and check the result
+        np.testing.assert_equal(c.asnumpy(), np.ones(10) * 2)
+
+
+if __name__ == "__main__":
+    mock_test_add()
