-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Closed
dmlc/HalideIR
#55
Description
With the following input DSL:
# Minimal reproduction: a two-stage element-wise copy (A -> C -> D) that is
# lowered and printed twice, once before and once after LoopPartition.
import tvm
m = 48
# 1-D float16 input of length m.
A = tvm.placeholder((m,), name='A', dtype="float16")
# C is an element-wise copy of A; D is an element-wise copy of C.
C = tvm.compute((m,), lambda i: A[i], name='C')
D = tvm.compute((m,), lambda i: C[i], name='D')
s = tvm.create_schedule(D.op)
# Split the two axes with factors where neither divides the other (10 and 32);
# neither factor divides m = 48 either.
co, ci = s[C].split(C.op.axis[0], 10)
do, di = s[D].split(D.op.axis[0], 32)
# Attach the computation of C at D's outer loop.
s[C].compute_at(s[D], do)
# Infer bounds for the schedule and build the statement from it.
bounds = tvm.schedule.InferBound(s)
stmt = tvm.schedule.ScheduleOps(s, bounds)
# First dump: simplified statement before loop partitioning.
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
print(stmt)
# Second dump: same statement after LoopPartition and another simplification.
# NOTE(review): the meaning of the second argument (True) is not visible here;
# presumably it enables partitioning of constant-extent loops -- confirm
# against the TVM version in use.
stmt = tvm.ir_pass.LoopPartition(stmt, True)
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
print(stmt)
The following is the output of the first print statement. The code is correct.
// attr [compute(D, 0x1e4d7e0)] realize_scope = ""
realize D([0, 48]) {
produce D {
for (i.outer, 0, 2) {
// attr [compute(C, 0x195b3a0)] realize_scope = ""
realize C([(i.outer*32), 32]) {
produce C {
for (i.outer, 0, 4) {
for (i.inner, 0, 10) {
if (likely((((i.outer*10) + i.inner) < 32))) {
if (likely(((((i.outer*32) + (i.outer*10)) + i.inner) < 48))) {
C((((i.outer*32) + (i.outer*10)) + i.inner)) =A((((i.outer*32) + (i.outer*10)) + i.inner))
}
}
}
}
}
for (i.inner, 0, 32) {
if (likely((((i.outer*32) + i.inner) < 48))) {
if (likely((((i.outer*32) + i.inner) < 48))) {
D(((i.outer*32) + i.inner)) =C(((i.outer*32) + i.inner))
}
}
}
}
}
}
}
The following is the output of the second print statement. The code is incorrect: in the second `produce C`, only 10 (instead of 16) elements of A are copied to C.
// attr [compute(D, 0x1e4d7e0)] realize_scope = ""
realize D([0, 48]) {
produce D {
for (i.outer, 0, 1) {
// attr [compute(C, 0x195b3a0)] realize_scope = ""
realize C([(i.outer*32), 32]) {
produce C {
for (i.outer, 0, 3) {
for (i.inner, 0, 10) {
C((((i.outer*32) + (i.outer*10)) + i.inner)) =A((((i.outer*32) + (i.outer*10)) + i.inner))
}
}
for (i.inner, 0, 2) {
C(((30 + (i.outer*32)) + i.inner)) =A(((30 + (i.outer*32)) + i.inner))
}
}
for (i.inner, 0, 32) {
D(((i.outer*32) + i.inner)) =C(((i.outer*32) + i.inner))
}
}
}
// attr [compute(C, 0x195b3a0)] realize_scope = ""
realize C([32, 32]) {
produce C {
for (i.outer, 0, 1) {
for (i.inner, 0, 10) {
C(((32 + (i.outer*10)) + i.inner)) =A(((32 + (i.outer*10)) + i.inner))
}
}
}
for (i.inner, 0, 16) {
D((32 + i.inner)) =C((32 + i.inner))
}
}
}
}