Skip to content

Commit

Permalink
[TensorIR][M2a] Reorder (#8767)
Browse files Browse the repository at this point in the history
This PR is part of the TensorIR upstreaming effort (#7527), which adds a schedule primitive: reorder.

Co-authored-by: Siyuan Feng <Hzfengsy@sjtu.edu.cn>
Co-authored-by: Bohan Hou <32121147+spectrometerHBH@users.noreply.github.com>
Co-authored-by: Ruihang Lai <lairuihangdongdong@qq.com>
Co-authored-by: Wuwei Lin <wuwei@apache.org>
Co-authored-by: Junru Shao <junrushao1994@gmail.com>
  • Loading branch information
6 people authored Aug 23, 2021
1 parent d6d6367 commit 0a1eae8
Show file tree
Hide file tree
Showing 10 changed files with 736 additions and 0 deletions.
13 changes: 13 additions & 0 deletions include/tvm/tir/schedule/schedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,19 @@ class ScheduleNode : public runtime::Object {
* \return The new loops after split
*/
virtual Array<LoopRV> Split(const LoopRV& loop_rv, const Array<Optional<ExprRV>>& factors) = 0;
/*!
 * \brief Reorder a list of loops. The loops do not need to be consecutive.
 *
 * Preconditions:
 * 1) The loops are in the same chain, i.e. they can be ordered as [l_1, l_2, ..., l_n] such
 *    that l_i is an ancestor of l_{i+1}, and there are only single-branch loops between l_1
 *    and l_n (which also implies that they are under the same scope).
 * 2) After reordering, the domain of an outer loop must not depend on any of the inner loops.
 * 3) For every block under the loop nests, its block binding must be affine, and its block
 *    variables must be either data parallel or reduction.
 * 4) The arguments must not contain duplicated loops.
 *
 * \param ordered_loop_rvs The loops in the new order
 */
virtual void Reorder(const Array<LoopRV>& ordered_loop_rvs) = 0;
/******** Schedule: Manipulate ForKind ********/
/*!
* \brief Parallelize the input loop. It requires:
Expand Down
59 changes: 59 additions & 0 deletions python/tvm/tir/schedule/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,65 @@ def after_split(a: ty.handle, b: ty.handle) -> None:
# that there is at most one None in `factors`
return _ffi_api.ScheduleSplit(self, loop, factors) # type: ignore # pylint: disable=no-member

def reorder(self, *ordered_loops: LoopRV) -> None:
    """
    Reorder a list of loops. It doesn't require the loops to be consecutive.
    It requires:

    1) The loops are in the same chain. That means: the loops can be ordered to
       [l_1, l_2, ... , l_n] where l_i is an ancestor of l_{i+1} and there are only
       single-branch loops between l_1 and l_n (which also indicates they are under the
       same scope).
    2) After reordering, the domain of an outer loop cannot depend on any of the inner loops.
    3) For every block under the loop nests, its block binding must be affine, and the block
       variables must be either data parallel or reduction.
    4) No duplicated loops are allowed in the arguments.

    Parameters
    ----------
    *ordered_loops : LoopRV
        The loops in the new order, passed as separate positional arguments

    Examples
    --------

    Before reorder, in TensorIR, the IR is:

    .. code-block:: python

        @tvm.script.tir
        def before_reorder(a: ty.handle, b: ty.handle) -> None:
            A = tir.match_buffer(a, (128, 128))
            B = tir.match_buffer(b, (128, 128))
            for i, j in tir.grid(128, 128):
                with tir.block([128, 128], "B") as [vi, vj]:
                    B[vi, vj] = A[vi, vj] * 2.0

    Create the schedule and do reorder:

    .. code-block:: python

        sch = tir.Schedule(before_reorder)
        i, j = sch.get_loops(sch.get_block("B"))
        sch.reorder(j, i)
        print(tvm.script.asscript(sch.mod["main"]))

    After applying reorder, the IR becomes:

    .. code-block:: python

        @tvm.script.tir
        def after_reorder(a: ty.handle, b: ty.handle) -> None:
            A = tir.match_buffer(a, (128, 128))
            B = tir.match_buffer(b, (128, 128))
            # Here j and i are reordered
            for j, i in tir.grid(128, 128):
                with tir.block([128, 128], "B") as [vi, vj]:
                    tir.bind(vi, i)
                    tir.bind(vj, j)
                    B[vi, vj] = A[vi, vj] * 2.0

    """
    # `ordered_loops` is the tuple of positional arguments; it is forwarded unchanged.
    _ffi_api.ScheduleReorder(self, ordered_loops)  # type: ignore # pylint: disable=no-member

########## Schedule: Manipulate ForKind ##########

def parallel(self, loop: LoopRV) -> None:
Expand Down
7 changes: 7 additions & 0 deletions src/tir/schedule/concrete_schedule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,13 @@ Array<LoopRV> ConcreteScheduleNode::Split(const LoopRV& loop_rv,
return CreateRV<LoopRV>(results);
}

/*!
 * \brief Reorder the loops referred to by the given loop random variables.
 * \param ordered_loop_rvs The loop random variables, listed in the new order
 */
void ConcreteScheduleNode::Reorder(const Array<LoopRV>& ordered_loop_rvs) {
// NOTE(review): TVM_TIR_SCHEDULE_BEGIN/END are defined outside this view; presumably they
// wrap the enclosed statement in error handling that reports failures under the name
// "reorder" using this->error_render_level_ -- confirm against the macro definitions.
TVM_TIR_SCHEDULE_BEGIN();
// Translate the loop RVs into srefs and delegate to the tir::Reorder primitive on state_.
tir::Reorder(state_, GetSRefs(ordered_loop_rvs));
TVM_TIR_SCHEDULE_END("reorder", this->error_render_level_);
// Invoke the state's DebugVerify hook once the primitive has been applied.
this->state_->DebugVerify();
}

/******** Schedule: Manipulate ForKind ********/

void ConcreteScheduleNode::Parallel(const LoopRV& loop_rv) {
Expand Down
1 change: 1 addition & 0 deletions src/tir/schedule/concrete_schedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class ConcreteScheduleNode : public ScheduleNode {
/******** Schedule: Transform loops ********/
LoopRV Fuse(const Array<LoopRV>& loop_rvs) override;
Array<LoopRV> Split(const LoopRV& loop_rv, const Array<Optional<ExprRV>>& factors) override;
void Reorder(const Array<LoopRV>& ordered_loop_rvs) override;
/******** Schedule: Manipulate ForKind ********/
void Parallel(const LoopRV& loop_rv) override;
void Vectorize(const LoopRV& loop_rv) override;
Expand Down
15 changes: 15 additions & 0 deletions src/tir/schedule/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ TVM_DLL Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
* \return The sref to the fused loop
*/
TVM_DLL StmtSRef Fuse(ScheduleState self, const Array<StmtSRef>& loop_srefs);
/*!
 * \brief Reorder a list of loops. The loops do not need to be consecutive.
 *
 * Preconditions:
 * 1) The loops are in the same chain, i.e. they can be ordered as [l_1, l_2, ..., l_n] such
 *    that l_i is an ancestor of l_{i+1}, and there are only single-branch loops between l_1
 *    and l_n (which also implies that they are under the same scope).
 * 2) After reordering, the domain of an outer loop must not depend on any of the inner loops.
 * 3) For every block under the loop nests, its block binding must be affine, and its block
 *    variables must be either data parallel or reduction.
 * 4) The arguments must not contain duplicated loops.
 *
 * \param self The state of the schedule
 * \param ordered_loop_srefs An array of srefs which indicates the new order of loops
 */
TVM_DLL void Reorder(ScheduleState self, const Array<StmtSRef>& ordered_loop_srefs);

/******** Schedule: Manipulate ForKind ********/
/*!
* \brief Parallelize the input loop. It requires:
Expand Down
Loading

0 comments on commit 0a1eae8

Please sign in to comment.