Skip to content

Commit

Permalink
fix alignmentC=8 for imma N=128 (NVIDIA#822)
Browse files: browse the repository at this point in the history
Co-authored-by: Haicheng Wu <haichengw@nvidia.com>
  • Loading branch information
hwu36 and hwu36 authored Feb 15, 2023
1 parent 8f5c242 commit 9fb38ac
Showing 1 changed file with 6 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ struct DefaultIteratorsTensorOp<
>;

using WarpTileIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
WarpTileIteratorNotMixed,
WarpTileIteratorMixed>::type;

Expand All @@ -289,7 +289,7 @@ struct DefaultIteratorsTensorOp<
>;

using SharedLoadIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
SharedLoadIteratorNotMixed,
SharedLoadIteratorMixed>::type;

Expand Down Expand Up @@ -337,7 +337,7 @@ struct DefaultIteratorsTensorOp<
>;

using WarpTileIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
WarpTileIteratorNotMixed,
WarpTileIteratorMixed>::type;

Expand All @@ -356,7 +356,7 @@ struct DefaultIteratorsTensorOp<
>;

using SharedLoadIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
SharedLoadIteratorNotMixed,
SharedLoadIteratorMixed>::type;

Expand Down Expand Up @@ -404,7 +404,7 @@ struct DefaultIteratorsTensorOp<
>;

using WarpTileIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
WarpTileIteratorNotMixed,
WarpTileIteratorMixed>::type;

Expand All @@ -423,7 +423,7 @@ struct DefaultIteratorsTensorOp<
>;

using SharedLoadIterator = typename platform::conditional<
- (ThreadblockShape::kN == 256),
+ (ThreadblockShape::kN == 256) || (ThreadblockShape::kN == 128 && ElementsPerAccess == 8),
SharedLoadIteratorNotMixed,
SharedLoadIteratorMixed>::type;

Expand Down

0 comments on commit 9fb38ac

Please sign in to comment.