Skip to content

Commit

Permalink
Re-enable all alignments for int accumulators (NVIDIA#807)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jackkosaian authored Feb 7, 2023
1 parent add4ba6 commit 5921043
Showing 1 changed file with 6 additions and 4 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -136,14 +136,15 @@ struct DefaultIteratorsTensorOp<float, float, 4, ThreadblockShape, WarpShape, In
static int const kFragmentsPerIteration = 2;
};

/// Partial specialization for int32_t <= int32_t x 4
/// Partial specialization for int32_t <= int32_t
template <
int ElementsPerAccess,
typename ThreadblockShape,
typename WarpShape,
typename InstructionShape,
typename ThreadMap
>
struct DefaultIteratorsTensorOp<int32_t, int32_t, 4, ThreadblockShape, WarpShape, InstructionShape, ThreadMap> {
struct DefaultIteratorsTensorOp<int32_t, int32_t, ElementsPerAccess, ThreadblockShape, WarpShape, InstructionShape, ThreadMap> {

using WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp<
WarpShape,
Expand All @@ -160,14 +161,15 @@ struct DefaultIteratorsTensorOp<int32_t, int32_t, 4, ThreadblockShape, WarpShape
static int const kFragmentsPerIteration = 1;
};

/// Partial specialization for float <= int32_t x 4
/// Partial specialization for float <= int32_t
template <
int ElementsPerAccess,
typename ThreadblockShape,
typename WarpShape,
typename InstructionShape,
typename ThreadMap
>
struct DefaultIteratorsTensorOp<float, int32_t, 4, ThreadblockShape, WarpShape, InstructionShape, ThreadMap> {
struct DefaultIteratorsTensorOp<float, int32_t, ElementsPerAccess, ThreadblockShape, WarpShape, InstructionShape, ThreadMap> {

using WarpTileIterator = cutlass::epilogue::warp::TileIteratorTensorOp<
WarpShape,
Expand Down

0 comments on commit 5921043

Please sign in to comment.