Skip to content

Commit

Permalink
Typos (NVIDIA#107)
Browse files (browse the repository at this point in the history)
* Update splitk_gemm.cu

* Update gemm_bias_relu.cu

* Update mma_sm75.h
  • Loading branch information
hwu36 authored Jul 13, 2020
1 parent fd7e058 commit 4dac749
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 4 deletions.
2 changes: 1 addition & 1 deletion examples/06_splitK_gemm/splitk_gemm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ int run() {
cutlass::HostTensor<ElementInputA, LayoutInputA> tensor_a(
problem_size.mk()); // <- Create matrix A with dimensions M x K
cutlass::HostTensor<ElementInputB, LayoutInputB> tensor_b(
problem_size.nk()); // <- Create matrix B with dimensions N x K
problem_size.kn()); // <- Create matrix B with dimensions K x N
cutlass::HostTensor<ElementOutput, LayoutOutput> tensor_c(
problem_size.mn()); // <- Create matrix C with dimensions M x N
cutlass::HostTensor<ElementOutput, LayoutOutput> tensor_d(
Expand Down
3 changes: 1 addition & 2 deletions examples/12_gemm_bias_relu/gemm_bias_relu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ int run() {
cutlass::HostTensor<ElementInputA, LayoutInputA> tensor_a(
problem_size.mk()); // <- Create matrix A with dimensions M x K
cutlass::HostTensor<ElementInputB, LayoutInputB> tensor_b(
problem_size.nk()); // <- Create matrix B with dimensions N x K
problem_size.kn()); // <- Create matrix B with dimensions K x N

cutlass::HostTensor<ElementOutput, LayoutOutput> tensor_c_bias(
{problem_size.m(), 1}); // <- Create matrix C with dimensions M x 1
Expand Down
Expand Up @@ -234,7 +234,6 @@ int run() {
tensor_a.device_ref(),
tensor_b.device_ref(),
0,
tensor_c_bias.device_ref(),
tensor_ref_d.device_ref());

// Wait for kernels to finish
Expand Down
2 changes: 1 addition & 1 deletion include/cutlass/arch/mma_sm75.h
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ struct Mma<
int const *C = reinterpret_cast<int const *>(&c);
int *D = reinterpret_cast<int *>(&d);

asm volatile("_mma.m8n8k32.row.col.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"
asm volatile("mma.sync.aligned.m8n8k32.row.col.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"
: "=r"(D[0]), "=r"(D[1])
: "r"(A), "r"(B), "r"(C[0]), "r"(C[1]));

Expand Down

0 comments on commit 4dac749

Please sign in to comment.