Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
Revert "[Kernel] Marlin_24: Ensure the mma.sp instruction is using th…
Browse files Browse the repository at this point in the history
…e ::ordered_metadata modifier (introduced with PTX 8.5)" (vllm-project#5149)
  • Loading branch information
simon-mo authored and robertgshaw2-neuralmagic committed Jul 14, 2024
1 parent 48d5cea commit 5fe5e47
Showing 1 changed file with 4 additions and 8 deletions.
12 changes: 4 additions & 8 deletions csrc/quantization/marlin/sparse/common/mma.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
float* c = reinterpret_cast<float*>(&frag_c);
if (psel == 0) {
asm volatile(
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
"f32 "
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
"{%12,%13,%14,%15}, %16, 0x0;\n"
: "=f"(c[0]), "=f"(c[1]), "=f"(c[2]), "=f"(c[3])
: "r"(a0[0]), "r"(a1[0]), "r"(a0[1]), "r"(a1[1]), "r"(b[0]), "r"(b[2]),
"r"(b[4]), "r"(b[6]), "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]),
"r"(e[0]));
asm volatile(
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
"f32 "
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
"{%12,%13,%14,%15}, %16, 0x0;\n"
: "=f"(c[4]), "=f"(c[5]), "=f"(c[6]), "=f"(c[7])
Expand All @@ -51,17 +49,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
"r"(e[0]));
} else {
asm volatile(
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
"f32 "
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
"{%12,%13,%14,%15}, %16, 0x1;\n"
: "=f"(c[0]), "=f"(c[1]), "=f"(c[2]), "=f"(c[3])
: "r"(a0[0]), "r"(a1[0]), "r"(a0[1]), "r"(a1[1]), "r"(b[0]), "r"(b[2]),
"r"(b[4]), "r"(b[6]), "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]),
"r"(e[0]));
asm volatile(
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
"f32 "
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
"{%12,%13,%14,%15}, %16, 0x1;\n"
: "=f"(c[4]), "=f"(c[5]), "=f"(c[6]), "=f"(c[7])
Expand Down

0 comments on commit 5fe5e47

Please sign in to comment.