Skip to content

Commit

Permalink
fix cp.async L2 prefetch typo (NVIDIA#1187)
Browse files: browse the repository at this point in the history
  • Loading branch information
reed-lau authored Nov 28, 2023
1 parent 8098336 commit eb01d54
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion include/cute/arch/copy_sm80.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ struct SM80_CP_ASYNC_CACHEGLOBAL
#if defined(CUTE_ARCH_CP_ASYNC_SM80_ENABLED)
TS const* gmem_ptr = &gmem_src;
uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst);
- asm volatile("cp.async.cg.shared.global.L2::128BB [%0], [%1], %2;\n"
+ asm volatile("cp.async.cg.shared.global.L2::128B [%0], [%1], %2;\n"
:: "r"(smem_int_ptr),
"l"(gmem_ptr),
"n"(sizeof(TS)));
Expand Down

0 comments on commit eb01d54

Please sign in to comment.