Skip to content

Commit f604ede

Browse files
committed
[BugFix] Enough reg for producer when threads=512
1 parent c856ced commit f604ede

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

examples/dequantize_gemm/example_dequant_gemm_bf16_mxfp4_hopper.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def get_configs():
 35  35    block_N=[64, 128, 256],
 36  36    block_K=[64, 128, 256],
 37  37    num_stages=[0, 2],
 38      - threads=[128, 256],
     38  + threads=[128, 256, 512],
 39  39    split=[1, 2],
 40  40    )
 41  41    return [{
@@ -190,6 +190,8 @@ def main(
190 190    B_shared: tilelang.layout.make_swizzled_layout(B_shared),
191 191    C_shared: tilelang.layout.make_swizzled_layout(C_shared),
192 192    })
    193  + if threads == 512:
    194  +     T.no_set_max_nreg()
193 195
194 196    T.clear(C_local)
195 197    for k in T.Pipelined(K // block_K, num_stages=num_stages):

0 commit comments

Comments
 (0)