We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c856ced, commit f604ede. Copy full SHA for f604ede
examples/dequantize_gemm/example_dequant_gemm_bf16_mxfp4_hopper.py
@@ -35,7 +35,7 @@ def get_configs():
35
block_N=[64, 128, 256],
36
block_K=[64, 128, 256],
37
num_stages=[0, 2],
38
- threads=[128, 256],
+ threads=[128, 256, 512],
39
split=[1, 2],
40
)
41
return [{
@@ -190,6 +190,8 @@ def main(
190
B_shared: tilelang.layout.make_swizzled_layout(B_shared),
191
C_shared: tilelang.layout.make_swizzled_layout(C_shared),
192
})
193
+ if threads == 512:
194
+ T.no_set_max_nreg()
195
196
T.clear(C_local)
197
for k in T.Pipelined(K // block_K, num_stages=num_stages):
0 commit comments