neuralmagic
diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
Lines changed: 3 additions & 0 deletions
@@ -86,6 +86,9 @@ def benchmark_config(
             (num_experts, 2 * shard_intermediate_size), dtype=torch.float32
         )
         w2_scale = torch.randn((hidden_size, num_experts), dtype=torch.float32)
+    if use_deep_gemm:
+        # DeepGEMM is benchmarked with its default block shape, so any
+        # previously set block_quant_shape is overridden here.
+        block_quant_shape = [128, 128]
     if use_fp8_w8a8:
         if block_quant_shape:
             block_n, block_k = block_quant_shape[0], block_quant_shape[1]