
Commit d4dacd7

more refactoring + filter out invalid tests ahead of time
Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent 2428aa1 commit d4dacd7

File tree

5 files changed (+181, -134 lines)


tests/kernels/moe/modular_kernel_tools/common.py

Lines changed: 15 additions & 14 deletions
@@ -197,56 +197,57 @@ def all2all_backend(self):
         info = prepare_finalize_info(self.prepare_finalize_type)
         return info.backend

-    def is_valid(self):
+    def is_valid(self) -> tuple[bool, Optional[str]]:
         # Check prepare-finalize and fused-experts compatibility
         if self.is_batched_prepare_finalize():
             if not self.is_batched_fused_experts():
-                return False
+                return False, "Mismatched format."
         else:
             if not self.is_standard_fused_experts():
-                return False
+                return False, "Mismatched format."

         use_chunking = self.fused_moe_chunk_size is not None
         if use_chunking and not self.is_fe_supports_chunking():
-            return False
+            return False, "Chunking not supported."

         # Check quantization sanity
         if (int(self.is_per_act_token_quant) +
                 int(self.is_per_tensor_act_quant) +
                 int(self.quant_block_shape is not None)) > 1:
             # invalid quant config
-            return False
+            return False, "Bad quant_config."

         # check type support
         if self.quant_dtype is None:
             if (self.dtype not in self.pf_supported_types()
                     or self.dtype not in self.fe_supported_types()):
-                return False
+                return False, "Unsupported type 1."
         else:
             if (self.quant_dtype not in self.pf_supported_types()
                     or self.quant_dtype not in self.fe_supported_types()):
-                return False
+                return False, "Unsupported type 2."

         # Check block quanization support
         is_block_quatized = self.quant_block_shape is not None
         if is_block_quatized and self.quant_dtype is None:
-            return False
+            return False, "No block quantization support."
+
         if is_block_quatized and not self.is_block_quant_supported():
-            return False
+            return False, "Mismatched block quantization support."

         # deep_gemm only works with block-quantized
         if self.needs_deep_gemm() and not is_block_quatized:
-            return False
+            return False, "Needs DeepGEMM but not block quantized."

         # Check dependencies (turn into asserts?)
         if self.needs_deep_ep() and not has_deep_ep():
-            return False
+            return False, "Needs DeepEP."
         if self.needs_deep_gemm() and not has_deep_gemm():
-            return False
+            return False, "Needs DeepGEMM."
         if self.needs_pplx() and not has_pplx():  # noqa: SIM103
-            return False
+            return False, "Needs PPLX."

-        return True
+        return True, None


 @dataclass
tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ def add_to_results(config: Config,
                            fused_moe_chunk_size=None)

     success = None
-    if config.is_valid():
+    if config.is_valid()[0]:
         print(f"Running config : {config.describe()} ...")
         try:
             weights: WeightTensors = WeightTensors.make(config)
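Here only the boolean matters, so the call is indexed with [0]. A hedged alternative spelling (not in the commit) that unpacks the pair and keeps the rejection reason available in case the feature matrix later wants to record it:

valid, reason = config.is_valid()
success = None
if valid:
    print(f"Running config : {config.describe()} ...")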

tests/kernels/moe/modular_kernel_tools/mk_objects.py

Lines changed: 1 addition & 1 deletion
@@ -430,7 +430,7 @@ def make_fused_experts(
         print(f"Making BatchedTritonOrDeepGemmExperts {kwargs} ...")
         experts = BatchedTritonOrDeepGemmExperts(**kwargs)
     elif fused_experts_type == DeepGemmExperts:
-        print("Making DeepGemmExperts {quant_config} ...")
+        print(f"Making DeepGemmExperts {quant_config} ...")
         experts = DeepGemmExperts(quant_config)
     elif fused_experts_type == TritonExperts:
         kwargs = quant_kwargs
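The one-character fix above matters because without the f prefix the braces are printed literally rather than interpolated. A standalone illustration (placeholder value, not from the repo):

quant_config = "fp8_w8a8"  # placeholder value
print("Making DeepGemmExperts {quant_config} ...")   # prints the literal {quant_config}
print(f"Making DeepGemmExperts {quant_config} ...")  # prints: Making DeepGemmExperts fp8_w8a8 ...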

tests/kernels/moe/test_modular_kernel_combinations.py

Lines changed: 70 additions & 48 deletions
@@ -5,18 +5,18 @@
 import textwrap
 import traceback
 from itertools import product
-from typing import Optional
+from typing import Any, Optional

 import pytest
 import torch

 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
+from vllm.utils import (cuda_device_count_stateless, has_deep_ep,
+                        has_deep_gemm, has_pplx)
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe

-from ...utils import multi_gpu_test
 from .modular_kernel_tools.common import (Config, RankTensors, WeightTensors,
                                           reference_moe_impl,
                                           run_modular_kernel)
@@ -122,7 +122,8 @@ def rank_worker(


 def run(config: Config, verbose: bool):
-    assert config.is_valid()
+    assert config.is_valid()[0]
+    assert not is_nyi_config(config)

     weights: WeightTensors = WeightTensors.make(config)

@@ -156,24 +157,63 @@ def is_nyi_config(config: Config) -> bool:
     return not info.supports_expert_map


-@pytest.mark.parametrize("k", Ks)
-@pytest.mark.parametrize("n", Ns)
-@pytest.mark.parametrize("e", Es)
-@pytest.mark.parametrize("dtype", DTYPEs)
-@pytest.mark.parametrize("quant_config", MK_QUANT_CONFIGS)
+def generate_valid_test_cases(world_size: int,
+                              prepare_finalize_types) -> list[tuple[Any, ...]]:
+    cases = []
+
+    for k, n, e, dtype, quant_config, combination, chunk_size in product(
+            Ks, Ns, Es, DTYPEs, MK_QUANT_CONFIGS,
+            product(prepare_finalize_types, MK_FUSED_EXPERT_TYPES),
+            FUSED_MOE_CHUNK_SIZEs):
+
+        config = Config(
+            Ms=Ms,
+            K=k,
+            N=n,
+            E=e,
+            topks=TOPKs,
+            dtype=dtype,
+            quant_config=quant_config,
+            prepare_finalize_type=combination[0],
+            fused_experts_type=combination[1],
+            fused_moe_chunk_size=chunk_size,
+            world_size=world_size,
+        )
+
+        # TODO
+        verbose = False  #pytestconfig.getoption('verbose') > 0
+
+        valid, reason = config.is_valid()
+
+        if not valid:
+            if verbose:
+                print(f"Tests config {config} is not valid: {reason}")
+            continue
+
+        if is_nyi_config(config):
+            if verbose:
+                print(f"Tests config {config} is nyi.")
+            continue
+
+        cases.append((k, n, e, dtype, quant_config, combination[0],
+                      combination[1], chunk_size, world_size))
+
+    return cases
+
+
 @pytest.mark.parametrize(
-    "combination",
-    product(MK_MULTI_GPU_PREPARE_FINALIZE_TYPES, MK_FUSED_EXPERT_TYPES))
-@pytest.mark.parametrize("fused_moe_chunk_size", FUSED_MOE_CHUNK_SIZEs)
-@pytest.mark.parametrize("world_size", [2])
-@multi_gpu_test(num_gpus=2)
+    "k,n,e,dtype,quant_config,prepare_finalize_type,fused_experts_type,chunk_size,world_size",
+    generate_valid_test_cases(
+        world_size=2,
+        prepare_finalize_types=MK_MULTI_GPU_PREPARE_FINALIZE_TYPES))
 @meets_multi_gpu_requirements
 def test_modular_kernel_combinations_multigpu(
         k: int, n: int, e: int, dtype: torch.dtype,
         quant_config: Optional[TestMoEQuantConfig],
-        combination: tuple[mk.FusedMoEPrepareAndFinalize,
-                           mk.FusedMoEPermuteExpertsUnpermute],
-        fused_moe_chunk_size: Optional[int], world_size: int, pytestconfig):
+        prepare_finalize_type: mk.FusedMoEPrepareAndFinalize,
+        fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute,
+        chunk_size: Optional[int], world_size: int, pytestconfig):
+    assert cuda_device_count_stateless() >= world_size

     config = Config(
         Ms=Ms,
@@ -183,38 +223,26 @@ def test_modular_kernel_combinations_multigpu(
         topks=TOPKs,
         dtype=dtype,
         quant_config=quant_config,
-        prepare_finalize_type=combination[0],
-        fused_experts_type=combination[1],
-        fused_moe_chunk_size=fused_moe_chunk_size,
+        prepare_finalize_type=prepare_finalize_type,
+        fused_experts_type=fused_experts_type,
+        fused_moe_chunk_size=chunk_size,
         world_size=world_size,
     )
-
-    if not config.is_valid():
-        pytest.skip(f"Tests config {config} is not valid. Skipping ...")
-
-    if is_nyi_config(config):
-        pytest.skip(f"Tests config {config} is nyi. Skipping ...")
-
     verbosity = pytestconfig.getoption('verbose')
     run(config, verbosity > 0)


-@pytest.mark.parametrize("k", Ks)
-@pytest.mark.parametrize("n", Ns)
-@pytest.mark.parametrize("e", Es)
-@pytest.mark.parametrize("dtype", DTYPEs)
-@pytest.mark.parametrize("quant_config", MK_QUANT_CONFIGS)
 @pytest.mark.parametrize(
-    "combination",
-    product(MK_SINGLE_GPU_PREPARE_FINALIZE_TYPES, MK_FUSED_EXPERT_TYPES))
-@pytest.mark.parametrize("fused_moe_chunk_size", FUSED_MOE_CHUNK_SIZEs)
-@pytest.mark.parametrize("world_size", [1])
+    "k,n,e,dtype,quant_config,prepare_finalize_type,fused_experts_type,chunk_size,world_size",
+    generate_valid_test_cases(
+        world_size=1,
+        prepare_finalize_types=MK_SINGLE_GPU_PREPARE_FINALIZE_TYPES))
 def test_modular_kernel_combinations_singlegpu(
         k: int, n: int, e: int, dtype: torch.dtype,
         quant_config: Optional[TestMoEQuantConfig],
-        combination: tuple[mk.FusedMoEPrepareAndFinalize,
-                           mk.FusedMoEPermuteExpertsUnpermute],
-        fused_moe_chunk_size: Optional[int], world_size: int, pytestconfig):
+        prepare_finalize_type: mk.FusedMoEPrepareAndFinalize,
+        fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute,
+        chunk_size: Optional[int], world_size: int, pytestconfig):
     config = Config(
         Ms=Ms,
         K=k,
@@ -223,18 +251,12 @@ def test_modular_kernel_combinations_singlegpu(
         topks=TOPKs,
         dtype=dtype,
         quant_config=quant_config,
-        prepare_finalize_type=combination[0],
-        fused_experts_type=combination[1],
-        fused_moe_chunk_size=fused_moe_chunk_size,
+        prepare_finalize_type=prepare_finalize_type,
+        fused_experts_type=fused_experts_type,
+        fused_moe_chunk_size=chunk_size,
         world_size=world_size,
     )

-    if not config.is_valid():
-        pytest.skip(f"Tests config {config} is not valid. Skipping ...")
-
-    if is_nyi_config(config):
-        pytest.skip(f"Tests config {config} is nyi. Skipping ...")
-
     verbosity = pytestconfig.getoption('verbose')
     run(config, verbosity > 0)

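Taken together, these test changes move the validity and NYI checks from the test bodies (pytest.skip at run time) into generate_valid_test_cases(), so invalid combinations are filtered out before pytest ever collects them. A self-contained sketch of the same pattern, using generic placeholder names rather than the vLLM ones:

from itertools import product
from typing import Optional

import pytest


def is_valid(a: int, b: int) -> tuple[bool, Optional[str]]:
    # Stand-in for Config.is_valid(): reject incompatible combinations and
    # report why, so the generator can optionally log the reason.
    if a > b:
        return False, "a must not exceed b"
    return True, None


def generate_valid_cases() -> list[tuple[int, int]]:
    # Enumerate the full cross-product, but keep only cases that will
    # actually run; pytest never collects (or skips) the rest.
    cases = []
    for a, b in product(range(3), range(3)):
        valid, _reason = is_valid(a, b)
        if valid:
            cases.append((a, b))
    return cases


@pytest.mark.parametrize("a,b", generate_valid_cases())
def test_pair(a: int, b: int):
    assert a <= b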