@@ -294,6 +294,7 @@ def test_slice_and_copy_similar_to_vllm(self, granularity):
294294 self ._test_slice_and_copy_similar_to_vllm (config )
295295
296296 @unittest .skipIf (not is_sm_at_least_90 (), "Nedd sm90+" )
297+ @unittest .skipIf (not _is_fbgemm_gpu_genai_available (), "Need fbgemm_gpu_genai" )
297298 def test_bmm (self ):
298299 # only support per row quantization
299300 config = Float8DynamicActivationFloat8WeightConfig (granularity = PerRow ())
@@ -406,6 +407,7 @@ def test_cat(self, granularity, sizes):
406407 self .assertEqual (cat_qweight2 .scale , ref_scale )
407408
408409 @unittest .skipIf (not is_sm_at_least_90 (), "Nedd sm90+" )
410+ @unittest .skipIf (not _is_fbgemm_gpu_genai_available (), "Need fbgemm_gpu_genai" )
409411 def test_moe_weight_reshape_ops (self ):
410412 # only per row quantization is supported for bmm
411413 granularity = PerRow ()
@@ -416,6 +418,7 @@ def test_moe_weight_reshape_ops(self):
416418 # that should be moved here after v1 config is deprecated:
417419 # https://github.com/pytorch/ao/issues/2649
418420 @unittest .skipIf (not is_sm_at_least_90 (), "Nedd sm90+" )
421+ @unittest .skipIf (not _is_fbgemm_gpu_genai_available (), "Need fbgemm_gpu_genai" )
419422 def test_expected_gpu_kernel_fbgemm (self ):
420423 """Making sure KernelPreference.FBGEMM calls correct quantize and gemm kernels
421424 and the bias add happens in the gemm kernel for per row quantization
0 commit comments