@@ -57,7 +57,7 @@ def generate_attention_metadata(num_tokens, mesh) -> AttentionMetadata:
 
 def generate_kv_caches(num_kv_heads, head_size, mesh, dtype):
     cache_shape = get_kv_cache_shape_with_mesh(mesh, 1024, 16, num_kv_heads,
-                                               head_size, dtype)
+                                               head_size, t2j_dtype(dtype))
     sharding = NamedSharding(mesh, PartitionSpec())
 
     def _allocate():
@@ -138,15 +138,16 @@ def test_jax_attention(mesh, num_heads, head_size, num_kv_heads, num_tokens):
     vllm_model_wrapper_context = get_vllm_model_wrapper_context()
     kv_cache = vllm_model_wrapper_context.kv_caches[0]
 
-    ref_output = ref_ragged_paged_attention(q,
-                                            k,
-                                            v,
-                                            kv_cache,
-                                            md.seq_lens,
-                                            md.block_tables,
-                                            md.query_start_loc,
-                                            md.request_distribution,
-                                            sm_scale=scale)
+    ref_output, _ = ref_ragged_paged_attention(
+        q,
+        jax.device_put(t2j(k), NamedSharding(mesh, P())),
+        jax.device_put(t2j(v), NamedSharding(mesh, P())),
+        kv_cache,
+        md.seq_lens,
+        md.block_tables,
+        md.query_start_loc,
+        md.request_distribution,
+        sm_scale=scale)
     ref_output = j2t(ref_output.astype(jnp.float32)).to(dtype)
 
     torch.testing.assert_close(ref_output, jax_output, atol=1e-2, rtol=1e-5)
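For context on the new conversion calls (`t2j_dtype(dtype)` in the cache-shape helper and `jax.device_put(t2j(k), NamedSharding(mesh, P()))` in the reference call), here is a minimal, self-contained sketch of that torch-to-JAX handoff. It assumes `t2j` and `t2j_dtype` are plain tensor/dtype converters; the stand-in implementations and the mesh setup below are illustrative, not the library's actual code.

```python
# Hedged sketch of the torch -> JAX handoff used in the updated test.
# The converters and the single-axis mesh below are assumptions.
import jax
import jax.numpy as jnp
import numpy as np
import torch
from jax.sharding import Mesh, NamedSharding, PartitionSpec as P

# Assumed stand-in for t2j_dtype: map a torch dtype to its JAX counterpart.
_TORCH_TO_JAX_DTYPE = {
    torch.float32: jnp.float32,
    torch.bfloat16: jnp.bfloat16,
    torch.float16: jnp.float16,
}

def t2j_dtype(torch_dtype):
    return _TORCH_TO_JAX_DTYPE[torch_dtype]

# Assumed stand-in for t2j: detach to CPU, round-trip through numpy
# (via float32, since numpy has no native bfloat16), then restore the dtype.
def t2j(tensor: torch.Tensor) -> jax.Array:
    arr = jnp.asarray(tensor.detach().to(torch.float32).cpu().numpy())
    return arr.astype(t2j_dtype(tensor.dtype))

mesh = Mesh(np.array(jax.devices()), axis_names=("x",))
k = torch.randn(8, 4, 128, dtype=torch.bfloat16)

# Replicate the converted array across the mesh, mirroring
# jax.device_put(t2j(k), NamedSharding(mesh, P())) from the diff above.
k_jax = jax.device_put(t2j(k), NamedSharding(mesh, P()))
print(k_jax.dtype, k_jax.sharding)  # bfloat16, fully replicated NamedSharding
```

An empty `PartitionSpec` (`P()`) means no dimension is partitioned, so the reference inputs end up replicated on every device in the mesh, matching the replicated `NamedSharding(mesh, PartitionSpec())` used for the KV cache allocation.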