Commit ff38f0a

Authored by Varun Sundar Rabindranath

[CI/Build] LoRA: Delete long context tests (#15503)
Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
1 parent a5cfbab commit ff38f0a

File tree

2 files changed: +1 −304 lines

.buildkite/test-pipeline.yaml (+1, −3)
@@ -287,7 +287,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
   parallelism: 4

 - label: PyTorch Fullgraph Smoke Test # 9min
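The `--shard-id`/`--num-shards` flags in the command above come from the pytest-shard plugin, which Buildkite feeds with its `BUILDKITE_PARALLEL_JOB` and `BUILDKITE_PARALLEL_JOB_COUNT` environment variables so that `parallelism: 4` splits the LoRA suite across four jobs. A minimal sketch of the idea, assuming a simple round-robin assignment by test index (the real plugin may instead hash test node IDs; `select_shard` is a hypothetical helper, not the plugin's API):

```python
def select_shard(tests, shard_id, num_shards):
    """Return the subset of tests that shard `shard_id` should run."""
    return [t for i, t in enumerate(tests) if i % num_shards == shard_id]

# Buildkite would supply shard_id/num_shards via BUILDKITE_PARALLEL_JOB
# and BUILDKITE_PARALLEL_JOB_COUNT; shard 1 of 4 is hard-coded here.
tests = [f"test_{i}" for i in range(10)]
print(select_shard(tests, 1, 4))  # ['test_1', 'test_5', 'test_9']
```

Every shard sees the full collected test list and deselects everything outside its slice, so no coordination between jobs is needed.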
@@ -592,8 +592,6 @@ steps:
   # FIXIT: find out which code initialize cuda before running the test
   # before the fix, we need to use spawn to test it
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  # This test runs llama 13B, so it is required to run on 4 GPUs.
-  - pytest -v -s -x lora/test_long_context.py
   # There is some Tensor Parallelism related processing logic in LoRA that
   # requires multi-GPU testing for validation.
   - pytest -v -s -x lora/test_chatglm3_tp.py
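The `VLLM_WORKER_MULTIPROC_METHOD=spawn` export retained above works around a general CUDA constraint: CUDA cannot be safely re-initialized in a process forked from a parent that already initialized it, so worker processes must be started with the `spawn` method instead. A minimal standard-library illustration of selecting that start method (no CUDA involved; the `worker` function is purely illustrative):

```python
import multiprocessing as mp

def worker(q):
    # In vLLM each worker initializes CUDA itself; a freshly spawned
    # interpreter avoids inheriting a forked, unusable CUDA context.
    q.put("ok")

if __name__ == "__main__":
    ctx = mp.get_context("spawn")  # what the env var selects in vLLM
    q = ctx.Queue()
    p = ctx.Process(target=worker, args=(q,))
    p.start()
    print(q.get())  # prints "ok"
    p.join()
```

Unlike `fork`, `spawn` starts a clean interpreter and re-imports the worker's module, which is why it sidesteps the inherited-CUDA-state problem at the cost of slower startup.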

tests/lora/test_long_context.py (−301)

This file was deleted.
