Description
Your current environment
This test has been flaky for at least the past month: https://buildkite.com/organizations/vllm/analytics/suites/ci-1/tests/4abfbf0d-3a86-8a68-9ff3-0e0ab0fbb38b?period=28days&tags=scm.branch%3Amain%2Cresult%3Afailed
🐛 Describe the bug
Failing tests:
FAILED v1/engine/test_engine_core_client.py::test_kv_cache_events[True-tcp] - AssertionError: No message received
assert None is not None
Logs:
=================================== FAILURES ===================================
________________________ test_kv_cache_events[True-tcp] ________________________
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7fc027da70e0>
multiprocessing_mode = True
publisher_config = KVEventsConfig(enable_kv_cache_events=True, publisher='zmq', endpoint='tcp://*:51905', replay_endpoint='tcp://*:51906', buffer_steps=100, hwm=1000, max_queue_size=100000, topic='test')
@pytest.mark.parametrize(
"multiprocessing_mode,publisher_config",
[(True, "tcp"), (False, "inproc")],
indirect=["publisher_config"],
)
def test_kv_cache_events(
monkeypatch: pytest.MonkeyPatch,
multiprocessing_mode: bool,
publisher_config,
):
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
block_size = 16
num_blocks = 2
engine_args = EngineArgs(model=MODEL_NAME,
enforce_eager=True,
enable_prefix_caching=True,
block_size=block_size)
engine_args.kv_events_config = publisher_config
vllm_config = engine_args.create_engine_config(
UsageContext.UNKNOWN_CONTEXT)
executor_class = Executor.get_class(vllm_config)
client = EngineCoreClient.make_client(
multiprocess_mode=multiprocessing_mode,
asyncio_mode=False,
vllm_config=vllm_config,
executor_class=executor_class,
log_stats=False,
)
endpoint = publisher_config.endpoint.replace("*", "127.0.0.1")
subscriber = MockSubscriber(endpoint,
topic=publisher_config.topic,
decode_type=KVEventBatch)
try:
custom_tokens = list(range(num_blocks * block_size))
request = EngineCoreRequest(
request_id=str(uuid.uuid4()),
prompt_token_ids=custom_tokens,
mm_inputs=None,
mm_hashes=None,
mm_placeholders=None,
sampling_params=SamplingParams(
max_tokens=1), # Short completion for speed
eos_token_id=None,
arrival_time=time.time(),
lora_request=None,
cache_salt=None,
)
client.add_request(request)
outputs: dict[str, list] = {request.request_id: []}
loop_until_done(client, outputs)
result = subscriber.receive_one(timeout=1000)
> assert result is not None, "No message received"
E AssertionError: No message received
E assert None is not None
v1/engine/test_engine_core_client.py:318: AssertionError
=============================== warnings summary ===============================
../../usr/local/lib/python3.12/dist-packages/schemathesis/generation/coverage.py:305
/usr/local/lib/python3.12/dist-packages/schemathesis/generation/coverage.py:305: DeprecationWarning: jsonschema.exceptions.RefResolutionError is deprecated as of version 4.18.0. If you wish to catch potential reference resolution errors, directly catch referencing.exceptions.Unresolvable.
ref_error: type[Exception] = jsonschema.RefResolutionError,
tests/v1/engine/test_async_llm.py: 13 warnings
tests/v1/engine/test_engine_core_client.py: 2 warnings
/usr/lib/python3.12/multiprocessing/popen_fork.py:66: DeprecationWarning: This process (pid=277) is multi-threaded, use of fork() may lead to deadlocks in the child.
self.pid = os.fork()
tests/v1/engine/test_engine_core.py::test_engine_core
tests/v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
tests/v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True]
tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False]
/vllm-workspace/tests/utils.py:723: DeprecationWarning: This process (pid=277) is multi-threaded, use of fork() may lead to deadlocks in the child.
pid = os.fork()
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED v1/engine/test_engine_core_client.py::test_kv_cache_events[True-tcp] - AssertionError: No message received
assert None is not None
============ 1 failed, 49 passed, 21 warnings in 706.77s (0:11:46) =============
^^^ +++
🚨 Error: The command exited with status 1
^^^ +++
user command error: The plugin docker command hook exited with status 1
~~~ Running global pre-exit hook
$ /etc/buildkite-agent/hooks/pre-exit
~~~ Running plugin docker pre-exit hook
$ /var/lib/buildkite-agent/plugins/bk-gpu-1-queue-ci-i-0aabd234c4d03089e-1/github-com-buildkite-plugins-docker-buildkite-plugin-v5-2-0/hooks/pre-exit
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
Metadata
Metadata
Assignees
Type
Projects
Status
Done