We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da554f9, commit d1fa2f2 (Copy full SHA for d1fa2f2)
tests/models/language/pooling/test_auto_prefix_cache_support.py
@@ -26,7 +26,8 @@ def test_classify_models(
26
with vllm_runner(model,
27
max_model_len=512,
28
dtype=dtype,
29
- enable_prefix_caching=True) as vllm_model:
+ enable_prefix_caching=True,
30
+ enable_chunked_prefill=True) as vllm_model:
31
cache_config = vllm_model.llm.llm_engine.cache_config
32
assert cache_config.enable_prefix_caching
33
vllm_outputs = vllm_model.classify(example_prompts)
@@ -63,6 +64,7 @@ def test_embed_models(
63
64
runner="pooling",
65
max_model_len=None,
66
enable_prefix_caching=True,
67
+ enable_chunked_prefill=True,
68
) as vllm_model:
69
70
0 commit comments