vllm-project · sakunkun · Oct 21, 2025 · Oct 23, 2025 · gemini-code-assist · Oct 21, 2025
diff --git a/examples/others/lmcache/cpu_offload_lmcache.py b/examples/others/lmcache/cpu_offload_lmcache.py
@@ -144,6 +144,9 @@ def main():
         print_output(llm, first_prompt, sampling_params, "first")
 
         time.sleep(1)
+        # Clear vLLM's internal prefix cache so that the second request
+        # must fetch its cached KV entries from LMCache
+        llm.reset_prefix_cache()
-        llm.reset_prefix_cache()
+        if not llm.reset_prefix_cache():
+            raise RuntimeError(
+                "Failed to reset prefix cache. The example may not run as expected."
+            )
-        llm.reset_prefix_cache()
+        if not llm.reset_prefix_cache():
+            raise RuntimeError(
+                "Failed to reset prefix cache. The example may not run as expected."
+            )
 
         # print the second output
         print_output(llm, second_prompt, sampling_params, "second")