[MISC] Add prefix cache reset to LMCache CPU offload example

sakunkun · sakunkun · commit 8b1b61cd55dd · 2025-10-21T06:16:15.000Z
diff --git a/examples/others/lmcache/cpu_offload_lmcache.py b/examples/others/lmcache/cpu_offload_lmcache.py
@@ -144,6 +144,9 @@ def main():
         print_output(llm, first_prompt, sampling_params, "first")
 
         time.sleep(1)
+        # Reset vLLM's own prefix cache so the second request cannot reuse it
+        # and must instead retrieve the cached KV entries from LMCache.
+        llm.reset_prefix_cache()
 
         # print the second output
         print_output(llm, second_prompt, sampling_params, "second")