[MISC] Add prefix cache reset to LMCache CPU offload example

sakunkun · sakunkun · commit 0a3940fea036 · 2025-10-21T06:55:09.000Z
Signed-off-by: zhou.qianjun <zhou.qianjun@zte.com.cn>
diff --git a/examples/others/lmcache/cpu_offload_lmcache.py b/examples/others/lmcache/cpu_offload_lmcache.py
@@ -144,6 +144,9 @@ def main():
         print_output(llm, first_prompt, sampling_params, "first")
 
         time.sleep(1)
+        # Clear vLLM's internal prefix cache to force the second request
+        # to fetch cached KVs from LMCache
+        llm.reset_prefix_cache()
 
         # print the second output
         print_output(llm, second_prompt, sampling_params, "second")