From d4e55bad11bba009a34442088df897528785c368 Mon Sep 17 00:00:00 2001
From: Sage Moore <sage@neuralmagic.com>
Date: Wed, 10 Apr 2024 07:23:19 -0400
Subject: [PATCH] add test_compressed_memory to skipped list

---
 README.md                                  | 1 +
 neuralmagic/tests/skip-for-remote-push.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.md b/README.md
index cf86c30e82d7d..9941b8abd977b 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 [vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference that Neural Magic regularly contributes upstream improvements to. This fork, `nm-vllm` is our opinionated focus on incorporating the latest LLM optimizations like quantization and sparsity for enhanced performance.
 
 ## Installation
+The [nm-vllm PyPI package](https://pypi.org/project/nm-vllm/) includes pre-compiled binaries for CUDA (version 12.1) kernels, streamlining the setup process. For other PyTorch or CUDA versions, please compile the package from source.
 
 Install it using pip:
 ```bash
diff --git a/neuralmagic/tests/skip-for-remote-push.txt b/neuralmagic/tests/skip-for-remote-push.txt
index 67a07fe4bdbcd..110b54f8e6fc6 100644
--- a/neuralmagic/tests/skip-for-remote-push.txt
+++ b/neuralmagic/tests/skip-for-remote-push.txt
@@ -12,6 +12,7 @@ tests/distributed/test_comm_ops.py
 tests/prefix_caching/test_prefix_caching.py
 tests/models/test_models_logprobs.py
 tests/models/test_models.py
+tests/models/test_compressed_memory.py
 tests/spec_decode/test_utils.py
 tests/spec_decode/test_spec_decode_worker.py
 tests/spec_decode/test_metrics.py