From da5a0b539d6a5fe0c0195513a797814d2c267540 Mon Sep 17 00:00:00 2001
From: Alexander Matveev <59768536+alexm-neuralmagic@users.noreply.github.com>
Date: Mon, 20 May 2024 10:55:34 -0400
Subject: [PATCH] Remove marlin warning (#4918)

---
 csrc/quantization/gptq_marlin/gptq_marlin.cu | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/csrc/quantization/gptq_marlin/gptq_marlin.cu b/csrc/quantization/gptq_marlin/gptq_marlin.cu
index fdc0ebef4672e..34950a5d13cf5 100644
--- a/csrc/quantization/gptq_marlin/gptq_marlin.cu
+++ b/csrc/quantization/gptq_marlin/gptq_marlin.cu
@@ -1519,10 +1519,6 @@ exec_config_t determine_thread_config(int prob_m, int prob_n, int prob_k,
       }
     }
 
-    printf("WARNING: Marlin kernel is reducing max_m_blocks due to small SM "
-           "GPU cache. This may "
-           "hurt performance. Consider upgrading your GPU.\n");
-
     max_m_blocks--; // Process less M blocks per invocation to reduce cache
                     // usage
   }