From da5a0b539d6a5fe0c0195513a797814d2c267540 Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-neuralmagic@users.noreply.github.com> Date: Mon, 20 May 2024 10:55:34 -0400 Subject: [PATCH] Remove marlin warning (#4918) --- csrc/quantization/gptq_marlin/gptq_marlin.cu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/csrc/quantization/gptq_marlin/gptq_marlin.cu b/csrc/quantization/gptq_marlin/gptq_marlin.cu index fdc0ebef4672e..34950a5d13cf5 100644 --- a/csrc/quantization/gptq_marlin/gptq_marlin.cu +++ b/csrc/quantization/gptq_marlin/gptq_marlin.cu @@ -1519,10 +1519,6 @@ exec_config_t determine_thread_config(int prob_m, int prob_n, int prob_k, } } - printf("WARNING: Marlin kernel is reducing max_m_blocks due to small SM " - "GPU cache. This may " - "hurt performance. Consider upgrading your GPU.\n"); - max_m_blocks--; // Process less M blocks per invocation to reduce cache // usage }