Vulkan Mixture of Experts (MoE) support (#7628)
* Finish Vulkan mul_mat_id implementation

* Add Vulkan sum_rows and div ops

* Fix MUL_MAT_ID matrix matrix shader

* Fix MUL_MAT_ID matrix vector shader dispatch size

* Fix MUL_MAT_ID matrix vector shader and dispatch code

* Update Vulkan CPU offload for MUL_MAT_ID

* Fix crash when using split mode none and setting a main GPU

0cc4m authored Jun 3, 2024
1 parent a10cda5 commit 3d7ebf6
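
For context on the bullets above: MUL_MAT_ID is the operator at the heart of MoE inference. Instead of multiplying every token by a single weight matrix, each token is multiplied only by the expert matrices its router selected, with the expert indices passed in as a separate id tensor. The sketch below is a minimal CPU reference of those semantics with made-up names and a row-major layout; it is not the ggml API or the Vulkan shader code from this commit.

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical CPU reference for a MUL_MAT_ID-style operation (illustrative only).
// experts: n_expert weight matrices, each rows x cols, stored row-major.
// x:       one input vector of length cols per token.
// ids:     per token, the expert indices chosen by the router (top-k).
// Result:  per token, one output vector of length rows per selected expert.
static std::vector<std::vector<std::vector<float>>> mul_mat_id_ref(
        const std::vector<std::vector<float>>   & experts,
        const std::vector<std::vector<float>>   & x,
        const std::vector<std::vector<int32_t>> & ids,
        size_t rows, size_t cols) {
    std::vector<std::vector<std::vector<float>>> out(x.size());
    for (size_t t = 0; t < x.size(); t++) {            // for each token
        for (int32_t e : ids[t]) {                     // for each selected expert
            const std::vector<float> & w = experts[e]; // pick the weights by expert id
            std::vector<float> y(rows, 0.0f);
            for (size_t r = 0; r < rows; r++) {
                for (size_t c = 0; c < cols; c++) {
                    y[r] += w[r*cols + c] * x[t][c];
                }
            }
            out[t].push_back(std::move(y));
        }
    }
    return out;
}

The matrix-vector and matrix-matrix dispatch fixes listed in the commit message presumably concern the GPU equivalent of these loops, where the dispatch size must cover every (token, selected expert) pair rather than a single matrix product.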
Showing 5 changed files with 99,976 additions and 40,426 deletions.
12 changes: 6 additions & 6 deletions common/common.cpp
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             return true;
         }
         params.main_gpu = std::stoi(argv[i]);
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--split-mode" || arg == "-sm") {
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--tensor-split" || arg == "-ts") {
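
Note that the hunks above only rename the guard: GGML_USE_CUDA_SYCL_VULKAN must be defined elsewhere for these warnings to keep working. A plausible definition, mirroring how the old combined GGML_USE_CUDA_SYCL guard would be composed from the per-backend macros (the defining hunk is not shown in this excerpt, so treat this as an assumption):

// Assumed combined-backend guard (not shown in this diff excerpt):
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_VULKAN)
#define GGML_USE_CUDA_SYCL_VULKAN
#endif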
