Merged upstream, fixed OSX compile errors, integrated noavx2 build into main
YellowRoseCx · Apr 12, 2023 · 4faae0a · 4faae0a
2 parents 2444a99 + 4dbbd40
commit 4faae0a
Show file tree

Hide file tree

Showing 15 changed files with 132 additions and 72 deletions.
diff --git a/.ecrc b/.ecrc
@@ -0,0 +1,5 @@
+{
+  "Disable": {
+    "IndentSize": true
+  }
+}
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,16 @@
+# https://EditorConfig.org
+
+# Top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file, utf-8 charset
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+indent_style = space
+indent_size = 4
+
+[Makefile]
+indent_style = tab
diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md
@@ -22,9 +22,9 @@ Please provide a detailed written description of what you were trying to do, and
 
 # Current Behavior
 
-Please provide a detailed written description of what `llama.cpp` did, instead. 
+Please provide a detailed written description of what `llama.cpp` did, instead.
 
-# Environment and Context 
+# Environment and Context
 
 Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions.
 
@@ -133,7 +133,7 @@ llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.
 llama_model_load: .......................................................................................... done
 llama_model_load: model size =  4869.09 MB / num tensors = 723
 
-system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
+system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
 
 main: prompt: 'Please close your issue when it has been answered.'
 main: number of tokens in prompt = 11
@@ -166,14 +166,14 @@ main:    total time = 246406.42 ms
 
  Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.':
 
-        3636882.89 msec task-clock                #   14.677 CPUs utilized          
-             13509      context-switches          #    3.714 /sec                   
-              2436      cpu-migrations            #    0.670 /sec                   
-          10476679      page-faults               #    2.881 K/sec                  
+        3636882.89 msec task-clock                #   14.677 CPUs utilized
+             13509      context-switches          #    3.714 /sec
+              2436      cpu-migrations            #    0.670 /sec
+          10476679      page-faults               #    2.881 K/sec
     13133115082869      cycles                    #    3.611 GHz                      (16.77%)
        29314462753      stalled-cycles-frontend   #    0.22% frontend cycles idle     (16.76%)
     10294402631459      stalled-cycles-backend    #   78.39% backend cycles idle      (16.74%)
-    23479217109614      instructions              #    1.79  insn per cycle         
+    23479217109614      instructions              #    1.79  insn per cycle
                                                   #    0.44  stalled cycles per insn  (16.76%)
      2353072268027      branches                  #  647.002 M/sec                    (16.77%)
         1998682780      branch-misses             #    0.08% of all branches          (16.76%)

diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml
@@ -0,0 +1,17 @@
+name: EditorConfig Checker
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  editorconfig:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: editorconfig-checker/action-editorconfig-checker@main
+      - run: editorconfig-checker
diff --git a/Makefile b/Makefile
@@ -34,6 +34,7 @@ endif
 CFLAGS   = -I.              -Ofast -DNDEBUG -std=c11   -fPIC
 CXXFLAGS = -I. -I./examples -Ofast -DNDEBUG -std=c++11 -fPIC
 LDFLAGS  =
+BONUSCFLAGS =
 
 #lets try enabling everything
 CFLAGS   += -pthread -s 
@@ -71,7 +72,8 @@ endif
 #       feel free to update the Makefile for your architecture and send a pull request or issue
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	# Use all CPU extensions that are available:
-	CFLAGS += -mf16c -mfma -mavx2 -mavx -msse3
+	CFLAGS += -mf16c -mavx -msse3 
+	BONUSCFLAGS += -mfma -mavx2 
 endif
 ifneq ($(filter ppc64%,$(UNAME_M)),)
 	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
@@ -122,17 +124,19 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 endif
 
 OPENBLAS_BUILD = 
-ifeq ($(OS),Windows_NT)
-	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
-else
-	OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
-endif
-
 CLBLAST_BUILD = 
+OPENBLAS_NOAVX2_BUILD = 
+
 ifeq ($(OS),Windows_NT)
+	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
 	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
+	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
 else
-	CLBLAST_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use CLBlast, please install it seperately, then link it manually with LLAMA_CLBLAST=1. This is just a reminder, not an error.'
+	ifndef LLAMA_OPENBLAS
+	ifndef LLAMA_CLBLAST
+	OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
+	endif
+	endif
 endif
 
 #
@@ -150,22 +154,28 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
 
-default: llamalib llamalib_openblas llamalib_clblast
+default: llamalib llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast
 
 #
 # Build library
 #
 
 ggml.o: ggml.c ggml.h
-	$(CC)  $(CFLAGS) -c ggml.c -o ggml.o
+	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -c ggml.c -o ggml.o
 
 ggml_openblas.o: ggml.c ggml.h
-	$(CC)  $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
+	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
+
+ggml_openblas_noavx2.o: ggml.c ggml.h
+	$(CC)  $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas_noavx2.o
 
 ggml_clblast.o: ggml.c ggml.h
-	$(CC)  $(CFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o
+	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o
 
 ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
+	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
+
+ggml_v1_noavx2.o: otherarch/ggml_v1.c otherarch/ggml_v1.h # same source as ggml_v1.o, built without $(BONUSCFLAGS); must write its own object so it never clobbers ggml_v1.o
-	$(CC)  $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
+	$(CC)  $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1_noavx2.o
 
 llama.o: llama.cpp llama.h llama_internal.h
@@ -198,6 +208,9 @@ llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
 llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o 
 	$(OPENBLAS_BUILD)
 
+llamalib_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o 
+	$(OPENBLAS_NOAVX2_BUILD)
+
 llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o 
 	$(CLBLAST_BUILD)
 

diff --git a/examples/Miku.sh b/examples/Miku.sh
@@ -19,15 +19,15 @@ GEN_OPTIONS=(--batch_size 1024
 --top_p 0.5)
 
 if [ -n "$N_THREAD" ]; then
-	GEN_OPTIONS+=(--threads "$N_THREAD")
+    GEN_OPTIONS+=(--threads "$N_THREAD")
 fi
 
 ./main "${GEN_OPTIONS[@]}" \
-	--model "$MODEL" \
-	--n_predict "$N_PREDICTS" \
-	--color --interactive \
-	--reverse-prompt "${USER_NAME}:" \
-	--prompt "
+    --model "$MODEL" \
+    --n_predict "$N_PREDICTS" \
+    --color --interactive \
+    --reverse-prompt "${USER_NAME}:" \
+    --prompt "
 This is a transcript of a 1000 page, never ending conversation between ${USER_NAME} and the cute and helpful AI assistant ${AI_NAME}. ${AI_NAME} is a girl who is an AI running on the users computer.
 ${AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.
 ${AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.

diff --git a/examples/common.cpp b/examples/common.cpp
@@ -22,9 +22,9 @@ extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHand
 extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
 extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
 extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
-extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags, 
-                                                                   const wchar_t * lpWideCharStr, int cchWideChar, 
-                                                                   char * lpMultiByteStr, int cbMultiByte, 
+extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags,
+                                                                   const wchar_t * lpWideCharStr, int cchWideChar,
+                                                                   char * lpMultiByteStr, int cbMultiByte,
                                                                    const char * lpDefaultChar, bool * lpUsedDefaultChar);
 #define CP_UTF8 65001
 #endif
@@ -328,9 +328,9 @@ void win32_console_init(bool enable_color) {
 
 // Convert a wide Unicode string to an UTF8 string
 void win32_utf8_encode(const std::wstring & wstr, std::string & str) {
-	int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
-	std::string strTo(size_needed, 0);
-	WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
-	str = strTo;
+    int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
+    std::string strTo(size_needed, 0);
+    WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
+    str = strTo;
 }
 #endif
diff --git a/examples/embedding/README.md b/examples/embedding/README.md
@@ -1,3 +1,3 @@
-# embedding
-
-TODO
+# embedding
+
+TODO
diff --git a/examples/main/README.md b/examples/main/README.md
@@ -1,3 +1,3 @@
-# main
-
-TODO
+# main
+
+TODO
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
@@ -168,7 +168,7 @@ int main(int argc, char ** argv) {
     }
 
     // enable interactive mode if reverse prompt or interactive start is specified
-    if (params.antiprompt.size() != 0 || params.interactive_start) { 
+    if (params.antiprompt.size() != 0 || params.interactive_start) {
         params.interactive = true;
     }
 

diff --git a/examples/perplexity/README.md b/examples/perplexity/README.md
@@ -1,3 +1,3 @@
-# perplexity
-
-TODO
+# perplexity
+
+TODO
diff --git a/ggml.c b/ggml.c
@@ -127,9 +127,9 @@ typedef void* thread_ret_t;
 
 #ifdef GGML_USE_ACCELERATE
 #include <Accelerate/Accelerate.h>
-#elif GGML_USE_OPENBLAS
-#include <ggml_blas_adapter.c>
 #endif
+#include <ggml_blas_adapter.c>
+
 
 #undef MIN
 #undef MAX
@@ -228,12 +228,12 @@ static inline float fp32_from_bits(uint32_t w) {
 }
 
 static inline uint32_t fp32_to_bits(float f) {
-	union {
-		float as_value;
-		uint32_t as_bits;
-	} fp32;
-	fp32.as_value = f;
-	return fp32.as_bits;
+    union {
+        float as_value;
+        uint32_t as_bits;
+    } fp32;
+    fp32.as_value = f;
+    return fp32.as_bits;
 }
 
 static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
@@ -1881,7 +1881,7 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
         sum1 += x1->d * y1->d * (vgetq_lane_s32(p_1, 0) + vgetq_lane_s32(p_1, 1) + vgetq_lane_s32(p_1, 2) + vgetq_lane_s32(p_1, 3));
 #endif
 #else
-	    const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0ls));
+        const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0ls));
         const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0ls));
 
         const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hs), vget_low_s8 (v1_0hs));

diff --git a/ggml_blas_adapter.c b/ggml_blas_adapter.c
@@ -4,6 +4,7 @@
 //windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license)
 //windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license)
 
+#if GGML_USE_OPENBLAS
 #include <cblas.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -103,22 +104,17 @@ static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS
     clReleaseMemObject(cl_buffer_c);
 }
 
+#endif
 #endif
 
-static void do_blas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
-OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc)
-{
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
 #if GGML_USE_CLBLAST
-    ggml_cl_sgemm_wrapper(Order, TransA, TransB,
-                M, N, K,
-                alpha, A, lda,
-                B, ldb,
-                beta, C, ldc);
+#define do_blas_sgemm(Order, TransA, TransB,M, N, K,alpha, A, lda, B, ldb, beta, C, ldc) ({\
+ggml_cl_sgemm_wrapper(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);\
+})
 #else
-    cblas_sgemm(Order, TransA, TransB,
-                M, N, K,
-                alpha, A, lda,
-                B, ldb,
-                beta, C, ldc);
+#define do_blas_sgemm(Order, TransA, TransB,M, N, K,alpha, A, lda, B, ldb, beta, C, ldc) ({\
+cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);\
+})
 #endif
-}
+#endif
diff --git a/koboldcpp.py b/koboldcpp.py
@@ -36,11 +36,14 @@ class generation_outputs(ctypes.Structure):
 handle = None
 use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
 use_clblast = False #uses CLBlast instead
+use_noavx2 = False #uses openblas with no avx2 instructions
 
 def init_library():
-    global handle, use_blas, use_clblast
+    global handle, use_blas, use_clblast, use_noavx2
     libname = ""
-    if use_blas:
+    if use_noavx2:
+        libname = "koboldcpp_openblas_noavx2.dll"
+    elif use_blas:
         libname = "koboldcpp_openblas.dll"
     elif use_clblast:
         libname = "koboldcpp_clblast.dll"
@@ -309,7 +312,7 @@ def stop(self):
             sys.exit(0)
 
 def main(args): 
-    global use_blas, use_clblast
+    global use_blas, use_clblast, use_noavx2
     if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
         print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
         use_blas = False
@@ -322,6 +325,14 @@ def main(args):
         else:
             print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
             use_clblast = True
+    elif args.noavx2:
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
+            print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. This mode cannot be used.")
+        elif os.name == 'nt':
+            print("Attempting to use non-avx2 compatibility openblas library.")
+            use_noavx2 = True
+        else:
+            print("Non-AVX2 compatibility OpenBLAS mode only available on windows. On other OS, please manually rebuild without AVX2 flags.")
     elif not args.noblas:
         print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
         use_blas = True
@@ -409,8 +420,10 @@ def main(args):
     parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
     parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
     parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
-    parser.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
     parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
-    parser.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
+    compatgroup = parser.add_mutually_exclusive_group() # argparse rejects any combination of --noblas / --noavx2 / --useclblast
+    compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
+    compatgroup.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --noblas or --useclblast.", action='store_true')
+    compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
     args = parser.parse_args()
     main(args)
diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat
@@ -1 +1 @@
-pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
+pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
-Original file line number
+Diff line change
@@ -1,3 +1,3 @@
-    # main
-    TODO
+    # main
+    TODO
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
		pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"