clean and refactor handling of flags

YellowRoseCx · Apr 12, 2023 · 1bd5992 · 1bd5992
2 parents 636f8e5 + f76cb3a
commit 1bd5992
Show file tree

Hide file tree

Showing 10 changed files with 106 additions and 245 deletions.
diff --git a/Makefile b/Makefile
@@ -123,14 +123,16 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
-OPENBLAS_BUILD = 
-CLBLAST_BUILD = 
-OPENBLAS_NOAVX2_BUILD = 
+OPENBLAS_BUILD =
+CLBLAST_BUILD =
+NOAVX2_BUILD = 
+OPENBLAS_NOAVX2_BUILD =
 
 ifeq ($(OS),Windows_NT)
 	OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
 	CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
-	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
+	OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
+	NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o -shared -o koboldcpp_noavx2.dll $(LDFLAGS)
 else
 	ifndef LLAMA_OPENBLAS
 	ifndef LLAMA_CLBLAST
@@ -154,7 +156,7 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
 
-default: llamalib llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast
+default: llamalib llamalib_noavx2 llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast
 
 #
 # Build library
@@ -166,6 +168,9 @@ ggml.o: ggml.c ggml.h
 ggml_openblas.o: ggml.c ggml.h
 	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
 
+ggml_noavx2.o: ggml.c ggml.h
+	$(CC)  $(CFLAGS) -c ggml.c -o ggml_noavx2.o
+
 ggml_openblas_noavx2.o: ggml.c ggml.h
 	$(CC)  $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas_noavx2.o
 
@@ -176,7 +181,7 @@ ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
 	$(CC)  $(CFLAGS) $(BONUSCFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
 
 ggml_v1_noavx2.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
-	$(CC)  $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
+	$(CC)  $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1_noavx2.o
 
 llama.o: llama.cpp llama.h llama_internal.h
 	$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
@@ -194,7 +199,7 @@ gpttype_adapter.o:
 	$(CXX) $(CXXFLAGS) -c gpttype_adapter.cpp -o gpttype_adapter.o
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
 
 main: examples/main/main.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@@ -208,6 +213,9 @@ llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
 llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o 
 	$(OPENBLAS_BUILD)
 
+llamalib_noavx2: ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o 
+	$(NOAVX2_BUILD)
+
 llamalib_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o 
 	$(OPENBLAS_NOAVX2_BUILD)
 

diff --git a/expose.cpp b/expose.cpp
@@ -31,9 +31,15 @@ extern "C"
         std::string model = inputs.model_filename;
         file_format = check_file_format(model.c_str());
 
-        //first digit is platform, second is devices
-        int platform = inputs.clblast_info/10;
-        int devices = inputs.clblast_info%10;
+        //clblast_info is a decimal-packed value: hundreds digit = configured flag, tens digit = platform, ones digit = device
+        int parseinfo = inputs.clblast_info;
+
+        std::string usingclblast = "KCPP_CLBLAST_CONFIGURED="+std::to_string(parseinfo>0?1:0); //any positive value counts as configured
+        putenv((char*)usingclblast.c_str()); //NOTE(review): POSIX putenv keeps this pointer instead of copying; usingclblast is a local, so confirm the variable is only read while it is still alive
+
+        parseinfo = parseinfo%100; //keep last 2 digits (drops the configured flag)
+        int platform = parseinfo/10; //tens digit
+        int devices = parseinfo%10; //ones digit
         std::string platformenv = "KCPP_CLBLAST_PLATFORM="+std::to_string(platform);
         std::string deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
         putenv((char*)platformenv.c_str());

diff --git a/koboldcpp.py b/koboldcpp.py
@@ -42,13 +42,17 @@ def init_library():
     global handle, use_blas, use_clblast, use_noavx2
     libname = ""
     if use_noavx2:
-        libname = "koboldcpp_openblas_noavx2.dll"
-    elif use_blas:
-        libname = "koboldcpp_openblas.dll"
-    elif use_clblast:
-        libname = "koboldcpp_clblast.dll"
+        if use_blas:
+            libname = "koboldcpp_openblas_noavx2.dll"
+        else:
+            libname = "koboldcpp_noavx2.dll"
     else:
-        libname = "koboldcpp.dll"
+        if use_clblast:
+            libname = "koboldcpp_clblast.dll"
+        elif use_blas:
+            libname = "koboldcpp_openblas.dll"
+        else:
+            libname = "koboldcpp.dll"
 
     print("Initializing dynamic library: " + libname)
     dir_path = os.path.dirname(os.path.realpath(__file__))  
@@ -72,7 +76,7 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
     inputs.use_mmap = use_mmap
     clblastids = 0
     if args.useclblast:
-        clblastids = int(args.useclblast[0])*10 + int(args.useclblast[1])
+        clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
     inputs.clblast_info = clblastids
     ret = handle.load_model(inputs)
     return ret
@@ -313,30 +317,36 @@ def stop(self):
 
 def main(args): 
     global use_blas, use_clblast, use_noavx2
-    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
-        print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
-        use_blas = False
-    elif os.name != 'nt':
-        print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
-        use_blas = False
+    use_blas = False  # every optional backend starts disabled; the branches below opt in
+    use_clblast = False
+    use_noavx2 = False
+
+    if os.name != 'nt':  # non-Windows: all three flags stay False
+        print("You are not on Windows. Default koboldcpp.dll library file will be used. Remember to manually link with OpenBLAS using LLAMA_OPENBLAS=1, or CLBlast with LLAMA_CLBLAST=1 if you want to use them.")
+    elif args.noavx2:  # tested before --useclblast, so CLBlast is not selected when both are given
+        use_noavx2 = True
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
+            print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. Non-BLAS library will be used.")
+        elif args.noblas:
+            print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
+        else:
+            use_blas = True
+            print("Attempting to use non-avx2 compatibility library with OpenBLAS.")
     elif args.useclblast:
         if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")):
             print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.")
         else:
             print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
             use_clblast = True
-    elif args.noavx2:
-        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
-            print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. This mode cannot be used.")
-        elif os.name == 'nt':
-            print("Attempting to use non-avx2 compatibility openblas library.")
-            use_noavx2 = True
+    else:  # Windows default path: OpenBLAS unless the DLLs are missing or --noblas was given
+        if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
+            print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used.")
+        elif args.noblas:
+            print("Attempting to use library without OpenBLAS.")
         else:
-            print("Non-AVX2 compatibility OpenBLAS mode only available on windows. On other OS, please manually rebuild without AVX2 flags.")
-    elif not args.noblas:
-        print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
-        use_blas = True
-
+            use_blas = True
+            print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
+
     if args.psutil_set_threads:
         import psutil
         args.threads = psutil.cpu_count(logical=False)
@@ -421,9 +431,9 @@ def main(args):
     parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
     parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
     parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
+    parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --useclblast.", action='store_true')  # registered outside compatgroup so it can be combined with --noblas
     compatgroup = parser.add_mutually_exclusive_group()
     compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
-    compatgroup.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --noblas or --clblast.", action='store_true')
     compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
     args = parser.parse_args()
     main(args)
diff --git a/llama.cpp b/llama.cpp
@@ -1949,4 +1949,4 @@ const char * llama_print_system_info(void) {
 // For internal test use
 std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
     return ctx->model.tensors_by_name;
-}
+}
diff --git a/llama_adapter.cpp b/llama_adapter.cpp
@@ -7,6 +7,11 @@
 //No dynamic memory allocation! Setup structs with FIXED (known) shapes and sizes for ALL output fields
 //Python will ALWAYS provide the memory, we just write to it.
 
+// Defines sigaction on msys:
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
 #include <time.h>
 #include "./examples/main/main.cpp"
 #include "ggml.h"
@@ -38,11 +43,12 @@ bool llama_load_model(const load_model_inputs inputs, FileFormat in_file_format)
     modelname = inputs.model_filename;
 
     ctx_params.n_ctx = inputs.max_context_length;
-    ctx_params.n_parts = inputs.n_parts_overwrite;
+    ctx_params.n_parts = -1; // inputs.n_parts_overwrite is no longer applied here. NOTE(review): -1 presumably lets llama.cpp pick the part count itself - confirm
     ctx_params.seed = -1;
     ctx_params.f16_kv = inputs.f16_kv;
     ctx_params.logits_all = false;
     ctx_params.use_mmap = inputs.use_mmap;
+    ctx_params.use_mlock = false; // set explicitly to false; there is no caller-facing input for it in this hunk
 
     file_format = in_file_format;
 

diff --git a/llamaextra.cpp b/llamaextra.cpp
@@ -2,22 +2,6 @@
 #include "llamaextra.h"
 #include "llama.cpp"
 
-#include <cassert>
-#include <cstring>
-#include <fstream>
-#include <regex>
-#include <iostream>
-#include <iterator>
-#include <queue>
-#include <string>
-#include <math.h>
-
- #if defined(_MSC_VER) || defined(__MINGW32__)
- #include <malloc.h> // using malloc.h with MSC/MINGW
- #elif !defined(__FreeBSD__) && !defined(__NetBSD__)
- #include <alloca.h>
- #endif
-
 
 // TODO: Calculate this constant from the vocabulary
 #define MAX_TOKEN_LEN 18

diff --git a/llamaextra.h b/llamaextra.h
@@ -15,6 +15,4 @@
 #include "llama.h"
 #include "ggml.h"
 
-
-
 std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat
@@ -1 +1 @@
-pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
+pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -15,6 +15,4 @@
		 #include "llama.h"
		 #include "ggml.h"
		 
		-
		-
		 std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
		+pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"