diff --git a/.gitignore b/.gitignore index 3fcc055cddae5..043cbe7fbfebb 100644 --- a/.gitignore +++ b/.gitignore @@ -81,12 +81,12 @@ tests/test-tokenizer-0 koboldcpp.so koboldcpp_failsafe.so koboldcpp_openblas.so -koboldcpp_openblas_noavx2.so +koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp.dll koboldcpp_failsafe.dll koboldcpp_openblas.dll -koboldcpp_openblas_noavx2.dll +koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll cublas64_11.dll diff --git a/Makefile b/Makefile index f2386457f942a..f13811a56e370 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast koboldcpp_cublas +default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_cublas tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt dev: koboldcpp_openblas dev2: koboldcpp_clblast @@ -213,7 +213,7 @@ endif DEFAULT_BUILD = FAILSAFE_BUILD = OPENBLAS_BUILD = -OPENBLAS_NOAVX2_BUILD = +NOAVX2_BUILD = CLBLAST_BUILD = CUBLAS_BUILD = @@ -221,7 +221,7 @@ ifeq ($(OS),Windows_NT) DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) - OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS) + NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS) ifdef LLAMA_CUBLAS @@ -233,7 +233,7 @@ else FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS) ifdef LLAMA_OPENBLAS OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) - OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) + NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) endif ifdef LLAMA_CLBLAST ifeq ($(UNAME_S),Darwin) @@ -283,8 +283,8 @@ ggml_openblas.o: ggml.c ggml.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_failsafe.o: ggml.c ggml.h $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ -ggml_openblas_noavx2.o: ggml.c ggml.h - $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ +ggml_noavx2.o: ggml.c ggml.h + $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ ggml_clblast.o: ggml.c ggml.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_cublas.o: ggml.c ggml.h @@ -305,8 +305,8 @@ ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@ -ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h - $(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ +ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h + $(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h @@ -348,7 +348,7 @@ gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ clean: - rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so + rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so main: examples/main/main.cpp build-info.h ggml.o k_quants.o llama.o common.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @@ -363,8 +363,8 @@ koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common $(OPENBLAS_BUILD) koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o $(OBJS) $(FAILSAFE_BUILD) -koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o $(OBJS) - $(OPENBLAS_NOAVX2_BUILD) +koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o $(OBJS) + $(NOAVX2_BUILD) koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS) $(CLBLAST_BUILD) koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o $(CUBLAS_OBJS) $(OBJS) diff --git a/koboldcpp.py b/koboldcpp.py index 15507e7e0b3c3..588c944dd1565 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -89,29 +89,30 @@ def pick_existant_file(ntoption,nonntoption): lib_default = pick_existant_file("koboldcpp.dll","koboldcpp.so") lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.so") lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so") -lib_openblas_noavx2 = pick_existant_file("koboldcpp_openblas_noavx2.dll","koboldcpp_openblas_noavx2.so") +lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so") lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so") lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so") def init_library(): global handle - global lib_default,lib_failsafe,lib_openblas,lib_openblas_noavx2,lib_clblast,lib_cublas + global lib_default,lib_failsafe,lib_openblas,lib_noavx2,lib_clblast,lib_cublas libname = "" - use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. + use_openblas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. use_clblast = False #uses CLBlast instead use_cublas = False #uses cublas instead - use_noavx2 = False #uses openblas with no avx2 instructions + use_noavx2 = False #uses no avx2 instructions + use_failsafe = False #uses no intrinsics, failsafe mode if args.noavx2: use_noavx2 = True - if not file_exists(lib_openblas_noavx2) or (os.name=='nt' and not file_exists("libopenblas.dll")): - print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.") - elif args.noblas: + if not file_exists(lib_noavx2): + print("Warning: NoAVX2 library file not found. Failsafe library will be used.") + elif (args.noblas and args.nommap): + use_failsafe = True print("!!! Attempting to use FAILSAFE MODE !!!") else: - use_blas = True - print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.") + print("Attempting to use non-avx2 compatibility library.") elif args.useclblast: if not file_exists(lib_clblast) or (os.name=='nt' and not file_exists("clblast.dll")): print("Warning: CLBlast library file not found. Non-BLAS library will be used.") @@ -130,22 +131,22 @@ def init_library(): elif args.noblas: print("Attempting to library without OpenBLAS.") else: - use_blas = True + use_openblas = True print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.") if sys.platform=="darwin": print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.") if use_noavx2: - if use_blas: - libname = lib_openblas_noavx2 - else: + if use_failsafe: libname = lib_failsafe + else: + libname = lib_noavx2 else: if use_clblast: libname = lib_clblast elif use_cublas: libname = lib_cublas - elif use_blas: + elif use_openblas: libname = lib_openblas else: libname = lib_default @@ -1326,7 +1327,6 @@ def onDropdownChange(event): args.noavx2 = True args.noblas = True args.nommap = True - print("[Failsafe Mode : mmap is disabled.]") if selblaschoice==blasbatchopts[0]: args.blasbatchsize = -1 diff --git a/llama-util.h b/llama-util.h index e1e7fd748c072..7b80ddfc27af0 100644 --- a/llama-util.h +++ b/llama-util.h @@ -247,7 +247,7 @@ struct llama_mmap { #pragma message("warning: You are building for pre-Windows 8; prefetch not supported") #endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8 #else - printf("\nPrefetchVirtualMemory skipped in failsafe mode."); + printf("\nPrefetchVirtualMemory skipped in compatibility mode.\n"); #endif } diff --git a/make_old_pyinstaller.bat b/make_old_pyinstaller.bat index e5d1113f8f4e5..c72a2ce10fa42 100644 --- a/make_old_pyinstaller.bat +++ b/make_old_pyinstaller.bat @@ -1,4 +1,4 @@ echo This file is only for my own usage, please do not use it. I am lazy. set PATH=d:\\MainApplications\\KoboldAIGPT\\KoboldAI-Horde-Bridge\\python;d:\\MainApplications\\KoboldAIGPT\\KoboldAI-Horde-Bridge\\python\\Scripts;%PATH% -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file diff --git a/make_old_pyinstaller_cuda.bat b/make_old_pyinstaller_cuda.bat index 004b88bbfe276..dfe2845716149 100644 --- a/make_old_pyinstaller_cuda.bat +++ b/make_old_pyinstaller_cuda.bat @@ -1,4 +1,4 @@ echo This file is only for my own usage, please do not use it. I am lazy. set PATH=d:\\MainApplications\\KoboldAIGPT\\KoboldAI-Horde-Bridge\\python;d:\\MainApplications\\KoboldAIGPT\\KoboldAI-Horde-Bridge\\python\\Scripts;%PATH% -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./nikogreen.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./nikogreen.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index 12be110f88258..3cf867b28c3fe 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1 +1 @@ -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller.sh b/make_pyinstaller.sh index 155a1cd82bcda..270850663ee79 100644 --- a/make_pyinstaller.sh +++ b/make_pyinstaller.sh @@ -5,7 +5,7 @@ pyinstaller --noconfirm --onefile --clean --console --collect-all customtkinter --add-data "./koboldcpp.so:." \ --add-data "./koboldcpp_openblas.so:." \ --add-data "./koboldcpp_failsafe.so:." \ ---add-data "./koboldcpp_openblas_noavx2.so:." \ +--add-data "./koboldcpp_noavx2.so:." \ --add-data "./koboldcpp_clblast.so:." \ --add-data "./rwkv_vocab.embd:." \ --add-data "./rwkv_world_vocab.embd:." \ diff --git a/make_pyinstaller_hybrid_henk.bat b/make_pyinstaller_hybrid_henk.bat index be25d2ebf6deb..a99b1569dcdd7 100644 --- a/make_pyinstaller_hybrid_henk.bat +++ b/make_pyinstaller_hybrid_henk.bat @@ -2,4 +2,4 @@ cd /d "%~dp0" copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4\bin\cudart64_110.dll" .\ /Y copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4\bin\cublasLt64_11.dll" .\ /Y copy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4\bin\cublas64_11.dll" .\ /Y -PyInstaller --noconfirm --onefile --collect-all customtkinter --clean --console --icon ".\niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cudart64_110.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cublas64_11.dll;." --add-data "./rwkv_vocab.embd;." --add-data "C:/Windows/System32/msvcp140.dll;." --add-data "C:/Windows/System32/vcruntime140_1.dll;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --collect-all customtkinter --clean --console --icon ".\niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cudart64_110.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cublas64_11.dll;." --add-data "./rwkv_vocab.embd;." --add-data "C:/Windows/System32/msvcp140.dll;." --add-data "C:/Windows/System32/vcruntime140_1.dll;." "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/otherarch/llama_v2-util.h b/otherarch/llama_v2-util.h index 00aedf8e64ecd..63942a007301b 100644 --- a/otherarch/llama_v2-util.h +++ b/otherarch/llama_v2-util.h @@ -225,7 +225,7 @@ struct llama_v2_mmap { #pragma message("warning: You are building for pre-Windows 8; prefetch not supported") #endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8 #else - printf("\nPrefetchVirtualMemory skipped in failsafe mode."); + printf("\nPrefetchVirtualMemory skipped in compatibility mode.\n"); #endif }