Skip to content

Commit

Permalink
clean and refactor handling of flags
Browse files Browse the repository at this point in the history
  • Loading branch information
LostRuins committed Apr 12, 2023
2 parents 636f8e5 + f76cb3a commit 1bd5992
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 245 deletions.
22 changes: 15 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,16 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

OPENBLAS_BUILD =
CLBLAST_BUILD =
OPENBLAS_NOAVX2_BUILD =
OPENBLAS_BUILD =
CLBLAST_BUILD =
NOAVX2_BUILD =
OPENBLAS_NOAVX2_BUILD =

ifeq ($(OS),Windows_NT)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o -shared -o koboldcpp_noavx2.dll $(LDFLAGS)
else
ifndef LLAMA_OPENBLAS
ifndef LLAMA_CLBLAST
Expand All @@ -154,7 +156,7 @@ $(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )

default: llamalib llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast
default: llamalib llamalib_noavx2 llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast

#
# Build library
Expand All @@ -166,6 +168,9 @@ ggml.o: ggml.c ggml.h
# ggml object for the OpenBLAS build: compiled with $(BONUSCFLAGS) and -DGGML_USE_OPENBLAS.
ggml_openblas.o: ggml.c ggml.h
$(CC) $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o

# ggml object for the non-AVX2, non-BLAS build: plain $(CFLAGS) only, i.e. without
# $(BONUSCFLAGS) and without -DGGML_USE_OPENBLAS.
# NOTE(review): presumably $(BONUSCFLAGS) is where the AVX2 flags are added - confirm where it is set.
ggml_noavx2.o: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c -o ggml_noavx2.o

# ggml object for the non-AVX2 OpenBLAS build: -DGGML_USE_OPENBLAS is defined,
# but $(BONUSCFLAGS) is deliberately not passed.
ggml_openblas_noavx2.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas_noavx2.o

Expand All @@ -176,7 +181,7 @@ ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) $(BONUSCFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o

ggml_v1_noavx2.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1_noavx2.o

# Compile llama.cpp into llama.o; rebuilt when llama.cpp, llama.h or llama_internal.h change.
llama.o: llama.cpp llama.h llama_internal.h
$(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o
Expand All @@ -194,7 +199,7 @@ gpttype_adapter.o:
$(CXX) $(CXXFLAGS) -c gpttype_adapter.cpp -o gpttype_adapter.o

clean:
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll gptj.exe gpt2.exe
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize-stats perplexity embedding main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll gptj.exe gpt2.exe

# Build the `main` executable from examples/main/main.cpp, linked with ggml.o, llama.o and common.o.
main: examples/main/main.cpp ggml.o llama.o common.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
Expand All @@ -208,6 +213,9 @@ llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
# Link the OpenBLAS variant of the library. The recipe is whatever command line
# $(OPENBLAS_BUILD) holds; it is initialised empty and filled in by the platform
# branches earlier in this Makefile, so an unset value makes this rule a no-op link.
llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(OPENBLAS_BUILD)

# Link the non-AVX2 variant without OpenBLAS, using the *_noavx2 ggml objects.
# The recipe is $(NOAVX2_BUILD), which is initialised empty and assigned a command in the
# Windows_NT branch (output: koboldcpp_noavx2.dll).
# NOTE(review): confirm whether the non-Windows branch also sets it; if not, this target
# only builds the object files there.
llamalib_noavx2: ggml_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(NOAVX2_BUILD)

# Link the non-AVX2 OpenBLAS variant, using the *_noavx2 ggml objects.
# The recipe is $(OPENBLAS_NOAVX2_BUILD), which is initialised empty and assigned a command in
# the Windows_NT branch (output: koboldcpp_openblas_noavx2.dll).
llamalib_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(OPENBLAS_NOAVX2_BUILD)

Expand Down
12 changes: 9 additions & 3 deletions expose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@ extern "C"
std::string model = inputs.model_filename;
file_format = check_file_format(model.c_str());

//first digit is platform, second is devices
int platform = inputs.clblast_info/10;
int devices = inputs.clblast_info%10;
//clblast_info is a 3-digit decimal code: hundreds digit = configured flag (any value > 0 counts as configured), tens digit = platform, ones digit = devices
int parseinfo = inputs.clblast_info;

std::string usingclblast = "KCPP_CLBLAST_CONFIGURED="+std::to_string(parseinfo>0?1:0);
putenv((char*)usingclblast.c_str());

parseinfo = parseinfo%100; //keep last 2 digits
int platform = parseinfo/10;
int devices = parseinfo%10;
std::string platformenv = "KCPP_CLBLAST_PLATFORM="+std::to_string(platform);
std::string deviceenv = "KCPP_CLBLAST_DEVICES="+std::to_string(devices);
putenv((char*)platformenv.c_str());
Expand Down
60 changes: 35 additions & 25 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,17 @@ def init_library():
global handle, use_blas, use_clblast, use_noavx2
libname = ""
if use_noavx2:
libname = "koboldcpp_openblas_noavx2.dll"
elif use_blas:
libname = "koboldcpp_openblas.dll"
elif use_clblast:
libname = "koboldcpp_clblast.dll"
if use_blas:
libname = "koboldcpp_openblas_noavx2.dll"
else:
libname = "koboldcpp_noavx2.dll"
else:
libname = "koboldcpp.dll"
if use_clblast:
libname = "koboldcpp_clblast.dll"
elif use_blas:
libname = "koboldcpp_openblas.dll"
else:
libname = "koboldcpp.dll"

print("Initializing dynamic library: " + libname)
dir_path = os.path.dirname(os.path.realpath(__file__))
Expand All @@ -72,7 +76,7 @@ def load_model(model_filename,batch_size=8,max_context_length=512,n_parts_overwr
inputs.use_mmap = use_mmap
clblastids = 0
if args.useclblast:
clblastids = int(args.useclblast[0])*10 + int(args.useclblast[1])
clblastids = 100 + int(args.useclblast[0])*10 + int(args.useclblast[1])
inputs.clblast_info = clblastids
ret = handle.load_model(inputs)
return ret
Expand Down Expand Up @@ -313,30 +317,36 @@ def stop(self):

def main(args):
global use_blas, use_clblast, use_noavx2
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
use_blas = False
elif os.name != 'nt':
print("Prebuilt OpenBLAS binaries only available for windows. Please manually build/link libopenblas from makefile with LLAMA_OPENBLAS=1")
use_blas = False
use_blas = False
use_clblast = False
use_noavx2 = False

if os.name != 'nt':
print("You are not on Windows. Default koboldcpp.dll library file will be used. Remember to manually link with OpenBLAS using LLAMA_OPENBLAS=1, or CLBlast with LLAMA_CLBLAST=1 if you want to use them.")
elif args.noavx2:
use_noavx2 = True
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
else:
use_blas = True
print("Attempting to use non-avx2 compatibility library with OpenBLAS.")
elif args.useclblast:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_clblast.dll")):
print("Warning: clblast.dll or koboldcpp_clblast.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with CLBlast.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
use_clblast = True
elif args.noavx2:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. This mode cannot be used.")
elif os.name == 'nt':
print("Attempting to use non-avx2 compatibility openblas library.")
use_noavx2 = True
else:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to library without OpenBLAS.")
else:
print("Non-AVX2 compatibility OpenBLAS mode only available on windows. On other OS, please manually rebuild without AVX2 flags.")
elif not args.noblas:
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
use_blas = True

use_blas = True
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")

if args.psutil_set_threads:
import psutil
args.threads = psutil.cpu_count(logical=False)
Expand Down Expand Up @@ -421,9 +431,9 @@ def main(args):
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --clblast.", action='store_true')
compatgroup = parser.add_mutually_exclusive_group()
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
compatgroup.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --noblas or --clblast.", action='store_true')
compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
args = parser.parse_args()
main(args)
2 changes: 1 addition & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1949,4 +1949,4 @@ const char * llama_print_system_info(void) {
// For internal test use
// Returns a mutable reference to ctx->model.tensors_by_name, the (name, tensor pointer)
// pairs stored on the context's model. The reference points into *ctx, and ctx is
// dereferenced without a null check, so callers must pass a valid context.
std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
return ctx->model.tensors_by_name;
}
}
8 changes: 7 additions & 1 deletion llama_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
//No dynamic memory allocation! Setup structs with FIXED (known) shapes and sizes for ALL output fields
//Python will ALWAYS provide the memory, we just write to it.

// Defines sigaction on msys:
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <time.h>
#include "./examples/main/main.cpp"
#include "ggml.h"
Expand Down Expand Up @@ -38,11 +43,12 @@ bool llama_load_model(const load_model_inputs inputs, FileFormat in_file_format)
modelname = inputs.model_filename;

ctx_params.n_ctx = inputs.max_context_length;
ctx_params.n_parts = inputs.n_parts_overwrite;
ctx_params.n_parts = -1;//inputs.n_parts_overwrite;
ctx_params.seed = -1;
ctx_params.f16_kv = inputs.f16_kv;
ctx_params.logits_all = false;
ctx_params.use_mmap = inputs.use_mmap;
ctx_params.use_mlock = false;

file_format = in_file_format;

Expand Down
16 changes: 0 additions & 16 deletions llamaextra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,6 @@
#include "llamaextra.h"
#include "llama.cpp"

#include <cassert>
#include <cstring>
#include <fstream>
#include <regex>
#include <iostream>
#include <iterator>
#include <queue>
#include <string>
#include <math.h>

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__)
#include <alloca.h>
#endif


// TODO: Calculate this constant from the vocabulary
#define MAX_TOKEN_LEN 18
Expand Down
2 changes: 0 additions & 2 deletions llamaextra.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,4 @@
#include "llama.h"
#include "ggml.h"



std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos);
2 changes: 1 addition & 1 deletion make_pyinstaller.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"
Loading

0 comments on commit 1bd5992

Please sign in to comment.