From b1f00fa9ccdaec045636318fa5548547b07c248c Mon Sep 17 00:00:00 2001
From: Ycros <18012+ycros@users.noreply.github.com>
Date: Thu, 22 Jun 2023 01:01:46 +1000
Subject: [PATCH] Fix hordeconfig max context setting, and add Makefile flags for cuda F16/KQuants per iter. (#252)

* Fix hordeconfig maxcontext setting.

* cuda: Bring DMMV_F16 and KQUANTS_ITER Makefile flags over from llama.
---
 Makefile     | 8 ++++++++
 koboldcpp.py | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 789849ab2fe3f..b89eeaa5af688 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,14 @@ ifdef LLAMA_CUDA_DMMV_Y
 else
 	NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
 endif # LLAMA_CUDA_DMMV_Y
+ifdef LLAMA_CUDA_DMMV_F16
+	NVCCFLAGS += -DGGML_CUDA_DMMV_F16
+endif # LLAMA_CUDA_DMMV_F16
+ifdef LLAMA_CUDA_KQUANTS_ITER
+	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
+else
+	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
+endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
diff --git a/koboldcpp.py b/koboldcpp.py
index dca7d129ea4d7..aa54262623c05 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -724,7 +724,7 @@ def main(args):
         sys.exit(2)
 
     if args.hordeconfig and args.hordeconfig[0]!="":
-        global friendlymodelname, maxhordelen, showdebug
+        global friendlymodelname, maxhordelen, maxhordectx, showdebug
         friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
         if len(args.hordeconfig) > 1:
             maxhordelen = int(args.hordeconfig[1])