# Patch summary (text before the first "diff --git" line is ignored by patch tools):
#   Makefile     - when LLAMA_CUDA_DMMV_F16 is defined, append -DGGML_CUDA_DMMV_F16 to NVCCFLAGS;
#                  append -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) when that variable
#                  is defined, otherwise append -DK_QUANTS_PER_ITERATION=2.
#   koboldcpp.py - add maxhordectx to the `global` statement inside main()'s hordeconfig branch.
# NOTE(review): the original line breaks and indentation were lost; the layout below is
# reconstructed from the hunk headers - confirm whitespace against the target files before applying.
diff --git a/Makefile b/Makefile
index 789849ab2fe3f..b89eeaa5af688 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,14 @@ ifdef LLAMA_CUDA_DMMV_Y
 else
 	NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
 endif # LLAMA_CUDA_DMMV_Y
+ifdef LLAMA_CUDA_DMMV_F16
+	NVCCFLAGS += -DGGML_CUDA_DMMV_F16
+endif # LLAMA_CUDA_DMMV_F16
+ifdef LLAMA_CUDA_KQUANTS_ITER
+	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
+else
+	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
+endif
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
diff --git a/koboldcpp.py b/koboldcpp.py
index dca7d129ea4d7..aa54262623c05 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -724,7 +724,7 @@ def main(args):
         sys.exit(2)
 
     if args.hordeconfig and args.hordeconfig[0]!="":
-        global friendlymodelname, maxhordelen, showdebug
+        global friendlymodelname, maxhordelen, maxhordectx, showdebug
         friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
         if len(args.hordeconfig) > 1:
             maxhordelen = int(args.hordeconfig[1])