removed openblas backend, merged into CPU (with llamafile for BLAS). GPU backend is now automatically selected when running from CLI unless noblas is specified.
LostRuins committed Sep 15, 2024
1 parent 01c7d82 commit 53bf0fb
Showing 14 changed files with 115 additions and 159 deletions.
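The selection rule described in the commit message can be sketched as follows (a minimal sketch with hypothetical helper names, not the actual koboldcpp.py code):

```python
# Illustrative sketch only: names below are hypothetical, not koboldcpp's API.
def pick_backend(noblas: bool, usable_gpu_backends: list) -> str:
    """Mirror the described CLI rule: prefer a GPU backend unless noblas is set."""
    if noblas:
        return "cpu-noblas"  # explicit opt-out of BLAS acceleration
    if usable_gpu_backends:
        return usable_gpu_backends[0]  # e.g. "cublas", "vulkan" or "clblast"
    # With the openblas backend removed, the CPU fallback itself now
    # provides BLAS via the bundled llamafile kernels.
    return "cpu"

assert pick_backend(False, ["cublas", "vulkan"]) == "cublas"
assert pick_backend(True, ["cublas", "vulkan"]) == "cpu-noblas"
```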
2 changes: 0 additions & 2 deletions .gitignore
@@ -107,7 +107,6 @@ tests/test-tokenizer-1-bpe

/koboldcpp_default.so
/koboldcpp_failsafe.so
-/koboldcpp_openblas.so
/koboldcpp_noavx2.so
/koboldcpp_clblast.so
/koboldcpp_clblast_noavx2.so
@@ -116,7 +115,6 @@ tests/test-tokenizer-1-bpe
/koboldcpp_vulkan_noavx2.so
/koboldcpp_default.dll
/koboldcpp_failsafe.dll
-/koboldcpp_openblas.dll
/koboldcpp_noavx2.dll
/koboldcpp_clblast.dll
/koboldcpp_clblast_noavx2.dll
38 changes: 7 additions & 31 deletions Makefile
@@ -1,9 +1,9 @@
# Add custom options to Makefile.local rather than editing this file.
-include $(abspath $(lastword ${MAKEFILE_LIST})).local

-default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2
+default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2
tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip whispermain sdmain gguf-split
-dev: koboldcpp_openblas
+dev: koboldcpp_default
dev2: koboldcpp_clblast
dev3: koboldcpp_vulkan

@@ -75,7 +75,6 @@ SIMPLECFLAGS =
FULLCFLAGS =
NONECFLAGS =

-OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -DGGML_USE_BLAS -I/usr/local/include/openblas
CLBLAST_FLAGS = -DGGML_USE_CLBLAST
FAILSAFE_FLAGS = -DUSE_FAILSAFE
VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN
@@ -336,7 +335,6 @@ endif

DEFAULT_BUILD =
FAILSAFE_BUILD =
-OPENBLAS_BUILD =
NOAVX2_BUILD =
CLBLAST_BUILD =
CUBLAS_BUILD =
@@ -346,7 +344,6 @@ VULKAN_BUILD =
ifeq ($(OS),Windows_NT)
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
-OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ lib/vulkan-1.lib -shared -o $@.dll $(LDFLAGS)
@@ -364,14 +361,11 @@ else
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
endif

-ifdef LLAMA_OPENBLAS
-OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
-endif
ifdef LLAMA_CLBLAST
ifeq ($(UNAME_S),Darwin)
-CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
+CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
else
-CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
+CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -shared -o $@.so $(LDFLAGS)
endif
endif
ifdef LLAMA_CUBLAS
@@ -384,13 +378,11 @@ else
VULKAN_BUILD = $(CXX) $(CXXFLAGS) $^ -lvulkan -shared -o $@.so $(LDFLAGS)
endif

-ifndef LLAMA_OPENBLAS
ifndef LLAMA_CLBLAST
ifndef LLAMA_CUBLAS
ifndef LLAMA_HIPBLAS
ifndef LLAMA_VULKAN
-OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
-endif
+VULKAN_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_VULKAN=1 to compile with Vulkan support. This is just a reminder, not an error.'
endif
endif
endif
@@ -421,8 +413,6 @@ $(info )

ggml.o: ggml/src/ggml.c ggml/include/ggml.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v4_openblas.o: ggml/src/ggml.c ggml/include/ggml.h
-$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
ggml_v4_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h
$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
ggml_v4_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
@@ -480,15 +470,13 @@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@

-#this is only used for openblas and accelerate
+#this is only used for accelerate
ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
$(CXX) $(CXXFLAGS) -c $< -o $@

#version 3 libs
ggml_v3.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v3_openblas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
-$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
ggml_v3_failsafe.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
ggml_v3_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
@@ -503,8 +491,6 @@ ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
#version 2 libs
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v2_openblas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
-$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
ggml_v2_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
ggml_v2_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
@@ -569,8 +555,6 @@ gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
gpttype_adapter.o: $(GPTTYPE_ADAPTER)
$(CXX) $(CXXFLAGS) -c $< -o $@
-gpttype_adapter_openblas.o: $(GPTTYPE_ADAPTER)
-$(CXX) $(CXXFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
gpttype_adapter_clblast.o: $(GPTTYPE_ADAPTER)
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
gpttype_adapter_cublas.o: $(GPTTYPE_ADAPTER)
@@ -583,7 +567,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@

clean:
-rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
+rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
rm -vrf ggml/src/ggml-cuda/*.o
rm -vrf ggml/src/ggml-cuda/template-instances/*.o

@@ -612,14 +596,6 @@ vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
koboldcpp_default: ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
$(DEFAULT_BUILD)

-ifdef OPENBLAS_BUILD
-koboldcpp_openblas: ggml_v4_openblas.o ggml_v3_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o gpttype_adapter_openblas.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-blas.o $(OBJS_FULL) $(OBJS)
-$(OPENBLAS_BUILD)
-else
-koboldcpp_openblas:
-$(DONOTHING)
-endif
-
ifdef FAILSAFE_BUILD
koboldcpp_failsafe: ggml_v4_failsafe.o ggml_v3_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o gpttype_adapter_failsafe.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FAILSAFE) $(OBJS)
$(FAILSAFE_BUILD)
22 changes: 11 additions & 11 deletions README.md
@@ -15,7 +15,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
- You can also run it using the command line. For info, please check `koboldcpp.exe --help`

## Linux Usage (Precompiled Binary, Recommended)
-On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary.
+On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first).

Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
```
@@ -24,7 +24,8 @@ curl -fLo koboldcpp https://github.com/LostRuins/koboldcpp/releases/latest/downl
After running this command you can launch Koboldcpp from the current directory using `./koboldcpp` in the terminal (for CLI usage, run with `--help`).

## MacOS (Precompiled Binary)
-- PyInstaller binaries for Modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download and run the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)**
+- PyInstaller binaries for Modern ARM64 MacOS (M1, M2, M3) are now available! **[Simply download the MacOS binary](https://github.com/LostRuins/koboldcpp/releases/latest)**
+- In a MacOS terminal window, set the file to executable `chmod +x koboldcpp-mac-arm64` and run it with `./koboldcpp-mac-arm64`.
- Alternatively, or for older x86 MacOS computers, you can clone the repo and compile from source code, see Compiling for MacOS below.

## Run on Colab
@@ -70,13 +71,13 @@ when you can't use the precompiled binary directly, we provide an automated buil
### Compiling on Linux (Manual Method)
- To compile your binaries from source, clone the repo with `git clone https://github.com/LostRuins/koboldcpp.git`
- A makefile is provided, simply run `make`.
-- Optional OpenBLAS: Link your own install of OpenBLAS manually with `make LLAMA_OPENBLAS=1`
+- Optional Vulkan: Link your own install of Vulkan SDK manually with `make LLAMA_VULKAN=1`
- Optional CLBlast: Link your own install of CLBlast manually with `make LLAMA_CLBLAST=1`
- Note: for these you will need to obtain and link OpenCL and CLBlast libraries.
-- For Arch Linux: Install `cblas` `openblas` and `clblast`.
-- For Debian: Install `libclblast-dev` and `libopenblas-dev`.
+- For Arch Linux: Install `cblas` and `clblast`.
+- For Debian: Install `libclblast-dev`.
- You can attempt a CuBLAS build with `LLAMA_CUBLAS=1`, (or `LLAMA_HIPBLAS=1` for AMD). You will need CUDA Toolkit installed. Some have also reported success with the CMake file, though that is more for windows.
-- For a full featured build (all backends), do `make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
+- For a full featured build (all backends), do `make LLAMA_CLBLAST=1 LLAMA_CUBLAS=1 LLAMA_VULKAN=1`. (Note that `LLAMA_CUBLAS=1` will not work on windows, you need visual studio)
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.gguf] [port]`

### Compiling on Windows
@@ -87,12 +88,11 @@ when you can't use the precompiled binary directly, we provide an automated buil
- If you want to generate the .exe file, make sure you have the python module PyInstaller installed with pip (`pip install PyInstaller`). Then run the script `make_pyinstaller.bat`
- The koboldcpp.exe file will be at your dist folder.
- **Building with CUDA**: Visual Studio, CMake and CUDA Toolkit is required. Clone the repo, then open the CMake file and compile it in Visual Studio. Copy the `koboldcpp_cublas.dll` generated into the same directory as the `koboldcpp.py` file. If you are bundling executables, you may need to include CUDA dynamic libraries (such as `cublasLt64_11.dll` and `cublas64_11.dll`) in order for the executable to work correctly on a different PC.
-- **Replacing Libraries (Not Recommended)**: If you wish to use your own version of the additional Windows libraries (OpenCL, CLBlast and OpenBLAS), you can do it with:
+- **Replacing Libraries (Not Recommended)**: If you wish to use your own version of the additional Windows libraries (OpenCL, CLBlast, Vulkan), you can do it with:
- OpenCL - tested with https://github.com/KhronosGroup/OpenCL-SDK . If you wish to compile it, follow the repository instructions. You will need vcpkg.
- CLBlast - tested with https://github.com/CNugteren/CLBlast . If you wish to compile it you will need to reference the OpenCL files. It will only generate the ".lib" file if you compile using MSVC.
-- OpenBLAS - tested with https://github.com/xianyi/OpenBLAS .
- Move the respectives .lib files to the /lib folder of your project, overwriting the older files.
-- Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. libopenblas.dll).
+- Also, replace the existing versions of the corresponding .dll files located in the project directory root (e.g. clblast.dll).
- Make the KoboldCpp project using the instructions above.

### Compiling on MacOS
@@ -127,7 +127,7 @@ when you can't use the precompiled binary directly, we provide an automated buil
- Metal is enabled by default on macOS, Vulkan support is enabled by default on both Linux and macOS, ROCm support isn't available yet.
- You can also use `nix3-run` to use KoboldCpp: `nix run --expr ``with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp`` --impure`
- Or use `nix-shell`: `nix-shell --expr 'with import <nixpkgs> { config = { allowUnfree = true; cudaSupport = true; }; }; koboldcpp' --run "koboldcpp" --impure`
-- Packages (like OpenBlast, CLBLast, Vulkan, etc.) can be overridden, please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages)
+- Packages (like CLBLast, Vulkan, etc.) can be overridden, please refer to the [17th Nix Pill - Nixpkgs Overriding Packages](https://nixos.org/guides/nix-pills/17-nixpkgs-overriding-packages)

## Questions and Help Wiki
- **First, please check out [The KoboldCpp FAQ and Knowledgebase](https://github.com/LostRuins/koboldcpp/wiki) which may already have answers to your questions! Also please search through past issues and discussions.**
@@ -141,13 +141,13 @@ when you can't use the precompiled binary directly, we provide an automated buil

## Considerations
- For Windows: No installation, single file executable, (It Just Works)
-- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
- Since v1.15, requires CLBlast if enabled, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without CLBlast.
- Since v1.33, you can set the context size to be above what the model supports officially. It does increases perplexity but should still work well below 4096 even on untuned models. (For GPT-NeoX, GPT-J, and Llama models) Customize this with `--ropeconfig`.
- Since v1.42, supports GGUF models for LLAMA and Falcon
- Since v1.55, lcuda paths on Linux are hardcoded and may require manual changes to the makefile if you do not use koboldcpp.sh for the compilation.
- Since v1.60, provides native image generation with StableDiffusion.cpp, you can load any SD1.5 or SDXL .safetensors model and it will provide an A1111 compatible API to use.
- **I try to keep backwards compatibility with ALL past llama.cpp models**. But you are also encouraged to reconvert/update your models if possible for best results.
+- Since v1.75, openblas has been deprecated and removed in favor of the native CPU implementation.

## License
- The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
22 changes: 10 additions & 12 deletions class.py
@@ -97,9 +97,9 @@ def get_requested_parameters(self, model_name, model_path, menu_path, parameters
"menu_path": "",
"refresh_model_inputs": False,
"extra_classes": "",
-'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use OpenBLAS', 'value': 1}, {'text': 'Use CuBLAS', 'value': 2},
-{'text': 'Use CLBLast GPU #1', 'value': 3},{'text': 'Use CLBLast GPU #2', 'value': 4},{'text': 'Use CLBLast GPU #3', 'value': 5}
-,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 6},{'text': 'Failsafe Mode (Old CPU)', 'value': 7},{'text': 'Use Vulkan GPU #1', 'value': 8},{'text': 'Use Vulkan GPU #2', 'value': 9}],
+'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1},
+{'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4}
+,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Old CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}],
})
requested_parameters.append({
"uitype": "text",
@@ -238,24 +238,22 @@ def set_input_parameters(self, parameters):
if accel==0:
self.kcpp_noblas = True
elif accel==1:
-pass
-elif accel==2:
self.kcpp_usecublas = ["normal"]
-elif accel==3:
+elif accel==2:
self.kcpp_useclblast = [0,0]
-elif accel==4:
+elif accel==3:
self.kcpp_useclblast = [1,0]
-elif accel==5:
+elif accel==4:
self.kcpp_useclblast = [0,1]
-elif accel==6:
+elif accel==5:
self.kcpp_noavx2 = True
-elif accel==7:
+elif accel==6:
self.kcpp_noavx2 = True
self.kcpp_noblas = True
self.kcpp_nommap = True
-elif accel==8:
+elif accel==7:
self.kcpp_usevulkan = [0]
-elif accel==9:
+elif accel==8:
self.kcpp_usevulkan = [1]
pass
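
Read together, the two class.py hunks simply shift every accelerator value above 0 down by one now that the OpenBLAS choice is gone. A condensed restatement (illustration only, not code from the repository):

```python
# Condensed restatement of the new accel-value mapping in class.py;
# comments note the kcpp_* launch attributes each value sets.
ACCEL_OPTIONS = {
    0: "Use No BLAS",             # kcpp_noblas = True
    1: "Use CuBLAS",              # kcpp_usecublas = ["normal"]  (was value 2)
    2: "Use CLBLast GPU #1",      # kcpp_useclblast = [0, 0]
    3: "Use CLBLast GPU #2",      # kcpp_useclblast = [1, 0]
    4: "Use CLBLast GPU #3",      # kcpp_useclblast = [0, 1]
    5: "NoAVX2 Mode (Old CPU)",   # kcpp_noavx2 = True
    6: "Failsafe Mode (Old CPU)", # noavx2 + noblas + nommap
    7: "Use Vulkan GPU #1",       # kcpp_usevulkan = [0]
    8: "Use Vulkan GPU #2",       # kcpp_usevulkan = [1]
}
print(ACCEL_OPTIONS[1])  # "Use CuBLAS" — the old value 1 (OpenBLAS) no longer exists
```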

2 changes: 0 additions & 2 deletions environment.yaml
@@ -11,8 +11,6 @@ dependencies:
- gxx=10
- pip
- git=2.35.1
-- libopenblas
-- openblas
- clblast
- ninja
- make
6 changes: 3 additions & 3 deletions gpttype_adapter.cpp
@@ -1624,7 +1624,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;

-//this is used for the mem_per_token eval, openblas needs more RAM
+//this is used for the mem_per_token eval, blas needs more RAM
bool v3_use_scratch = ggml_v3_cpu_has_gpublas();

int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info;
@@ -2362,11 +2362,11 @@ int GetThreadsToUse(bool blasmode)
{
if(!ggml_cpu_has_gpublas())
{
-return 1;
+return std::min(kcpp_data->n_blasthreads, 4);
}
else
{
-return kcpp_data->n_blasthreads;
+return kcpp_data->n_blasthreads;
}
}
return kcpp_data->n_threads;
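
The GetThreadsToUse hunk is the functional piece of this commit: CPU BLAS (now the llamafile path) no longer drops to a single thread, but uses up to four of the configured BLAS threads. A minimal Python rendering of the new logic, for illustration only (the real code is the C++ above):

```python
def get_threads_to_use(blasmode: bool, n_threads: int,
                       n_blasthreads: int, has_gpu_blas: bool) -> int:
    """Python rendering of the updated C++ GetThreadsToUse, for illustration."""
    if blasmode:
        if not has_gpu_blas:
            return min(n_blasthreads, 4)  # CPU BLAS: was hardcoded to 1
        return n_blasthreads              # GPU BLAS: unchanged
    return n_threads

print(get_threads_to_use(True, 8, 8, False))  # 4 — previously this returned 1
```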
(diffs for the remaining 8 changed files not loaded)
