Commit 5885711

WIP
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 103bb04 commit 5885711

5 files changed (+44, -62 lines)

.gitignore

Lines changed: 4 additions & 3 deletions
@@ -5,10 +5,11 @@ __pycache__/
 *.o
 get-sources
 prepare-sources
-/backend/cpp/llama/grpc-server
-/backend/cpp/llama/llama.cpp
+/backend/cpp/llama-cpp/grpc-server
+/backend/cpp/llama-cpp/llama.cpp
 /backend/cpp/llama-*
 !backend/cpp/llama-cpp
+/backends
 
 *.log
 
@@ -57,4 +58,4 @@ docs/static/gallery.html
 **/venv
 
 # per-developer customization files for the development container
-.devcontainer/customization/*
+.devcontainer/customization/*

Makefile

Lines changed: 0 additions & 9 deletions
@@ -225,12 +225,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 endif
 
 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
-ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 
 ifeq ($(ONNX_OS),linux)
@@ -402,9 +396,6 @@ endif
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 	rice append --exec $(BINARY_NAME)
 
-build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
-
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build

backend/Dockerfile.llama-cpp

Lines changed: 6 additions & 6 deletions
@@ -180,12 +180,12 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx \
-	llama-cpp-avx2 \
-	llama-cpp-avx512 \
-	llama-cpp-fallback \
-	llama-cpp-grpc \
-	llama-cpp-rpc-server
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx2
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx512
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-grpc
+RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-rpc-server
 
 FROM scratch
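
Splitting the single multi-target RUN into one RUN per variant gives each build its own image layer. The practical effect (my reading; the commit message does not say) is that a failure while compiling a later variant restarts from that layer, with earlier variants served from Docker's build cache, as in this hypothetical illustration:

# Hypothetical illustration, not from the commit: once a RUN step succeeds
# it is cached, so a failure in a later variant (e.g. llama-cpp-grpc) only
# rebuilds from that step on the next `docker build`.
RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx
RUN cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-grpc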

backend/cpp/llama-cpp/Makefile

Lines changed: 25 additions & 35 deletions
@@ -10,6 +10,8 @@ TARGET?=--target grpc-server
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
 
+CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+
 # If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
 	CMAKE_ARGS+=-DGGML_CUDA=ON
@@ -59,61 +61,49 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
 	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
 	LLAMA_VERSION=$(LLAMA_VERSION) \
-	$(MAKE) grpc-server
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) grpc-server
+	LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
 endif
 
-# This target is for manually building a variant with-auto detected flags
-llama-cpp: llama.cpp purge
-	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	$(MAKE) VARIANT="llama-cpp-copy" build-llama-cpp-grpc-server
-	cp -rfv grpc-server llama-cpp
-
 llama-cpp-avx2: llama.cpp
-	mkdir -p llama-cpp-avx2-build
-	cp -rf * llama-cpp-avx2-build
-	$(MAKE) -C llama-cpp-avx2-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
 	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
-	cp -rfv llama-cpp-avx2-build/grpc-server llama-cpp-avx2
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
 
 llama-cpp-avx512: llama.cpp
-	mkdir -p llama-cpp-avx512-build
-	cp -rf * llama-cpp-avx512-build
-	$(MAKE) -C llama-cpp-avx512-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
 	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
-	cp -rfv llama-cpp-avx512-build/grpc-server llama-cpp-avx512
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
 
 llama-cpp-avx: llama.cpp
-	mkdir -p llama-cpp-avx-build
-	cp -rf * llama-cpp-avx-build
-	$(MAKE) -C llama-cpp-avx-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
 	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
-	cp -rfv llama-cpp-avx-build/grpc-server llama-cpp-avx
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
 
 llama-cpp-fallback: llama.cpp
-	mkdir -p llama-cpp-fallback-build
-	cp -rf * llama-cpp-fallback-build
-	$(MAKE) -C llama-cpp-fallback-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
-	cp -rfv llama-cpp-fallback-build/grpc-server llama-cpp-fallback
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
 
 llama-cpp-grpc: llama.cpp
-	mkdir -p llama-cpp-grpc-build
-	cp -rf * llama-cpp-grpc-build
-	$(MAKE) -C llama-cpp-grpc-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
-	cp -rfv llama-cpp-grpc-build/grpc-server llama-cpp-grpc
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
 
 llama-cpp-rpc-server: llama-cpp-grpc
-	cp -rf llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
-
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
 
 llama.cpp:
 	mkdir -p llama.cpp
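
The new CURRENT_MAKEFILE_DIR variable is what makes the variant targets location-independent. A minimal standalone sketch (hypothetical, not part of the commit) of the GNU Make idiom it relies on: MAKEFILE_LIST ends with the makefile currently being parsed, so the expression resolves to that file's absolute directory, letting recipes address sibling build directories with `make -C` regardless of the caller's working directory.

# sketch.mk — hypothetical example, not from the commit.
# $(lastword $(MAKEFILE_LIST)) is the path of this makefile as make saw it;
# $(abspath ...) normalizes it and $(dir ...) keeps the directory (with a
# trailing slash), so the value is stable no matter where make was run from.
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

.PHONY: where
where:
	@echo "this makefile lives in: $(CURRENT_MAKEFILE_DIR)"

Running `make -f /tmp/demo/sketch.mk where` from any directory would print /tmp/demo/.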

backend/cpp/llama-cpp/run.sh

Lines changed: 9 additions & 9 deletions
@@ -9,12 +9,11 @@ grep -e "flags" /proc/cpuinfo | head -1
 
 BINARY=llama-cpp-fallback
 
-# Check avx 512
-if grep -q -e "\savx512\s" /proc/cpuinfo ; then
-	echo "CPU: AVX512 found OK"
-	BINARY=llama-cpp-avx512
+if grep -q -e "\savx\s" /proc/cpuinfo ; then
+	echo "CPU: AVX found OK"
+	BINARY=llama-cpp-avx
 else
-	echo "CPU: no AVX512 found"
+	echo "CPU: no AVX found"
 	BINARY=llama-cpp-fallback
 fi
 
@@ -26,11 +25,12 @@ else
 	BINARY=llama-cpp-fallback
 fi
 
-if grep -q -e "\savx\s" /proc/cpuinfo ; then
-	echo "CPU: AVX found OK"
-	BINARY=llama-cpp-avx
+# Check avx 512
+if grep -q -e "\savx512\s" /proc/cpuinfo ; then
+	echo "CPU: AVX512 found OK"
+	BINARY=llama-cpp-avx512
 else
-	echo "CPU: no AVX found"
+	echo "CPU: no AVX512 found"
 	BINARY=llama-cpp-fallback
 fi
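
The two hunks swap the order of the AVX and AVX512 checks. Since every check overwrites BINARY, the tests must run from least to most capable so the strongest supported instruction set wins; previously AVX512 was detected first and then clobbered by the plain AVX check. A condensed sketch of the resulting cascade (assuming an AVX2 check sits between the two hunks, which the diff context suggests but does not show):

#!/bin/sh
# Hypothetical condensed version of run.sh's selection logic after this
# commit: the last matching test wins, so the order is fallback -> avx ->
# avx2 -> avx512, leaving BINARY set to the most capable supported build.
BINARY=llama-cpp-fallback
grep -q -e "\savx\s"    /proc/cpuinfo && BINARY=llama-cpp-avx
grep -q -e "\savx2\s"   /proc/cpuinfo && BINARY=llama-cpp-avx2
grep -q -e "\savx512\s" /proc/cpuinfo && BINARY=llama-cpp-avx512
echo "selected binary: $BINARY"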
