
Commit 4980a37

WIP

Parent: ff7330c

2 files changed: +93 −78


Makefile

Lines changed: 2 additions & 77 deletions
@@ -703,89 +703,14 @@ else
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
 
-# This target is for manually building a variant with-auto detected flags
-backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-copy
-	$(MAKE) -C backend/cpp/llama-cpp-copy purge
-	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	$(MAKE) VARIANT="llama-cpp-copy" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-copy/grpc-server backend-assets/grpc/llama-cpp
-
-backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx2
-	$(MAKE) -C backend/cpp/llama-cpp-avx2 purge
-	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
-
-backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx512
-	$(MAKE) -C backend/cpp/llama-cpp-avx512 purge
-	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
-
-backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx
-	$(MAKE) -C backend/cpp/llama-cpp-avx purge
-	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-avx/grpc-server backend-assets/grpc/llama-cpp-avx
-
-backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-fallback
-	$(MAKE) -C backend/cpp/llama-cpp-fallback purge
-	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
-
-backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-cuda
-	$(MAKE) -C backend/cpp/llama-cpp-cuda purge
-	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
-
-backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-hipblas
-	$(MAKE) -C backend/cpp/llama-cpp-hipblas purge
-	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
-
-backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f16
-	$(MAKE) -C backend/cpp/llama-cpp-sycl_f16 purge
-	$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
-	BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
-
-backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f32
-	$(MAKE) -C backend/cpp/llama-cpp-sycl_f32 purge
-	$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
-	BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
-
-backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
-	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-grpc
-	$(MAKE) -C backend/cpp/llama-cpp-grpc purge
-	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-cpp-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
-
-backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
-	mkdir -p backend-assets/util/
-	cp -rf backend/cpp/llama-cpp-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
-
 backend-assets/grpc/bark-cpp: protogen-go replace backend/go/bark-cpp/libbark.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
 ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/bark-cpp
 endif
 
-backend-assets/grpc/piper: protogen-go replacesources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
+backend-assets/grpc/piper: protogen-go replace sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
 ifneq ($(UPX),)
@@ -799,7 +724,7 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/silero-vad
 endif
 
-backend-assets/grpc/whisper: protogen-go replacesources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
+backend-assets/grpc/whisper: protogen-go replace sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
 ifneq ($(UPX),)
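
Net effect of this file's changes: the per-variant llama-cpp build targets leave the top-level Makefile (the backend's own Makefile, below, picks them up), and two dependency typos (replacesources → replace sources) are fixed in the piper and whisper targets. As a minimal sketch of how the relocated auto-detect build would be invoked, assuming the WIP target names in this commit stay as-is:

	# Hypothetical invocation; the commit is marked WIP, so the target
	# name and output location may still change.
	make -C backend/cpp/llama-cpp llama-cpp

Per the next file, the new llama-cpp target checks out llama.cpp, purges stale build state, builds grpc-server via build-llama-cpp-grpc-server, and copies the resulting binary into the backend directory.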

backend/cpp/llama-cpp/Makefile

Lines changed: 91 additions & 1 deletion
@@ -1,5 +1,5 @@
 
-LLAMA_VERSION?=
+LLAMA_VERSION?=e75ba4c0434eb759eb7ff74e034ebe729053e575
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
@@ -50,6 +50,96 @@ ifeq ($(BUILD_TYPE),sycl_f32)
 	-DCMAKE_CXX_FLAGS="-fsycl"
 endif
 
+build-llama-cpp-grpc-server:
+# Conditionally build grpc for the llama backend to use if needed
+ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
+	$(MAKE) -C ../../grpc build
+	_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
+	_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
+	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
+	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
+	$(MAKE) grpc-server
+else
+	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) grpc-server
+endif
+
+
+# This target is for manually building a variant with-auto detected flags
+llama-cpp: llama.cpp purge
+	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
+	$(MAKE) VARIANT="llama-cpp-copy" build-llama-cpp-grpc-server
+	cp -rfv grpc-server llama-cpp
+
+llama-cpp-avx2: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx2
+	$(MAKE) -C backend/cpp/llama-cpp-avx2 purge
+	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
+
+backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx512
+	$(MAKE) -C backend/cpp/llama-cpp-avx512 purge
+	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
+
+backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx
+	$(MAKE) -C backend/cpp/llama-cpp-avx purge
+	$(info ${GREEN}I llama-cpp build info:avx${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-avx/grpc-server backend-assets/grpc/llama-cpp-avx
+
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-fallback
+	$(MAKE) -C backend/cpp/llama-cpp-fallback purge
+	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+
+backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-cuda
+	$(MAKE) -C backend/cpp/llama-cpp-cuda purge
+	$(info ${GREEN}I llama-cpp build info:cuda${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
+
+backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-hipblas
+	$(MAKE) -C backend/cpp/llama-cpp-hipblas purge
+	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
+
+backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f16
+	$(MAKE) -C backend/cpp/llama-cpp-sycl_f16 purge
+	$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
+	BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
+
+backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f32
+	$(MAKE) -C backend/cpp/llama-cpp-sycl_f32 purge
+	$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
+	BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
+
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
+	cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-grpc
+	$(MAKE) -C backend/cpp/llama-cpp-grpc purge
+	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-cpp-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
+
+backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
+	mkdir -p backend-assets/util/
+	cp -rf backend/cpp/llama-cpp-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+
+
 llama.cpp:
 	mkdir -p llama.cpp
 	cd llama.cpp && \
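
For completeness, a usage sketch of the new in-backend entry point, assuming it follows the same CMAKE_ARGS conventions as the moved targets (the AVX2 flags below are illustrative, not prescribed by the commit):

	# Build grpc-server directly from the backend directory, bypassing the
	# top-level Makefile. BUILD_GRPC_FOR_BACKEND_LLAMA is left unset here,
	# so the else branch above builds against the system gRPC.
	CMAKE_ARGS="-DGGML_AVX=on -DGGML_AVX2=on" \
		make -C backend/cpp/llama-cpp build-llama-cpp-grpc-server

Note that several of the copied variant targets (e.g. backend-assets/grpc/llama-cpp-avx512) still reference top-level paths such as backend/cpp/llama-cpp and backend-assets/grpc, which do not resolve relative to this Makefile; consistent with the WIP commit message, those paths presumably remain to be adjusted.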
