Skip to content

Commit e578aa8

Browse files
authored
Merge branch 'master' into update/CPPLLAMA_VERSION
2 parents 95c68b8 + 3eaf590 commit e578aa8

File tree

7 files changed

+31
-8
lines changed

7 files changed

+31
-8
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell gi
5454

5555
OPTIONAL_TARGETS?=
5656

57-
OS := $(shell uname -s)
57+
export OS := $(shell uname -s)
5858
ARCH := $(shell uname -m)
5959
GREEN := $(shell tput -Txterm setaf 2)
6060
YELLOW := $(shell tput -Txterm setaf 3)
@@ -764,7 +764,7 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
764764
cp -rf backend/cpp/llama backend/cpp/llama-grpc
765765
$(MAKE) -C backend/cpp/llama-grpc purge
766766
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
767-
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
767+
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
768768
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
769769

770770
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc

backend/cpp/llama/Makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ LLAMA_VERSION?=
44
CMAKE_ARGS?=
55
BUILD_TYPE?=
66
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
7+
TARGET?=--target grpc-server
78

89
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
910
ifeq ($(BUILD_TYPE),cublas)
@@ -20,9 +21,11 @@ else ifeq ($(BUILD_TYPE),hipblas)
2021
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
2122
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
2223
# But if it's OSX without metal, disable it here
23-
else ifeq ($(OS),darwin)
24+
else ifeq ($(OS),Darwin)
2425
ifneq ($(BUILD_TYPE),metal)
2526
CMAKE_ARGS+=-DLLAMA_METAL=OFF
27+
else
28+
TARGET+=--target ggml-metal
2629
endif
2730
endif
2831

@@ -62,8 +65,8 @@ grpc-server: llama.cpp llama.cpp/examples/grpc-server
6265
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
6366
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
6467
bash -c "source $(ONEAPI_VARS); \
65-
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
68+
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
6669
else
67-
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
70+
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
6871
endif
6972
cp llama.cpp/build/bin/grpc-server .

backend/cpp/llama/grpc-server.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,8 @@ struct llama_server_context
886886
{"task_id", slot->task_id},
887887
});
888888

889+
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
890+
889891
return true;
890892
}
891893

core/backend/options.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,14 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
142142
MirostatTAU: float32(*c.LLMConfig.MirostatTAU),
143143
Debug: *c.Debug,
144144
StopPrompts: c.StopWords,
145-
Repeat: int32(c.RepeatPenalty),
145+
Repeat: int32(c.RepeatLastN),
146+
FrequencyPenalty: float32(c.FrequencyPenalty),
147+
PresencePenalty: float32(c.PresencePenalty),
148+
Penalty: float32(c.RepeatPenalty),
146149
NKeep: int32(c.Keep),
147150
Batch: int32(c.Batch),
148151
IgnoreEOS: c.IgnoreEOS,
149152
Seed: getSeed(c),
150-
FrequencyPenalty: float32(c.FrequencyPenalty),
151153
MLock: *c.MMlock,
152154
MMap: *c.MMap,
153155
MainGPU: c.MainGPU,

core/schema/prediction.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ type PredictionOptions struct {
2525
Batch int `json:"batch" yaml:"batch"`
2626
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
2727
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
28-
Keep int `json:"n_keep" yaml:"n_keep"`
28+
29+
RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
30+
31+
Keep int `json:"n_keep" yaml:"n_keep"`
2932

3033
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
3134
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`

pkg/functions/options.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,9 @@ func SetPrefix(suffix string) func(*GrammarOption) {
4242
o.Prefix = suffix
4343
}
4444
}
45+
46+
func SetPropOrder(order string) func(*GrammarOption) {
47+
return func(o *GrammarOption) {
48+
o.PropOrder = order
49+
}
50+
}

pkg/functions/parse.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ type GrammarConfig struct {
3232

3333
// ExpectStringsAfterJSON enables mixed string suffix
3434
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
35+
36+
// PropOrder selects what order to print properties
37+
// for instance, "name,arguments" will print { "name": "foo", "arguments": { "bar": "baz" } }
38+
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
39+
PropOrder string `yaml:"properties_order"`
3540
}
3641

3742
// FunctionsConfig is the configuration for the tool/function call.
@@ -104,6 +109,8 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) {
104109
if g.ExpectStringsAfterJSON {
105110
opts = append(opts, ExpectStringsAfterJSON)
106111
}
112+
113+
opts = append(opts, SetPropOrder(g.PropOrder))
107114
return opts
108115
}
109116

0 commit comments

Comments
 (0)