Skip to content

Commit edb30b5

Browse files
committed
Merge remote-tracking branch 'origin/master' into wasm
2 parents 427f1f7 + c6bc125 commit edb30b5

File tree

289 files changed

+25212
-16126
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

289 files changed

+25212
-16126
lines changed

.devops/s390x.Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ RUN --mount=type=cache,target=/root/.ccache \
2424
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
2525
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
2626
-DLLAMA_BUILD_TESTS=OFF \
27-
-DGGML_BACKEND_DL=OFF \
2827
-DGGML_NATIVE=OFF \
28+
-DGGML_BACKEND_DL=ON \
29+
-DGGML_CPU_ALL_VARIANTS=ON \
2930
-DGGML_BLAS=ON \
3031
-DGGML_BLAS_VENDOR=OpenBLAS && \
3132
cmake --build build --config Release -j $(nproc) && \
@@ -103,6 +104,7 @@ FROM base AS light
103104
WORKDIR /llama.cpp/bin
104105

105106
# Copy llama.cpp binaries and libraries
107+
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
106108
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
107109

108110
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
@@ -116,6 +118,7 @@ ENV LLAMA_ARG_HOST=0.0.0.0
116118
WORKDIR /llama.cpp/bin
117119

118120
# Copy llama.cpp binaries and libraries
121+
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
119122
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
120123

121124
EXPOSE 8080

.github/labeler.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ ggml:
7676
- changed-files:
7777
- any-glob-to-any-file:
7878
- ggml/**
79+
model:
80+
- changed-files:
81+
- any-glob-to-any-file:
82+
- src/models/**
7983
nix:
8084
- changed-files:
8185
- any-glob-to-any-file:

.github/workflows/build-linux-cross.yml

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,49 +4,49 @@ on:
44
workflow_call:
55

66
jobs:
7-
ubuntu-24-riscv64-cpu-cross:
8-
runs-on: ubuntu-24.04
7+
# ubuntu-24-riscv64-cpu-cross:
8+
# runs-on: ubuntu-24.04
99

10-
steps:
11-
- uses: actions/checkout@v4
12-
- name: Setup Riscv
13-
run: |
14-
sudo dpkg --add-architecture riscv64
10+
# steps:
11+
# - uses: actions/checkout@v4
12+
# - name: Setup Riscv
13+
# run: |
14+
# sudo dpkg --add-architecture riscv64
1515

16-
# Add arch-specific repositories for non-amd64 architectures
17-
cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
18-
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
19-
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
20-
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
21-
deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
22-
EOF
16+
# # Add arch-specific repositories for non-amd64 architectures
17+
# cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
18+
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
19+
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
20+
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
21+
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
22+
# EOF
2323

24-
sudo apt-get update || true ;# Prevent failure due to missing URLs.
24+
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
2525

26-
sudo apt-get install -y --no-install-recommends \
27-
build-essential \
28-
gcc-14-riscv64-linux-gnu \
29-
g++-14-riscv64-linux-gnu
26+
# sudo apt-get install -y --no-install-recommends \
27+
# build-essential \
28+
# gcc-14-riscv64-linux-gnu \
29+
# g++-14-riscv64-linux-gnu
3030

31-
- name: Build
32-
run: |
33-
cmake -B build -DLLAMA_CURL=OFF \
34-
-DCMAKE_BUILD_TYPE=Release \
35-
-DGGML_OPENMP=OFF \
36-
-DLLAMA_BUILD_EXAMPLES=ON \
37-
-DLLAMA_BUILD_TOOLS=ON \
38-
-DLLAMA_BUILD_TESTS=OFF \
39-
-DCMAKE_SYSTEM_NAME=Linux \
40-
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
41-
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
42-
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
43-
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
44-
-DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
45-
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
46-
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
47-
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
31+
# - name: Build
32+
# run: |
33+
# cmake -B build -DLLAMA_CURL=OFF \
34+
# -DCMAKE_BUILD_TYPE=Release \
35+
# -DGGML_OPENMP=OFF \
36+
# -DLLAMA_BUILD_EXAMPLES=ON \
37+
# -DLLAMA_BUILD_TOOLS=ON \
38+
# -DLLAMA_BUILD_TESTS=OFF \
39+
# -DCMAKE_SYSTEM_NAME=Linux \
40+
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
41+
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
42+
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
43+
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
44+
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
45+
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
46+
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
47+
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
4848

49-
cmake --build build --config Release -j $(nproc)
49+
# cmake --build build --config Release -j $(nproc)
5050

5151
# ubuntu-24-riscv64-vulkan-cross:
5252
# runs-on: ubuntu-24.04

.github/workflows/docker.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
# https://github.com/ggml-org/llama.cpp/issues/11888
4141
#- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
4242
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
43-
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
43+
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
4444
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
4545
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
4646
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ jobs:
134134
include:
135135
- build: 'x64'
136136
os: ubuntu-22.04
137-
- build: 's390x-z15' # z15 because our CI runners are on z15
138-
os: ubuntu-22.04-s390x
137+
- build: 's390x'
138+
os: ubuntu-24.04-s390x
139139
# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
140140
# - build: 'arm64'
141141
# os: ubuntu-22.04-arm

CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
/ggml/src/ggml-impl.h @ggerganov @slaren
6666
/ggml/src/ggml-metal/ @ggerganov
6767
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
68-
/ggml/src/ggml-hexagon/ @max-krasnyansky
68+
/ggml/src/ggml-hexagon/ @max-krasnyansky @lhez
6969
/ggml/src/ggml-opt.cpp @JohannesGaessler
7070
/ggml/src/ggml-quants.* @ggerganov
7171
/ggml/src/ggml-rpc/ @rgerganov
@@ -89,6 +89,7 @@
8989
/src/llama-model-loader.* @slaren
9090
/src/llama-model.* @CISC
9191
/src/llama-vocab.* @CISC
92+
/src/models/ @CISC
9293
/tests/ @ggerganov
9394
/tests/test-backend-ops.cpp @slaren
9495
/tests/test-thread-safety.cpp @slaren

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,13 @@ LLM inference in C/C++
1717

1818
## Hot topics
1919

20-
- **[guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)**
21-
- **[[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)**
20+
- **[guide : using the new WebUI of llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/16938)**
21+
- [guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)
22+
- [[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)
2223
- Support for the `gpt-oss` model with native MXFP4 format has been added | [PR](https://github.com/ggml-org/llama.cpp/pull/15091) | [Collaboration with NVIDIA](https://blogs.nvidia.com/blog/rtx-ai-garage-openai-oss) | [Comment](https://github.com/ggml-org/llama.cpp/discussions/15095)
23-
- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)
2424
- Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
2525
- VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
2626
- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
27-
- Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123
2827
- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669
2928
- Hugging Face GGUF editor: [discussion](https://github.com/ggml-org/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor)
3029

common/arg.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,7 +2034,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20342034
params.system_prompt.pop_back();
20352035
}
20362036
}
2037-
).set_examples({LLAMA_EXAMPLE_MAIN}));
2037+
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_DIFFUSION}));
20382038
add_opt(common_arg(
20392039
{"--in-file"}, "FNAME",
20402040
"an input file (repeat to specify multiple files)",
@@ -2772,6 +2772,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27722772
params.image.emplace_back(value);
27732773
}
27742774
).set_examples({LLAMA_EXAMPLE_MTMD}));
2775+
add_opt(common_arg(
2776+
{"--image-min-tokens"}, "N",
2777+
"minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)",
2778+
[](common_params & params, int value) {
2779+
params.image_min_tokens = value;
2780+
}
2781+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MIN_TOKENS"));
2782+
add_opt(common_arg(
2783+
{"--image-max-tokens"}, "N",
2784+
"maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)",
2785+
[](common_params & params, int value) {
2786+
params.image_max_tokens = value;
2787+
}
2788+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS"));
27752789
if (llama_supports_rpc()) {
27762790
add_opt(common_arg(
27772791
{"--rpc"}, "SERVERS",
@@ -3207,7 +3221,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
32073221
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
32083222
add_opt(common_arg(
32093223
{"--parse-special"},
3210-
string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
3224+
string_format("parse special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
32113225
[](common_params & params) {
32123226
params.parse_special = true;
32133227
}
@@ -3252,7 +3266,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
32523266
).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
32533267
add_opt(common_arg(
32543268
{"--embd-output-format"}, "FORMAT",
3255-
"empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix",
3269+
"empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",
32563270
[](common_params & params, const std::string & value) {
32573271
params.embd_out = value;
32583272
}

0 commit comments

Comments
 (0)