rick-github
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 2 additions & 1 deletion
diff --git a/Makefile.sync b/Makefile.sync
Lines changed: 17 additions & 8 deletions
diff --git a/convert/reader.go b/convert/reader.go
Lines changed: 1 addition & 0 deletions
diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
Lines changed: 4 additions & 9 deletions
diff --git a/llama/build-info.cpp b/llama/build-info.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/llama/llama.cpp/.rsync-filter b/llama/llama.cpp/.rsync-filter
Lines changed: 32 additions & 23 deletions
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.21)
 project(Ollama C CXX)
 
 include(CheckLanguage)
+include(GNUInstallDirs)
 
 find_package(Threads REQUIRED)
 
@@ -51,7 +52,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
 
-add_compile_definitions(NDEBUG)
+add_compile_definitions(NDEBUG GGML_VERSION=0x0 GGML_COMMIT=0x0)
 
 set(GGML_CPU ON)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
 
@@ -1,6 +1,6 @@
-UPSTREAM=https://github.com/ggerganov/llama.cpp.git
+UPSTREAM=https://github.com/ggml-org/llama.cpp.git
 WORKDIR=llama/vendor
-FETCH_HEAD=de4c07f93783a1a96456a44dc16b9db538ee1618
+FETCH_HEAD=e54d41befcc1575f4c898c5ff4ef43970cead75f
 
 .PHONY: help
 help:
@@ -12,7 +12,7 @@ help:
 	@echo "    clean                Clean local repository"
 	@echo
 	@echo "Example:"
-	@echo "    make -f $(lastword $(MAKEFILE_LIST)) clean sync"
+	@echo "    make -f $(lastword $(MAKEFILE_LIST)) clean apply-patches sync"
 
 .PHONY: sync
 sync: llama/build-info.cpp ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal
@@ -24,12 +24,12 @@ ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-embed.metal: ml/backend/ggml/ggml
 	go generate ./$(@D)
 
 .PHONY: llama/llama.cpp
-llama/llama.cpp: llama/vendor/
-	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+llama/llama.cpp: llama/vendor
+	rsync -arvzc --delete -f "include LICENSE" -f "merge $@/.rsync-filter" $(addprefix $<,/LICENSE /) $@
 
 .PHONY: ml/backend/ggml/ggml
-ml/backend/ggml/ggml: llama/vendor/ggml/
-	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+ml/backend/ggml/ggml: llama/vendor
+	rsync -arvzc --delete -f "include LICENSE" -f "merge $@/.rsync-filter" $(addprefix $<,/LICENSE /ggml/) $@
 
 PATCHES=$(wildcard llama/patches/*.patch)
 PATCHED=$(join $(dir $(PATCHES)), $(addsuffix ed, $(addprefix ., $(notdir $(PATCHES)))))
@@ -39,7 +39,15 @@ PATCHED=$(join $(dir $(PATCHES)), $(addsuffix ed, $(addprefix ., $(notdir $(PATC
 apply-patches: $(PATCHED)
 
 llama/patches/.%.patched: llama/patches/%.patch
-	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
+	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then \
+		touch $@;                                                                           \
+	else                                                                                    \
+		echo "Patch failed. Resolve any conflicts then continue.";                          \
+		echo "1. Run 'git -C $(WORKDIR) am --continue'";                                    \
+		echo "2. Run 'make -f $(lastword $(MAKEFILE_LIST)) format-patches'";                \
+		echo "3. Run 'make -f $(lastword $(MAKEFILE_LIST)) clean apply-patches'";           \
+		exit 1;                                                                             \
+	fi
 
 .PHONY: checkout
 checkout: $(WORKDIR)
@@ -60,4 +68,5 @@ format-patches: llama/patches
 
 .PHONE: clean
 clean: checkout
+	@git -C $(WORKDIR) am --abort || true
 	$(RM) llama/patches/.*.patched
@@ -39,6 +39,7 @@ const (
 
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
+		strings.HasSuffix(t.name, ".bias") ||
 		t.name == "token_types.weight" ||
 		t.name == "v.positional_embedding_vlm" ||
 		t.name == "v.tile_position_embd.weight" ||
 
@@ -180,7 +180,7 @@ func (kv KV) OllamaEngineRequired() bool {
 		"llama4",
 		"mllama",
 		"qwen25vl",
-		"gptoss",
+		"gptoss", "gpt-oss",
 	}, kv.Architecture())
 }
 
@@ -328,7 +328,7 @@ func (t TensorType) TypeSize() uint64 {
 		return 2 + blockSize/2
 	case TensorTypeQ4_1:
 		return 2 + 2 + blockSize/2
-	case TensorTypeMXFP4:
+	case TensorTypeMXFP4, 39:
 		return 1 + blockSize/2
 	case TensorTypeQ5_0:
 		return 2 + 4 + blockSize/2
@@ -665,7 +665,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
 					4*qkvBias.Shape[0],
 			)
 		}
-	case "gptoss":
+	case "gptoss", "gpt-oss":
 		kv = make([]uint64, f.KV().BlockCount())
 		for i := range kv {
 			kv[i] = uint64(float64((embeddingHeadsK+embeddingHeadsV)*headsKV) * bytesPerElement)
@@ -675,8 +675,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
 				kv[i] *= context
 			}
 		}
-		fullOffload = 4 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
-		partialOffload = fullOffload
+		partialOffload = 2 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
 	}
 
 	return
@@ -761,10 +760,6 @@ func (f GGML) SupportsFlashAttention() bool {
 		return false
 	}
 
-	if f.KV().Architecture() == "gptoss" {
-		return false
-	}
-
 	// Check head counts match and are non-zero
 	headCountK := f.KV().EmbeddingHeadCountK()
 	headCountV := f.KV().EmbeddingHeadCountV()
 
@@ -1,23 +1,32 @@
-protect **/*.go
-include common/
-include common/base64.*
-include common/common.*
-include common/json-schema-to-grammar.*
-include common/json.*
-include common/log.*
-include common/sampling.*
-include common/stb_image.*
-include include/
-include include/llama.*
-include include/llama-*.*
-include tools/
-include tools/mtmd/
-include tools/mtmd/clip.*
-include tools/mtmd/clip-impl.*
-include tools/mtmd/llava.*
-include src/
-include src/llama.*
-include src/llama-*.*
-include src/unicode-data.*
-include src/unicode.*
-exclude *
+protect .rsync-filter
+protect *.go
+include /common/
+include /common/base64.*
+include /common/common.*
+include /common/json-schema-to-grammar.*
+include /common/json.*
+include /common/log.*
+include /common/sampling.*
+include /include/
+include /include/llama.*
+include /include/llama-*.*
+include /tools/
+include /tools/mtmd/
+include /tools/mtmd/*.h
+include /tools/mtmd/clip.cpp
+include /tools/mtmd/mtmd.cpp
+include /tools/mtmd/mtmd-audio.cpp
+include /tools/mtmd/mtmd-helper.cpp
+include /src/
+include /src/llama.*
+include /src/llama-*.*
+include /src/unicode-data.*
+include /src/unicode.*
+include /vendor/
+include /vendor/miniaudio/
+include /vendor/miniaudio/*.h
+include /vendor/nlohmann/
+include /vendor/nlohmann/*.hpp
+include /vendor/stb/
+include /vendor/stb/*.h
+hide *