
Commit 033cec2

count gemma3 vision tensors
1 parent 6b45b1d commit 033cec2

2 files changed: +10 −2 lines

fs/ggml/ggml.go

Lines changed: 8 additions & 0 deletions
```diff
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			embeddingLength*numPatches*maxNumTiles +
 			9*embeddingLength*numPaddedPatches*maxNumTiles +
 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
+	case "gemma3":
+		for name, layer := range llm.Tensors().GroupLayers() {
+			if strings.HasPrefix(name, "v.") {
+				for _, tensor := range layer {
+					weights += tensor.Size()
+				}
+			}
+		}
 	}
 	return weights, graphSize
 }
```
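The new `case "gemma3"` branch walks the model's grouped layers and adds up the size of every tensor under the `v.` (vision) prefix, so gemma3's vision tower now contributes to the weights estimate. The sketch below reproduces that counting pattern in isolation; the `Tensor` type and the `layers` map are hypothetical stand-ins for what `llm.Tensors().GroupLayers()` returns, included only to make the traversal runnable.

```go
package main

import (
	"fmt"
	"strings"
)

// Tensor is a hypothetical stand-in for ggml's tensor metadata; in the real
// code, Size() reports the tensor's byte size.
type Tensor struct{ bytes uint64 }

func (t Tensor) Size() uint64 { return t.bytes }

func main() {
	// Layers keyed by name, mirroring the shape of the grouped-layer map:
	// vision-tower tensors live under "v.*", language-model blocks under "blk.*".
	layers := map[string]map[string]Tensor{
		"v.blk.0": {"attn_q.weight": {bytes: 4 << 20}, "attn_k.weight": {bytes: 4 << 20}},
		"blk.0":   {"attn_q.weight": {bytes: 64 << 20}},
	}

	var weights uint64
	for name, layer := range layers {
		if strings.HasPrefix(name, "v.") { // count only the vision tensors
			for _, tensor := range layer {
				weights += tensor.Size()
			}
		}
	}
	fmt.Printf("gemma3 vision weights: %d bytes\n", weights) // 8388608
}
```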

llm/memory.go

Lines changed: 2 additions & 2 deletions
```diff
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []string
 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
 			layerSize = blk.Size()
 			layerSize += kv / f.KV().BlockCount()
+			memoryWeights += blk.Size()
 		}
-		memoryWeights += layerSize
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
 			// Stop allocating on GPU(s) once we hit the users target NumGPU
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 			// memory of the weights
 			"total", format.HumanBytes2(m.memoryWeights),
 			// memory of repeating layers
-			"repeating", format.HumanBytes2(m.memoryWeights-m.memoryLayerOutput),
+			"repeating", format.HumanBytes2(m.memoryWeights),
 			// memory of non-repeating layers
 			"nonrepeating", format.HumanBytes2(m.memoryLayerOutput),
 		),
```
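Judging from the two hunks, `memoryWeights` previously accumulated `layerSize` for each block, which also folds in that block's share of the KV cache, and `LogValue` recovered the repeating portion by subtracting `memoryLayerOutput`. After this change it sums only `blk.Size()`, so the "repeating" field can log `memoryWeights` directly. Below is a minimal sketch of the two quantities, using a hypothetical `block` type and illustrative sizes in place of the real layer grouping.

```go
package main

import "fmt"

// block is a hypothetical stand-in for a grouped "blk.N" layer; in the real
// estimator, Size() reports the summed byte size of the block's weight tensors.
type block struct{ size uint64 }

func (b block) Size() uint64 { return b.size }

func main() {
	blocks := []block{{size: 100 << 20}, {size: 100 << 20}, {size: 100 << 20}}
	kvTotal := uint64(48 << 20) // illustrative total KV cache, split across blocks
	blockCount := uint64(len(blocks))

	var memoryWeights, placed uint64
	for _, blk := range blocks {
		// layerSize is what gets placed on a GPU for this block: its weights
		// plus its share of the KV cache.
		layerSize := blk.Size() + kvTotal/blockCount
		placed += layerSize

		// After this commit, memoryWeights counts only the repeating-layer
		// weights, so LogValue can report "repeating" as memoryWeights itself.
		memoryWeights += blk.Size()
	}

	fmt.Println("placed per-layer (weights + KV):", placed) // 364904448
	fmt.Println("repeating weights only:", memoryWeights)   // 314572800
}
```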
