Skip to content

Commit eb2b22b

Browse files
authored
server/internal/client: use chunksums for concurrent blob verification (ollama#9746)
Replace large-chunk blob downloads with parallel small-chunk verification to solve timeout and performance issues. Registry users experienced progressively slowing download speeds as large-chunk transfers aged, often timing out completely. The previous approach downloaded blobs in a few large chunks but required a separate, single-threaded pass to read the entire blob back from disk for verification after download completion. This change uses the new chunksums API to fetch many smaller chunk+digest pairs, allowing concurrent downloads and immediate verification as each chunk arrives. Chunks are written directly to their final positions, eliminating the entire separate verification pass. The result is more reliable downloads that maintain speed throughout the transfer process and significantly faster overall completion, especially over unstable connections or with large blobs.
1 parent 4ea4d2b commit eb2b22b

File tree

8 files changed

+433
-283
lines changed

8 files changed

+433
-283
lines changed

server/internal/cache/blob/cache.go

Lines changed: 22 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ func debugger(err *error) func(step string) {
146146
// be in either of the following forms:
147147
//
148148
// @<digest>
149-
// <name>
149+
// <name>@<digest>
150150
// <name>
151151
//
152152
// If a digest is provided, it is returned as is and nothing else happens.
@@ -160,8 +160,6 @@ func debugger(err *error) func(step string) {
160160
// hashed is passed to a PutBytes call to ensure that the manifest is in the
161161
// blob store. This is done to ensure that future calls to [Get] succeed in
162162
// these cases.
163-
//
164-
// TODO(bmizerany): Move Links/Resolve/etc. out of this package.
165163
func (c *DiskCache) Resolve(name string) (Digest, error) {
166164
name, digest := splitNameDigest(name)
167165
if digest != "" {
@@ -279,18 +277,6 @@ func (c *DiskCache) Get(d Digest) (Entry, error) {
279277
// It returns an error if either the name or digest is invalid, or if link
280278
// creation encounters any issues.
281279
func (c *DiskCache) Link(name string, d Digest) error {
282-
// TODO(bmizerany): Move link handling from cache to registry.
283-
//
284-
// We originally placed links in the cache due to its storage
285-
// knowledge. However, the registry likely offers better context for
286-
// naming concerns, and our API design shouldn't be tightly coupled to
287-
// our on-disk format.
288-
//
289-
// Links work effectively when independent from physical location -
290-
// they can reference content with matching SHA regardless of storage
291-
// location. In an upcoming change, we plan to shift this
292-
// responsibility to the registry where it better aligns with the
293-
// system's conceptual model.
294280
manifest, err := c.manifestPath(name)
295281
if err != nil {
296282
return err
@@ -341,7 +327,9 @@ func (c *DiskCache) GetFile(d Digest) string {
341327
return absJoin(c.dir, "blobs", filename)
342328
}
343329

344-
// Links returns a sequence of links in the cache in lexical order.
330+
// Links returns a sequence of link names. The sequence is in lexical order.
331+
// Names are converted from their relative path form to their name form but are
332+
// not guaranteed to be valid. Callers should validate the names before using.
345333
func (c *DiskCache) Links() iter.Seq2[string, error] {
346334
return func(yield func(string, error) bool) {
347335
for path, err := range c.links() {
@@ -414,12 +402,14 @@ func (c *DiskCache) links() iter.Seq2[string, error] {
414402
}
415403

416404
type checkWriter struct {
417-
d Digest
418405
size int64
419-
n int64
420-
h hash.Hash
406+
d Digest
421407
f *os.File
422-
err error
408+
h hash.Hash
409+
410+
w io.Writer // underlying writer; set by creator
411+
n int64
412+
err error
423413

424414
testHookBeforeFinalWrite func(*os.File)
425415
}
@@ -435,6 +425,10 @@ func (w *checkWriter) seterr(err error) error {
435425
// underlying writer is guaranteed to be the last byte of p as verified by the
436426
// hash.
437427
func (w *checkWriter) Write(p []byte) (int, error) {
428+
if w.err != nil {
429+
return 0, w.err
430+
}
431+
438432
_, err := w.h.Write(p)
439433
if err != nil {
440434
return 0, w.seterr(err)
@@ -453,7 +447,7 @@ func (w *checkWriter) Write(p []byte) (int, error) {
453447
if nextSize > w.size {
454448
return 0, w.seterr(fmt.Errorf("content exceeds expected size: %d > %d", nextSize, w.size))
455449
}
456-
n, err := w.f.Write(p)
450+
n, err := w.w.Write(p)
457451
w.n += int64(n)
458452
return n, w.seterr(err)
459453
}
@@ -493,10 +487,12 @@ func (c *DiskCache) copyNamedFile(name string, file io.Reader, out Digest, size
493487

494488
// Copy file to f, but also into h to double-check hash.
495489
cw := &checkWriter{
496-
d: out,
497-
size: size,
498-
h: sha256.New(),
499-
f: f,
490+
d: out,
491+
size: size,
492+
h: sha256.New(),
493+
f: f,
494+
w: f,
495+
500496
testHookBeforeFinalWrite: c.testHookBeforeFinalWrite,
501497
}
502498
n, err := io.Copy(cw, file)
@@ -532,11 +528,6 @@ func splitNameDigest(s string) (name, digest string) {
532528
var errInvalidName = errors.New("invalid name")
533529

534530
func nameToPath(name string) (_ string, err error) {
535-
if strings.Contains(name, "@") {
536-
// TODO(bmizerany): HACK: Fix names.Parse to validate.
537-
// TODO(bmizerany): merge with default parts (maybe names.Merge(a, b))
538-
return "", errInvalidName
539-
}
540531
n := names.Parse(name)
541532
if !n.IsFullyQualified() {
542533
return "", errInvalidName
@@ -547,8 +538,7 @@ func nameToPath(name string) (_ string, err error) {
547538
func absJoin(pp ...string) string {
548539
abs, err := filepath.Abs(filepath.Join(pp...))
549540
if err != nil {
550-
// Likely a bug bug or a bad OS problem. Just panic.
551-
panic(err)
541+
panic(err) // this should never happen
552542
}
553543
return abs
554544
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package blob
2+
3+
import (
4+
"crypto/sha256"
5+
"errors"
6+
"io"
7+
"os"
8+
9+
"github.com/ollama/ollama/server/internal/chunks"
10+
)
11+
12+
// Chunk is an alias for [chunks.Chunk], re-exported so users of this
// package do not need to import the chunks package directly.
type Chunk = chunks.Chunk // TODO: move chunks here?

// Chunker writes to a blob in chunks.
// Its zero value is invalid. Use [DiskCache.Chunked] to create a new Chunker.
type Chunker struct {
	digest Digest   // digest of the complete blob; set by Chunked — not read by Put/Close here, TODO confirm intended use
	size   int64    // expected total size of the blob in bytes
	f      *os.File // nil means pre-validated
}
21+
22+
// Chunked returns a new Chunker, ready for use storing a blob of the given
23+
// size in chunks.
24+
//
25+
// Use [Chunker.Put] to write data to the blob at specific offsets.
26+
func (c *DiskCache) Chunked(d Digest, size int64) (*Chunker, error) {
27+
name := c.GetFile(d)
28+
info, err := os.Stat(name)
29+
if err == nil && info.Size() == size {
30+
return &Chunker{}, nil
31+
}
32+
f, err := os.OpenFile(name, os.O_CREATE|os.O_WRONLY, 0o666)
33+
if err != nil {
34+
return nil, err
35+
}
36+
return &Chunker{digest: d, size: size, f: f}, nil
37+
}
38+
39+
// Put copies chunk.Size() bytes from r to the blob at the given offset,
40+
// merging the data with the existing blob. It returns an error if any. As a
41+
// special case, if r has less than chunk.Size() bytes, Put returns
42+
// io.ErrUnexpectedEOF.
43+
func (c *Chunker) Put(chunk Chunk, d Digest, r io.Reader) error {
44+
if c.f == nil {
45+
return nil
46+
}
47+
48+
cw := &checkWriter{
49+
d: d,
50+
size: chunk.Size(),
51+
h: sha256.New(),
52+
f: c.f,
53+
w: io.NewOffsetWriter(c.f, chunk.Start),
54+
}
55+
56+
_, err := io.CopyN(cw, r, chunk.Size())
57+
if err != nil && errors.Is(err, io.EOF) {
58+
return io.ErrUnexpectedEOF
59+
}
60+
return err
61+
}
62+
63+
// Close closes the underlying file.
64+
func (c *Chunker) Close() error {
65+
return c.f.Close()
66+
}

server/internal/cache/blob/digest.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ func (d Digest) Short() string {
6363
return fmt.Sprintf("%x", d.sum[:4])
6464
}
6565

66+
func (d Digest) Sum() [32]byte {
67+
return d.sum
68+
}
69+
6670
func (d Digest) Compare(other Digest) int {
6771
return slices.Compare(d.sum[:], other.sum[:])
6872
}

server/internal/chunks/chunks.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,21 @@ func ParseRange(s string) (unit string, _ Chunk, _ error) {
3131
}
3232

3333
// Parse parses a string in the form "start-end" and returns the Chunk.
34-
func Parse(s string) (Chunk, error) {
35-
startStr, endStr, _ := strings.Cut(s, "-")
36-
start, err := strconv.ParseInt(startStr, 10, 64)
34+
func Parse[S ~string | ~[]byte](s S) (Chunk, error) {
35+
startPart, endPart, found := strings.Cut(string(s), "-")
36+
if !found {
37+
return Chunk{}, fmt.Errorf("chunks: invalid range %q: missing '-'", s)
38+
}
39+
start, err := strconv.ParseInt(startPart, 10, 64)
3740
if err != nil {
38-
return Chunk{}, fmt.Errorf("invalid start: %v", err)
41+
return Chunk{}, fmt.Errorf("chunks: invalid start to %q: %v", s, err)
3942
}
40-
end, err := strconv.ParseInt(endStr, 10, 64)
43+
end, err := strconv.ParseInt(endPart, 10, 64)
4144
if err != nil {
42-
return Chunk{}, fmt.Errorf("invalid end: %v", err)
45+
return Chunk{}, fmt.Errorf("chunks: invalid end to %q: %v", s, err)
4346
}
4447
if start > end {
45-
return Chunk{}, fmt.Errorf("invalid range %d-%d: start > end", start, end)
48+
return Chunk{}, fmt.Errorf("chunks: invalid range %q: start > end", s)
4649
}
4750
return Chunk{start, end}, nil
4851
}

0 commit comments

Comments
 (0)