Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions pkg/distribution/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,23 @@ import (
"github.com/docker/model-runner/pkg/distribution/types"
)

// BuildOption is a functional option accepted by FromPath and FromPaths to
// adjust how a model artifact is built.
type BuildOption func(*buildOptions)

// buildOptions collects the settings that BuildOption values write into.
type buildOptions struct {
	// created is the caller-supplied creation timestamp; nil means none was given.
	created *time.Time
}

// WithCreated returns a BuildOption that pins the model artifact's creation
// timestamp to t. When the option is not supplied, the builder falls back to
// the current time (time.Now()).
// Pinning the timestamp is what makes OCI digests deterministic: identical
// model content then yields an identical artifact no matter when it is built.
func WithCreated(t time.Time) BuildOption {
	created := t
	return func(o *buildOptions) {
		o.created = &created
	}
}

// Builder builds a model artifact
type Builder struct {
model types.ModelArtifact
Expand All @@ -22,7 +39,7 @@ type Builder struct {
// FromPath returns a *Builder that builds model artifacts from a file path.
// It auto-detects the model format (GGUF or Safetensors) and discovers any shards.
// This is the preferred entry point for creating models from local files.
func FromPath(path string) (*Builder, error) {
func FromPath(path string, opts ...BuildOption) (*Builder, error) {
// Auto-detect format from file extension
f, err := format.DetectFromPath(path)
if err != nil {
Expand All @@ -36,12 +53,12 @@ func FromPath(path string) (*Builder, error) {
}

// Create model using the format abstraction
return fromFormat(f, paths)
return fromFormat(f, paths, opts...)
}

// FromPaths returns a *Builder that builds model artifacts from multiple file paths.
// All paths must be of the same format. Use this when you already have the list of files.
func FromPaths(paths []string) (*Builder, error) {
func FromPaths(paths []string, opts ...BuildOption) (*Builder, error) {
if len(paths) == 0 {
return nil, fmt.Errorf("at least one path is required")
}
Expand All @@ -53,12 +70,17 @@ func FromPaths(paths []string) (*Builder, error) {
}

// Create model using the format abstraction
return fromFormat(f, paths)
return fromFormat(f, paths, opts...)
}

// fromFormat creates a Builder using the unified format abstraction.
// This is the internal implementation that creates layers and config.
func fromFormat(f format.Format, paths []string) (*Builder, error) {
func fromFormat(f format.Format, paths []string, opts ...BuildOption) (*Builder, error) {
options := &buildOptions{}
for _, opt := range opts {
opt(options)
}

// Create layers from paths
layers := make([]oci.Layer, len(paths))
diffIDs := make([]oci.Hash, len(paths))
Expand All @@ -83,8 +105,15 @@ func fromFormat(f format.Format, paths []string) (*Builder, error) {
return nil, fmt.Errorf("extract config: %w", err)
}

// Use the provided creation time, or fall back to current time
var created time.Time
if options.created != nil {
created = *options.created
} else {
created = time.Now()
}

// Build the model
created := time.Now()
mdl := &partial.BaseModel{
ModelConfigFile: types.ConfigFile{
Config: config,
Expand Down
97 changes: 97 additions & 0 deletions pkg/distribution/builder/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,109 @@ import (
"path/filepath"
"strings"
"testing"
"time"

"github.com/docker/model-runner/pkg/distribution/builder"
"github.com/docker/model-runner/pkg/distribution/oci"
"github.com/docker/model-runner/pkg/distribution/types"
)

// TestWithCreatedDeterministicDigest checks both halves of the WithCreated
// contract: building the same file with the same pinned timestamp must give
// the same manifest digest, and changing only the timestamp must change it.
func TestWithCreatedDeterministicDigest(t *testing.T) {
	modelPath := filepath.Join("..", "assets", "dummy.gguf")
	pinned := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC)

	// Two builders over the same file, both pinned to the same timestamp.
	firstBuilder, err := builder.FromPath(modelPath, builder.WithCreated(pinned))
	if err != nil {
		t.Fatalf("FromPath (first) failed: %v", err)
	}
	secondBuilder, err := builder.FromPath(modelPath, builder.WithCreated(pinned))
	if err != nil {
		t.Fatalf("FromPath (second) failed: %v", err)
	}

	firstTarget := &fakeTarget{}
	secondTarget := &fakeTarget{}
	if err := firstBuilder.Build(t.Context(), firstTarget, nil); err != nil {
		t.Fatalf("Build (first) failed: %v", err)
	}
	if err := secondBuilder.Build(t.Context(), secondTarget, nil); err != nil {
		t.Fatalf("Build (second) failed: %v", err)
	}

	firstDigest, err := firstTarget.artifact.Digest()
	if err != nil {
		t.Fatalf("Digest (first) failed: %v", err)
	}
	secondDigest, err := secondTarget.artifact.Digest()
	if err != nil {
		t.Fatalf("Digest (second) failed: %v", err)
	}

	if firstDigest != secondDigest {
		t.Errorf("Expected identical digests with same timestamp, got %v and %v", firstDigest, secondDigest)
	}

	// A third build that differs only in its timestamp must not collide.
	otherTime := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)
	thirdBuilder, err := builder.FromPath(modelPath, builder.WithCreated(otherTime))
	if err != nil {
		t.Fatalf("FromPath (third) failed: %v", err)
	}
	thirdTarget := &fakeTarget{}
	if err := thirdBuilder.Build(t.Context(), thirdTarget, nil); err != nil {
		t.Fatalf("Build (third) failed: %v", err)
	}
	thirdDigest, err := thirdTarget.artifact.Digest()
	if err != nil {
		t.Fatalf("Digest (third) failed: %v", err)
	}

	if firstDigest == thirdDigest {
		t.Errorf("Expected different digests with different timestamps, but both were %v", firstDigest)
	}
}

// TestWithCreatedFromPaths checks that the FromPaths entry point honours
// WithCreated: two builds of the same path list with the same pinned
// timestamp must produce the same digest.
func TestWithCreatedFromPaths(t *testing.T) {
	modelPath := filepath.Join("..", "assets", "dummy.gguf")
	pinned := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC)

	firstBuilder, err := builder.FromPaths([]string{modelPath}, builder.WithCreated(pinned))
	if err != nil {
		t.Fatalf("FromPaths (first) failed: %v", err)
	}
	secondBuilder, err := builder.FromPaths([]string{modelPath}, builder.WithCreated(pinned))
	if err != nil {
		t.Fatalf("FromPaths (second) failed: %v", err)
	}

	firstTarget := &fakeTarget{}
	secondTarget := &fakeTarget{}
	if err := firstBuilder.Build(t.Context(), firstTarget, nil); err != nil {
		t.Fatalf("Build (first) failed: %v", err)
	}
	if err := secondBuilder.Build(t.Context(), secondTarget, nil); err != nil {
		t.Fatalf("Build (second) failed: %v", err)
	}

	firstDigest, err := firstTarget.artifact.Digest()
	if err != nil {
		t.Fatalf("Digest (first) failed: %v", err)
	}
	secondDigest, err := secondTarget.artifact.Digest()
	if err != nil {
		t.Fatalf("Digest (second) failed: %v", err)
	}

	if firstDigest != secondDigest {
		t.Errorf("Expected identical digests with same timestamp, got %v and %v", firstDigest, secondDigest)
	}
}

func TestBuilder(t *testing.T) {
// Create a builder from a GGUF file
b, err := builder.FromPath(filepath.Join("..", "assets", "dummy.gguf"))
Expand Down
24 changes: 23 additions & 1 deletion pkg/distribution/builder/from_directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ type DirectoryOptions struct {
// - Glob patterns (e.g., "*.log", "*.tmp") - excludes files matching the pattern
// - Paths with slashes (e.g., "logs/debug.log") - excludes specific paths
Exclusions []string

// Created is an optional creation timestamp for the model artifact.
// When set, it overrides the default behavior of using time.Now().
// This is useful for producing deterministic OCI digests.
Created *time.Time
}

// DirectoryOption is a functional option for configuring FromDirectory.
Expand All @@ -45,6 +50,16 @@ func WithExclusions(patterns ...string) DirectoryOption {
}
}

// WithCreatedTime sets a specific creation timestamp for the model artifact
// built from a directory. When not set, the current time (time.Now()) is used.
// This is useful for producing deterministic OCI digests when the same directory
// content should always yield the same artifact regardless of when it was built.
func WithCreatedTime(t time.Time) DirectoryOption {
return func(opts *DirectoryOptions) {
opts.Created = &t
}
Comment on lines +53 to +60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

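For consistency with the BuildOption defined in pkg/distribution/builder/builder.go, it would be better to name this function WithCreated instead of WithCreatedTime. Both functions serve the same purpose of setting a creation timestamp, and using the same name improves code clarity and maintainability. You will need to update the call sites in pkg/distribution/builder/from_directory_test.go as well. Caveat: builder.go and from_directory.go are in the same directory and therefore the same Go package, and builder.go already declares a package-level WithCreated (returning BuildOption), so a second WithCreated returning DirectoryOption would be a redeclaration compile error; the rename only works if the two option types are unified or one of the functions keeps a distinct name.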

Suggested change
// WithCreatedTime sets a specific creation timestamp for the model artifact
// built from a directory. When not set, the current time (time.Now()) is used.
// This is useful for producing deterministic OCI digests when the same directory
// content should always yield the same artifact regardless of when it was built.
func WithCreatedTime(t time.Time) DirectoryOption {
return func(opts *DirectoryOptions) {
opts.Created = &t
}
// WithCreated sets a specific creation timestamp for the model artifact
// built from a directory. When not set, the current time (time.Now()) is used.
// This is useful for producing deterministic OCI digests when the same directory
// content should always yield the same artifact regardless of when it was built.
func WithCreated(t time.Time) DirectoryOption {
return func(opts *DirectoryOptions) {
opts.Created = &t
}
}

}

// FromDirectory creates a Builder from a directory containing model files.
// It recursively scans the directory and adds each non-hidden file as a separate layer.
// Each layer's filepath annotation preserves the relative path from the directory root.
Expand Down Expand Up @@ -190,8 +205,15 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) {
// TODO: Extract additional metadata from weight files if needed
// For safetensors, we might want to read config.json from the directory

// Use the provided creation time, or fall back to current time
var created time.Time
if options.Created != nil {
created = *options.Created
} else {
created = time.Now()
}

// Build the model with V0.2 config (layer-per-file with annotations)
created := time.Now()
mdl := &partial.BaseModel{
ModelConfigFile: types.ConfigFile{
Config: config,
Expand Down
48 changes: 48 additions & 0 deletions pkg/distribution/builder/from_directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,52 @@ func (m *mockFileInfo) ModTime() time.Time { return time.Time{} }
func (m *mockFileInfo) IsDir() bool { return m.isDir }
func (m *mockFileInfo) Sys() interface{} { return nil }

// TestFromDirectoryWithCreatedTime checks that WithCreatedTime makes
// FromDirectory digests reproducible: the same directory with the same pinned
// timestamp gives the same digest, and a different timestamp gives a different one.
func TestFromDirectoryWithCreatedTime(t *testing.T) {
	// Minimal model directory: one safetensors file plus a config.
	modelDir := t.TempDir()
	createTestFile(t, modelDir, "model.safetensors", "fake safetensors content")
	createTestFile(t, modelDir, "config.json", `{"model_type": "test"}`)

	// First pair of builds shares one pinned timestamp.
	pinned := time.Date(2024, 1, 15, 10, 30, 0, 0, time.UTC)

	firstBuilder, err := FromDirectory(modelDir, WithCreatedTime(pinned))
	if err != nil {
		t.Fatalf("First FromDirectory failed: %v", err)
	}
	firstDigest, err := firstBuilder.Model().Digest()
	if err != nil {
		t.Fatalf("First digest failed: %v", err)
	}

	secondBuilder, err := FromDirectory(modelDir, WithCreatedTime(pinned))
	if err != nil {
		t.Fatalf("Second FromDirectory failed: %v", err)
	}
	secondDigest, err := secondBuilder.Model().Digest()
	if err != nil {
		t.Fatalf("Second digest failed: %v", err)
	}

	// Identical content and identical timestamp must agree.
	if firstDigest != secondDigest {
		t.Errorf("Expected identical digests with same timestamp, got %v != %v", firstDigest, secondDigest)
	}

	// Changing only the timestamp must change the digest.
	otherTime := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC)
	thirdBuilder, err := FromDirectory(modelDir, WithCreatedTime(otherTime))
	if err != nil {
		t.Fatalf("Third FromDirectory failed: %v", err)
	}
	thirdDigest, err := thirdBuilder.Model().Digest()
	if err != nil {
		t.Fatalf("Third digest failed: %v", err)
	}

	if firstDigest == thirdDigest {
		t.Errorf("Expected different digests with different timestamps, but both were %v", firstDigest)
	}
}

// Need to import time for mockFileInfo
42 changes: 42 additions & 0 deletions pkg/distribution/huggingface/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"time"
)

const (
Expand Down Expand Up @@ -174,6 +176,46 @@ func (c *Client) DownloadFile(ctx context.Context, repo, revision, filename stri
return resp.Body, resp.ContentLength, nil
}

// RepoInfo contains metadata about a HuggingFace repository.
// It is the type GetRepoInfo decodes the API's JSON response body into.
type RepoInfo struct {
	// LastModified is read from the "lastModified" key of the JSON response.
	LastModified time.Time `json:"lastModified"`
}

// GetRepoInfo retrieves metadata for repo at the given revision from the
// HuggingFace API and decodes it into a RepoInfo. The result carries the
// last-modified timestamp, which callers can use to produce deterministic
// OCI digests.
//
// An empty revision is treated as "main". The revision is path-escaped before
// being placed in the URL; repo is inserted as given. Errors reported by
// checkResponse are returned unchanged; request-construction, transport and
// decoding failures are wrapped with context.
func (c *Client) GetRepoInfo(ctx context.Context, repo, revision string) (*RepoInfo, error) {
	rev := revision
	if rev == "" {
		rev = "main"
	}

	endpoint := fmt.Sprintf("%s/api/models/%s/revision/%s", c.baseURL, repo, url.PathEscape(rev))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	c.setHeaders(req)

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("get repo info: %w", err)
	}
	defer resp.Body.Close()

	// Validate the response before attempting to decode the body.
	if err := c.checkResponse(resp, repo); err != nil {
		return nil, err
	}

	repoInfo := new(RepoInfo)
	if err := json.NewDecoder(resp.Body).Decode(repoInfo); err != nil {
		return nil, fmt.Errorf("decode response: %w", err)
	}
	return repoInfo, nil
}

// setHeaders sets common headers for HuggingFace API requests
func (c *Client) setHeaders(req *http.Request) {
req.Header.Set("User-Agent", c.userAgent)
Expand Down
Loading