Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions core/gallery/backend_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package gallery

import (
"fmt"
"strings"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/system"
Expand Down Expand Up @@ -30,19 +31,56 @@ type GalleryBackend struct {
CapabilitiesMap map[string]string `json:"capabilities,omitempty" yaml:"capabilities,omitempty"`
}

// stripVersionSuffix returns the parent capability of a versioned capability
// string, or "" when the capability carries no recognised version suffix.
//
// Two suffix shapes are recognised. In both, the version must be a non-empty
// run of ASCII digits at the very end of the string:
//
//   - "-cuda-<N>": "nvidia-cuda-13" -> "nvidia", "nvidia-l4t-cuda-12" -> "nvidia-l4t"
//   - "-v<N>":     "backend-v2" -> "backend"
//
// Requiring digits keeps ordinary name segments that merely start with "v"
// (for example "-vulkan") from being mistaken for a version suffix.
func stripVersionSuffix(capability string) string {
	for _, marker := range []string{"-cuda-", "-v"} {
		idx := strings.LastIndex(capability, marker)
		// idx == 0 would leave an empty parent, which is the same as "no parent".
		if idx <= 0 {
			continue
		}
		if isASCIIDigits(capability[idx+len(marker):]) {
			return capability[:idx]
		}
	}
	return ""
}

// isASCIIDigits reports whether s is non-empty and made up only of '0'-'9'.
func isASCIIDigits(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}

func (backend *GalleryBackend) FindBestBackendFromMeta(systemState *system.SystemState, backends GalleryElements[*GalleryBackend]) *GalleryBackend {
if systemState == nil {
return nil
}

realBackend := backend.CapabilitiesMap[systemState.Capability(backend.CapabilitiesMap)]
if realBackend == "" {
xlog.Debug("No backend found for reported capability", "backend", backend.Name, "reportedCapability", systemState.Capability(backend.CapabilitiesMap))
return nil
// Try exact capability match first
capability := systemState.Capability(backend.CapabilitiesMap)
realBackend := backend.CapabilitiesMap[capability]

// Try to find the backend with exact match
if result := backends.FindByName(realBackend); result != nil {
return result
}

// Fallback: try parent capability (strip version suffix)
if parentCapability := stripVersionSuffix(capability); parentCapability != "" {
if parentBackend := backend.CapabilitiesMap[parentCapability]; parentBackend != "" {
if result := backends.FindByName(parentBackend); result != nil {
xlog.Debug("Using parent capability fallback", "backend", backend.Name,
"originalCapability", capability, "parentCapability", parentCapability)
return result
}
}
}

// Final fallback: try "default" capability
if defaultBackend := backend.CapabilitiesMap["default"]; defaultBackend != "" {
if result := backends.FindByName(defaultBackend); result != nil {
xlog.Debug("Using default capability fallback", "backend", backend.Name,
"originalCapability", capability)
return result
}
}

xlog.Debug("Found backend for reported capability", "backend", backend.Name, "reportedCapability", systemState.Capability(backend.CapabilitiesMap))
return backends.FindByName(realBackend)
xlog.Debug("No backend found for reported capability", "backend", backend.Name, "reportedCapability", capability)
return nil
}

func (m *GalleryBackend) GetInstalled() bool {
Expand Down
128 changes: 128 additions & 0 deletions core/gallery/backends_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1025,3 +1025,131 @@ var _ = Describe("Gallery Backends", func() {
})
})
})

var _ = Describe("FindBestBackendFromMeta fallback logic", func() {
It("should fallback to parent capability when exact match not found", func() {
	// The meta backend maps versioned capabilities, their unversioned parent
	// ("nvidia") and "default" to distinct concrete backends, so each step of
	// the fallback chain resolves to a different, identifiable result.
	metaBackend := &GalleryBackend{
		Metadata: Metadata{
			Name: "faster-whisper",
		},
		CapabilitiesMap: map[string]string{
			"nvidia-cuda-13": "cuda13-faster-whisper",
			"nvidia-cuda-12": "cuda12-faster-whisper",
			"nvidia":         "cuda-faster-whisper",
			"default":        "cpu-faster-whisper",
		},
	}

	cuda13Backend := &GalleryBackend{
		Metadata: Metadata{
			Name: "cuda13-faster-whisper",
		},
		URI: testImage,
	}

	cuda12Backend := &GalleryBackend{
		Metadata: Metadata{
			Name: "cuda12-faster-whisper",
		},
		URI: testImage,
	}

	cudaBackend := &GalleryBackend{
		Metadata: Metadata{
			Name: "cuda-faster-whisper",
		},
		URI: testImage,
	}

	cpuBackend := &GalleryBackend{
		Metadata: Metadata{
			Name: "cpu-faster-whisper",
		},
		URI: testImage,
	}

	// NOTE(review): the expectations below assume this state is reported as
	// the "nvidia-cuda-13" capability — confirm against SystemState.Capability.
	systemState := &system.SystemState{GPUVendor: "nvidia", CUDAGPUModel: "A100", CUDAMajor: 13, VRAM: 8 * 1024 * 1024 * 1024}

	// Test case 1: exact match - nvidia-cuda-13 -> cuda13-faster-whisper
	backends := GalleryElements[*GalleryBackend]{cuda13Backend, cuda12Backend, cudaBackend, cpuBackend}
	bestBackend := metaBackend.FindBestBackendFromMeta(systemState, backends)
	Expect(bestBackend).To(Equal(cuda13Backend))

	// Test case 2: the exact backend is missing from the gallery, so the
	// parent capability is used (nvidia-cuda-13 -> nvidia -> cuda-faster-whisper).
	// The sibling capability nvidia-cuda-12 is never consulted by the fallback,
	// so cuda12-faster-whisper must not be selected even though it is available.
	backendsNoCuda13 := GalleryElements[*GalleryBackend]{cuda12Backend, cudaBackend, cpuBackend}
	bestBackend = metaBackend.FindBestBackendFromMeta(systemState, backendsNoCuda13)
	Expect(bestBackend).To(Equal(cudaBackend))

	// Test case 3: neither the exact nor the parent backend is available,
	// so the "default" mapping is used.
	backendsNoMatch := GalleryElements[*GalleryBackend]{cpuBackend}
	bestBackend = metaBackend.FindBestBackendFromMeta(systemState, backendsNoMatch)
	Expect(bestBackend).To(Equal(cpuBackend))
})

It("should return nil when no fallback available", func() {
	state := &system.SystemState{GPUVendor: "nvidia", CUDAMajor: 13, VRAM: 8 * 1024 * 1024 * 1024}

	// The meta backend maps an exact capability and "default", but neither
	// mapped name exists in the gallery below.
	meta := &GalleryBackend{
		Metadata: Metadata{Name: "faster-whisper"},
		CapabilitiesMap: map[string]string{
			"nvidia-cuda-13": "cuda13-faster-whisper",
			"default":        "cpu-faster-whisper",
		},
	}

	// The gallery holds a single backend that no capability points at.
	unrelated := &GalleryBackend{
		Metadata: Metadata{Name: "other-backend"},
		URI:      testImage,
	}
	available := GalleryElements[*GalleryBackend]{unrelated}

	resolved := meta.FindBestBackendFromMeta(state, available)
	Expect(resolved).To(BeNil())
})

It("should handle stripVersionSuffix correctly", func() {
	// stripVersionSuffix is unexported, so it is exercised indirectly through
	// FindBestBackendFromMeta.
	state := &system.SystemState{GPUVendor: "nvidia", CUDAMajor: 13, VRAM: 8 * 1024 * 1024 * 1024}

	meta := &GalleryBackend{
		Metadata: Metadata{Name: "test-backend"},
		CapabilitiesMap: map[string]string{
			"nvidia-cuda-13": "cuda13",
			"nvidia":         "cuda",
			"default":        "cpu",
		},
	}

	// The gallery deliberately lacks a "cuda13" backend, so the exact
	// nvidia-cuda-13 mapping cannot be resolved.
	parentTarget := &GalleryBackend{
		Metadata: Metadata{Name: "cuda"},
		URI:      testImage,
	}
	defaultTarget := &GalleryBackend{
		Metadata: Metadata{Name: "cpu"},
		URI:      testImage,
	}
	available := GalleryElements[*GalleryBackend]{parentTarget, defaultTarget}

	// Expected resolution: the "nvidia" parent mapping, i.e. the "cuda" backend.
	resolved := meta.FindBestBackendFromMeta(state, available)
	Expect(resolved).To(Equal(parentTarget))
})
})
Loading