Skip to content

feat(llama.cpp/clip): inject gpu options if we detect GPUs #5243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions core/config/guesser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"os"
"path/filepath"

"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
gguf "github.com/thxcode/gguf-parser-go"
)
Expand Down Expand Up @@ -35,4 +36,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
}
cfg.ContextSize = &defaultCtx
}

if cfg.Options == nil {
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
cfg.Options = []string{"gpu"}
}
}
}
60 changes: 16 additions & 44 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
// selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
// Note: this is now relevant only for llama.cpp
func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
foundCUDA := false
foundAMDGPU := false
foundIntelGPU := false
var grpcProcess string

// Select backend now just for llama.cpp
if backend != LLamaCPP {
Expand All @@ -198,48 +194,24 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
}

// Check for GPU-binaries that are shipped with single binary releases
gpus, err := xsysinfo.GPUs()
if err == nil {
for _, gpu := range gpus {
if strings.Contains(gpu.String(), "nvidia") {
p := backendPath(assetDir, LLamaCPPCUDA)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
grpcProcess = p
foundCUDA = true
} else {
log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
}
}
if strings.Contains(gpu.String(), "amd") {
p := backendPath(assetDir, LLamaCPPHipblas)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
grpcProcess = p
foundAMDGPU = true
} else {
log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
}
}
if strings.Contains(gpu.String(), "intel") {
backend := LLamaCPPSycl16
if !f16 {
backend = LLamaCPPSycl32
}
p := backendPath(assetDir, backend)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
grpcProcess = p
foundIntelGPU = true
} else {
log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
}
}
}
gpuBinaries := map[string]string{
"nvidia": LLamaCPPCUDA,
"amd": LLamaCPPHipblas,
"intel": LLamaCPPSycl16,
}

if !f16 {
gpuBinaries["intel"] = LLamaCPPSycl32
}

if foundCUDA || foundAMDGPU || foundIntelGPU {
return grpcProcess
for vendor, binary := range gpuBinaries {
if xsysinfo.HasGPU(vendor) {
p := backendPath(assetDir, binary)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
return p
}
}
}

// No GPU found or no specific binaries found, try to load the CPU variant(s)
Expand Down
18 changes: 18 additions & 0 deletions pkg/xsysinfo/gpu.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package xsysinfo

import (
"strings"

"github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/gpu"
)
Expand All @@ -13,3 +15,19 @@ func GPUs() ([]*gpu.GraphicsCard, error) {

return gpu.GraphicsCards, nil
}

// HasGPU reports whether the host exposes at least one graphics card whose
// description mentions vendor.
//
// vendor is matched case-insensitively as a substring of each card's String()
// output, so a lower-case needle such as "nvidia" also matches a description
// that spells the name "NVIDIA". An empty vendor matches any card. If the
// cards cannot be enumerated, HasGPU returns false instead of surfacing the
// error.
func HasGPU(vendor string) bool {
	cards, err := GPUs()
	if err != nil {
		return false
	}
	if vendor == "" {
		return len(cards) > 0
	}

	// Normalize the needle once; callers pass lower-case vendor keys while the
	// card description may capitalize the vendor name.
	// NOTE(review): the exact format of the description comes from ghw — confirm
	// which fields (driver, vendor, product) it contains on the target hosts.
	want := strings.ToLower(vendor)

	// The loop variable is named card so it does not shadow the imported gpu
	// package.
	for _, card := range cards {
		if strings.Contains(strings.ToLower(card.String()), want) {
			return true
		}
	}
	return false
}
Loading