mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-09 12:03:15 +00:00
feat(llama.cpp): estimate vram usage (#5299)
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-core) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Update swagger / swagger (push) Has been cancelled
Check if checksums are up-to-date / checksum_check (push) Has been cancelled
Bump dependencies / bump (mudler/LocalAI) (push) Has been cancelled
Bump dependencies / bump (main, PABannier/bark.cpp, BARKCPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggerganov/whisper.cpp, WHISPER_CPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggml-org/llama.cpp, CPPLLAMA_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, leejet/stable-diffusion.cpp, STABLEDIFFUSION_GGML_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-piper, PIPER_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-stable-diffusion, STABLEDIFFUSION_VERSION) (push) Has been cancelled
generate and publish GRPC docker caches / generate_caches (ubuntu:22.04, linux/amd64,linux/arm64, arc-runner-set) (push) Has been cancelled
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-core) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Update swagger / swagger (push) Has been cancelled
Check if checksums are up-to-date / checksum_check (push) Has been cancelled
Bump dependencies / bump (mudler/LocalAI) (push) Has been cancelled
Bump dependencies / bump (main, PABannier/bark.cpp, BARKCPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggerganov/whisper.cpp, WHISPER_CPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggml-org/llama.cpp, CPPLLAMA_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, leejet/stable-diffusion.cpp, STABLEDIFFUSION_GGML_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-piper, PIPER_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-stable-diffusion, STABLEDIFFUSION_VERSION) (push) Has been cancelled
generate and publish GRPC docker caches / generate_caches (ubuntu:22.04, linux/amd64,linux/arm64, arc-runner-set) (push) Has been cancelled
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
bace6516f1
commit
5c6cd50ed6
@ -7,11 +7,11 @@ import (
|
|||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
|
gguf "github.com/gpustack/gguf-parser-go"
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/gallery"
|
"github.com/mudler/LocalAI/core/gallery"
|
||||||
"github.com/mudler/LocalAI/pkg/downloader"
|
"github.com/mudler/LocalAI/pkg/downloader"
|
||||||
gguf "github.com/thxcode/gguf-parser-go"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type UtilCMD struct {
|
type UtilCMD struct {
|
||||||
@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
|||||||
log.Info().
|
log.Info().
|
||||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||||
Any("modelName", f.Model().Name).
|
Any("modelName", f.Metadata().Name).
|
||||||
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
|
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
|
||||||
|
|
||||||
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
|
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
|
||||||
|
@ -3,9 +3,10 @@ package config
|
|||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
gguf "github.com/thxcode/gguf-parser-go"
|
gguf "github.com/gpustack/gguf-parser-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type familyType uint8
|
type familyType uint8
|
||||||
@ -23,6 +24,7 @@ const (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
defaultContextSize = 1024
|
defaultContextSize = 1024
|
||||||
|
defaultNGPULayers = 99999999
|
||||||
)
|
)
|
||||||
|
|
||||||
type settingsConfig struct {
|
type settingsConfig struct {
|
||||||
@ -147,7 +149,7 @@ var knownTemplates = map[string]familyType{
|
|||||||
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||||
|
|
||||||
if defaultCtx == 0 && cfg.ContextSize == nil {
|
if defaultCtx == 0 && cfg.ContextSize == nil {
|
||||||
ctxSize := f.EstimateLLaMACppUsage().ContextSize
|
ctxSize := f.EstimateLLaMACppRun().ContextSize
|
||||||
if ctxSize > 0 {
|
if ctxSize > 0 {
|
||||||
cSize := int(ctxSize)
|
cSize := int(ctxSize)
|
||||||
cfg.ContextSize = &cSize
|
cfg.ContextSize = &cSize
|
||||||
@ -157,6 +159,46 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GPU options
|
||||||
|
if cfg.Options == nil {
|
||||||
|
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
|
||||||
|
cfg.Options = []string{"gpu"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// vram estimation
|
||||||
|
vram, err := xsysinfo.TotalAvailableVRAM()
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
|
||||||
|
} else {
|
||||||
|
estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
|
||||||
|
} else {
|
||||||
|
if estimate.IsFullOffload {
|
||||||
|
log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
|
||||||
|
}
|
||||||
|
|
||||||
|
if estimate.EstimatedVRAM > vram {
|
||||||
|
log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
|
||||||
|
log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
|
||||||
|
cfg.NGPULayers = &estimate.EstimatedLayers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.NGPULayers == nil {
|
||||||
|
// we assume we want to offload all layers
|
||||||
|
defaultHigh := defaultNGPULayers
|
||||||
|
cfg.NGPULayers = &defaultHigh
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")
|
||||||
|
|
||||||
|
// template estimations
|
||||||
if cfg.HasTemplate() {
|
if cfg.HasTemplate() {
|
||||||
// nothing to guess here
|
// nothing to guess here
|
||||||
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
|
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
|
||||||
@ -166,12 +208,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
|||||||
log.Debug().
|
log.Debug().
|
||||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||||
Any("modelName", f.Model().Name).
|
Any("modelName", f.Metadata().Name).
|
||||||
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
|
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
|
||||||
|
|
||||||
// guess the name
|
// guess the name
|
||||||
if cfg.Name == "" {
|
if cfg.Name == "" {
|
||||||
cfg.Name = f.Model().Name
|
cfg.Name = f.Metadata().Name
|
||||||
}
|
}
|
||||||
|
|
||||||
family := identifyFamily(f)
|
family := identifyFamily(f)
|
||||||
@ -207,6 +249,7 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
|||||||
cfg.TemplateConfig.JinjaTemplate = true
|
cfg.TemplateConfig.JinjaTemplate = true
|
||||||
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
|
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func identifyFamily(f *gguf.GGUFFile) familyType {
|
func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||||
@ -231,7 +274,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
|
|||||||
commandR := arch == "command-r" && eosTokenID == 255001
|
commandR := arch == "command-r" && eosTokenID == 255001
|
||||||
qwen2 := arch == "qwen2"
|
qwen2 := arch == "qwen2"
|
||||||
phi3 := arch == "phi-3"
|
phi3 := arch == "phi-3"
|
||||||
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
|
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
|
||||||
deepseek2 := arch == "deepseek2"
|
deepseek2 := arch == "deepseek2"
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
|
@ -4,9 +4,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
gguf "github.com/gpustack/gguf-parser-go"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
gguf "github.com/thxcode/gguf-parser-go"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
|
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
|
||||||
@ -36,10 +35,4 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
|
|||||||
}
|
}
|
||||||
cfg.ContextSize = &defaultCtx
|
cfg.ContextSize = &defaultCtx
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.Options == nil {
|
|
||||||
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
|
|
||||||
cfg.Options = []string{"gpu"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
6
go.mod
6
go.mod
@ -27,6 +27,7 @@ require (
|
|||||||
github.com/golang/protobuf v1.5.4
|
github.com/golang/protobuf v1.5.4
|
||||||
github.com/google/go-containerregistry v0.19.2
|
github.com/google/go-containerregistry v0.19.2
|
||||||
github.com/google/uuid v1.6.0
|
github.com/google/uuid v1.6.0
|
||||||
|
github.com/gpustack/gguf-parser-go v0.17.0
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0
|
github.com/grpc-ecosystem/grpc-gateway v1.5.0
|
||||||
github.com/hpcloud/tail v1.0.0
|
github.com/hpcloud/tail v1.0.0
|
||||||
github.com/ipfs/go-log v1.0.5
|
github.com/ipfs/go-log v1.0.5
|
||||||
@ -110,6 +111,7 @@ require (
|
|||||||
github.com/pion/turn/v2 v2.1.6 // indirect
|
github.com/pion/turn/v2 v2.1.6 // indirect
|
||||||
github.com/pion/turn/v4 v4.0.0 // indirect
|
github.com/pion/turn/v4 v4.0.0 // indirect
|
||||||
github.com/pion/webrtc/v4 v4.0.9 // indirect
|
github.com/pion/webrtc/v4 v4.0.9 // indirect
|
||||||
|
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
|
||||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
||||||
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
|
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
|
||||||
github.com/wlynxg/anet v0.0.5 // indirect
|
github.com/wlynxg/anet v0.0.5 // indirect
|
||||||
@ -188,7 +190,7 @@ require (
|
|||||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||||
github.com/hashicorp/golang-lru v1.0.2 // indirect
|
github.com/hashicorp/golang-lru v1.0.2 // indirect
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
github.com/henvic/httpretty v0.1.3 // indirect
|
github.com/henvic/httpretty v0.1.4 // indirect
|
||||||
github.com/huandu/xstrings v1.5.0 // indirect
|
github.com/huandu/xstrings v1.5.0 // indirect
|
||||||
github.com/huin/goupnp v1.3.0 // indirect
|
github.com/huin/goupnp v1.3.0 // indirect
|
||||||
github.com/ipfs/boxo v0.27.4 // indirect
|
github.com/ipfs/boxo v0.27.4 // indirect
|
||||||
@ -278,7 +280,7 @@ require (
|
|||||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||||
github.com/shopspring/decimal v1.4.0 // indirect
|
github.com/shopspring/decimal v1.4.0 // indirect
|
||||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||||
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
|
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
|
||||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||||
github.com/spf13/cast v1.7.0 // indirect
|
github.com/spf13/cast v1.7.0 // indirect
|
||||||
|
12
go.sum
12
go.sum
@ -295,6 +295,8 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
|
|||||||
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
|
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
|
||||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
|
||||||
|
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
|
||||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
|
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
|
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
|
||||||
@ -307,8 +309,8 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP
|
|||||||
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
|
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||||
github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
|
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||||
github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
|
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||||
@ -660,6 +662,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
|
|||||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||||
|
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
|
||||||
|
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
|
||||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||||
@ -712,8 +716,8 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
|
|||||||
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||||
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
|
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
|
||||||
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
|
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
|
||||||
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
|
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
|
||||||
|
52
pkg/xsysinfo/gguf.go
Normal file
52
pkg/xsysinfo/gguf.go
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
package xsysinfo
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
gguf "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
type VRAMEstimate struct {
|
||||||
|
TotalVRAM uint64
|
||||||
|
AvailableVRAM uint64
|
||||||
|
ModelSize uint64
|
||||||
|
EstimatedLayers int
|
||||||
|
EstimatedVRAM uint64
|
||||||
|
IsFullOffload bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
|
||||||
|
// Get model metadata
|
||||||
|
m := f.Metadata()
|
||||||
|
a := f.Architecture()
|
||||||
|
|
||||||
|
// Calculate base model size
|
||||||
|
modelSize := uint64(m.Size)
|
||||||
|
|
||||||
|
if a.BlockCount == 0 {
|
||||||
|
return nil, errors.New("block count is 0")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimate number of layers that can fit in VRAM
|
||||||
|
// Each layer typically requires about 1/32 of the model size
|
||||||
|
layerSize := modelSize / uint64(a.BlockCount)
|
||||||
|
estimatedLayers := int(availableVRAM / layerSize)
|
||||||
|
|
||||||
|
// If we can't fit even one layer, we need to do full offload
|
||||||
|
isFullOffload := estimatedLayers <= 0
|
||||||
|
if isFullOffload {
|
||||||
|
estimatedLayers = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate estimated VRAM usage
|
||||||
|
estimatedVRAM := uint64(estimatedLayers) * layerSize
|
||||||
|
|
||||||
|
return &VRAMEstimate{
|
||||||
|
TotalVRAM: availableVRAM,
|
||||||
|
AvailableVRAM: availableVRAM,
|
||||||
|
ModelSize: modelSize,
|
||||||
|
EstimatedLayers: estimatedLayers,
|
||||||
|
EstimatedVRAM: estimatedVRAM,
|
||||||
|
IsFullOffload: isFullOffload,
|
||||||
|
}, nil
|
||||||
|
}
|
@ -16,6 +16,22 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
|
|||||||
return gpu.GraphicsCards, nil
|
return gpu.GraphicsCards, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TotalAvailableVRAM() (uint64, error) {
|
||||||
|
gpus, err := GPUs()
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var totalVRAM uint64
|
||||||
|
for _, gpu := range gpus {
|
||||||
|
if gpu.Node.Memory.TotalUsableBytes > 0 {
|
||||||
|
totalVRAM += uint64(gpu.Node.Memory.TotalUsableBytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalVRAM, nil
|
||||||
|
}
|
||||||
|
|
||||||
func HasGPU(vendor string) bool {
|
func HasGPU(vendor string) bool {
|
||||||
gpus, err := GPUs()
|
gpus, err := GPUs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user