diff --git a/core/cli/util.go b/core/cli/util.go
index 57b8ad9e..5802d996 100644
--- a/core/cli/util.go
+++ b/core/cli/util.go
@@ -7,11 +7,11 @@ import (
 
 	"github.com/rs/zerolog/log"
 
+	gguf "github.com/gpustack/gguf-parser-go"
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	gguf "github.com/thxcode/gguf-parser-go"
 )
 
 type UtilCMD struct {
@@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
 	log.Info().
 		Any("eosTokenID", f.Tokenizer().EOSTokenID).
 		Any("bosTokenID", f.Tokenizer().BOSTokenID).
-		Any("modelName", f.Model().Name).
+		Any("modelName", f.Metadata().Name).
 		Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
 
 	log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
diff --git a/core/config/gguf.go b/core/config/gguf.go
index cf9eacaa..1c8db29c 100644
--- a/core/config/gguf.go
+++ b/core/config/gguf.go
@@ -3,9 +3,10 @@ package config
 import (
 	"strings"
 
+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 
-	gguf "github.com/thxcode/gguf-parser-go"
+	gguf "github.com/gpustack/gguf-parser-go"
 )
 
 type familyType uint8
@@ -23,6 +24,7 @@ const (
 
 const (
 	defaultContextSize = 1024
+	defaultNGPULayers  = 99999999 // fallback when NGPULayers is unset; intended to offload all layers
 )
 
 type settingsConfig struct {
@@ -147,7 +149,7 @@ var knownTemplates = map[string]familyType{
 func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 
 	if defaultCtx == 0 && cfg.ContextSize == nil {
-		ctxSize := f.EstimateLLaMACppUsage().ContextSize
+		ctxSize := f.EstimateLLaMACppRun().ContextSize
 		if ctxSize > 0 {
 			cSize := int(ctxSize)
 			cfg.ContextSize = &cSize
@@ -157,6 +159,46 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 		}
 	}
 
+	// GPU options
+	if cfg.Options == nil {
+		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
+			cfg.Options = []string{"gpu"}
+		}
+	}
+
+	// vram estimation
+	vram, err := xsysinfo.TotalAvailableVRAM()
+	if err != nil {
+		log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
+	} else {
+		estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
+		if err != nil {
+			log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
+		} else {
+			if estimate.IsFullOffload {
+				log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
+			}
+
+			if estimate.EstimatedVRAM > vram {
+				log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
+			}
+
+			if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
+				log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
+				cfg.NGPULayers = &estimate.EstimatedLayers
+			}
+		}
+	}
+
+	if cfg.NGPULayers == nil {
+		// we assume we want to offload all layers
+		defaultHigh := defaultNGPULayers
+		cfg.NGPULayers = &defaultHigh
+	}
+
+	log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")
+
+	// template estimations
 	if cfg.HasTemplate() {
 		// nothing to guess here
 		log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
@@ -166,12 +208,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 	log.Debug().
 		Any("eosTokenID", f.Tokenizer().EOSTokenID).
 		Any("bosTokenID", f.Tokenizer().BOSTokenID).
-		Any("modelName", f.Model().Name).
+		Any("modelName", f.Metadata().Name).
 		Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
 
 	// guess the name
 	if cfg.Name == "" {
-		cfg.Name = f.Model().Name
+		cfg.Name = f.Metadata().Name
 	}
 
 	family := identifyFamily(f)
@@ -207,6 +249,7 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
 		cfg.TemplateConfig.JinjaTemplate = true
 		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
 	}
+
 }
 
 func identifyFamily(f *gguf.GGUFFile) familyType {
@@ -231,7 +274,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
 	commandR := arch == "command-r" && eosTokenID == 255001
 	qwen2 := arch == "qwen2"
 	phi3 := arch == "phi-3"
-	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
+	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
 	deepseek2 := arch == "deepseek2"
 
 	switch {
diff --git a/core/config/guesser.go b/core/config/guesser.go
index e66df70d..260f5a64 100644
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@@ -4,9 +4,8 @@ import (
 	"os"
 	"path/filepath"
 
-	"github.com/mudler/LocalAI/pkg/xsysinfo"
+	gguf "github.com/gpustack/gguf-parser-go"
 	"github.com/rs/zerolog/log"
-	gguf "github.com/thxcode/gguf-parser-go"
 )
 
 func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
@@ -36,10 +35,4 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
 		}
 		cfg.ContextSize = &defaultCtx
 	}
-
-	if cfg.Options == nil {
-		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
-			cfg.Options = []string{"gpu"}
-		}
-	}
 }
diff --git a/go.mod b/go.mod
index 856d41f5..757376ab 100644
--- a/go.mod
+++ b/go.mod
@@ -27,6 +27,7 @@ require (
 	github.com/golang/protobuf v1.5.4
 	github.com/google/go-containerregistry v0.19.2
 	github.com/google/uuid v1.6.0
+	github.com/gpustack/gguf-parser-go v0.17.0
 	github.com/grpc-ecosystem/grpc-gateway v1.5.0
 	github.com/hpcloud/tail v1.0.0
 	github.com/ipfs/go-log v1.0.5
@@ -110,6 +111,7 @@ require (
 	github.com/pion/turn/v2 v2.1.6 // indirect
 	github.com/pion/turn/v4 v4.0.0 // indirect
 	github.com/pion/webrtc/v4 v4.0.9 // indirect
+	github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
 	github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
 	github.com/shirou/gopsutil/v4 v4.24.7 // indirect
 	github.com/wlynxg/anet v0.0.5 // indirect
@@ -188,7 +190,7 @@ require (
 	github.com/hashicorp/go-multierror v1.1.1 // indirect
 	github.com/hashicorp/golang-lru v1.0.2 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
-	github.com/henvic/httpretty v0.1.3 // indirect
+	github.com/henvic/httpretty v0.1.4 // indirect
 	github.com/huandu/xstrings v1.5.0 // indirect
 	github.com/huin/goupnp v1.3.0 // indirect
 	github.com/ipfs/boxo v0.27.4 // indirect
@@ -278,7 +280,7 @@ require (
 	github.com/shoenig/go-m1cpu v0.1.6 // indirect
 	github.com/shopspring/decimal v1.4.0 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
-	github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
+	github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
 	github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
 	github.com/spf13/cast v1.7.0 // indirect
diff --git a/go.sum b/go.sum
index 06e0238f..aad5d177 100644
--- a/go.sum
+++ b/go.sum
@@ -295,6 +295,8 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
 github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
 github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
 github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
+github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
 github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
 github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
 github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
@@ -307,8 +309,8 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP
 github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
-github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
-github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
+github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
+github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
@@ -660,6 +662,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
 github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
+github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
 github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
 github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
 github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
@@ -712,8 +716,8 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
 github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
-github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
+github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
 github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
 github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
 github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
diff --git a/pkg/xsysinfo/gguf.go b/pkg/xsysinfo/gguf.go
new file mode 100644
index 00000000..d5c8b5bd
--- /dev/null
+++ b/pkg/xsysinfo/gguf.go
@@ -0,0 +1,52 @@
+package xsysinfo
+
+import (
+	"errors"
+
+	gguf "github.com/gpustack/gguf-parser-go"
+)
+
+type VRAMEstimate struct {
+	TotalVRAM       uint64 // set to the availableVRAM argument of EstimateGGUFVRAMUsage
+	AvailableVRAM   uint64 // VRAM budget the estimate was computed against
+	ModelSize       uint64 // model size read from the GGUF metadata
+	EstimatedLayers int    // layers estimated to fit in AvailableVRAM (never negative)
+	EstimatedVRAM   uint64 // EstimatedLayers multiplied by the per-layer size
+	IsFullOffload   bool   // true when no layer at all is estimated to fit
+}
+
+// EstimateGGUFVRAMUsage estimates how many model layers fit in availableVRAM,
+// treating every block as an equal share of the model size. It returns an
+// error when the block count or the derived per-layer size is zero.
+func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
+	m := f.Metadata()
+	a := f.Architecture()
+
+	if a.BlockCount == 0 {
+		return nil, errors.New("block count is 0")
+	}
+
+	// Approximate each layer as an equal share of the model size.
+	modelSize := uint64(m.Size)
+	layerSize := modelSize / uint64(a.BlockCount)
+	if layerSize == 0 {
+		// Model size below the block count: dividing by it would panic.
+		return nil, errors.New("layer size is 0")
+	}
+	estimatedLayers := int(availableVRAM / layerSize)
+
+	// Flag the case where not even one layer fits in the available VRAM.
+	isFullOffload := estimatedLayers <= 0
+	if isFullOffload {
+		estimatedLayers = 0
+	}
+
+	return &VRAMEstimate{
+		TotalVRAM:       availableVRAM,
+		AvailableVRAM:   availableVRAM,
+		ModelSize:       modelSize,
+		EstimatedLayers: estimatedLayers,
+		EstimatedVRAM:   uint64(estimatedLayers) * layerSize,
+		IsFullOffload:   isFullOffload,
+	}, nil
+}
diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go
index a692c775..9a70e17b 100644
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@@ -16,6 +16,22 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
 	return gpu.GraphicsCards, nil
 }
 
+// TotalAvailableVRAM sums Node.Memory.TotalUsableBytes over the cards from
+// GPUs(), skipping cards that lack node or memory information.
+func TotalAvailableVRAM() (uint64, error) {
+	cards, err := GPUs()
+	if err != nil {
+		return 0, err
+	}
+	var totalVRAM uint64
+	for _, card := range cards {
+		if card != nil && card.Node != nil && card.Node.Memory != nil && card.Node.Memory.TotalUsableBytes > 0 {
+			totalVRAM += uint64(card.Node.Memory.TotalUsableBytes)
+		}
+	}
+	return totalVRAM, nil
+}
+
 func HasGPU(vendor string) bool {
 	gpus, err := GPUs()
 	if err != nil {