mirror of
https://github.com/mudler/LocalAI.git
synced 2025-01-04 20:04:09 +00:00
133987b1fb
* start by checking /scan during the checksum update Signed-off-by: Dave Lee <dave@gray101.com> * add back in golang side features: downloader/uri gets struct and scan function, gallery uses it, and secscan/models calls it. Signed-off-by: Dave Lee <dave@gray101.com> * add a param to scan specific urls - useful for debugging Signed-off-by: Dave Lee <dave@gray101.com> * helpful printouts Signed-off-by: Dave Lee <dave@gray101.com> * fix offsets Signed-off-by: Dave Lee <dave@gray101.com> * fix error and naming Signed-off-by: Dave Lee <dave@gray101.com> * expose error Signed-off-by: Dave Lee <dave@gray101.com> * fix json tags Signed-off-by: Dave Lee <dave@gray101.com> * slight wording change Signed-off-by: Dave Lee <dave@gray101.com> * go mod tidy - getting warnings Signed-off-by: Dave Lee <dave@gray101.com> * split out python to make editing easier, add some simple code to delete contaminated entries from gallery Signed-off-by: Dave Lee <dave@gray101.com> * o7 to my favorite part of our old name, go-skynet Signed-off-by: Dave Lee <dave@gray101.com> * merge fix Signed-off-by: Dave Lee <dave@gray101.com> * merge fix Signed-off-by: Dave Lee <dave@gray101.com> * merge fix Signed-off-by: Dave Lee <dave@gray101.com> * address review comments Signed-off-by: Dave Lee <dave@gray101.com> * forgot secscan could accept multiple URL at once Signed-off-by: Dave Lee <dave@gray101.com> * invert naming and actually use it Signed-off-by: Dave Lee <dave@gray101.com> * missed cli/models.go Signed-off-by: Dave Lee <dave@gray101.com> * Update .github/check_and_update.py Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Signed-off-by: Dave <dave@gray101.com> --------- Signed-off-by: Dave Lee <dave@gray101.com> Signed-off-by: Dave <dave@gray101.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
102 lines
3.8 KiB
Go
102 lines
3.8 KiB
Go
package cli
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
|
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/core/gallery"
|
|
"github.com/mudler/LocalAI/pkg/downloader"
|
|
gguf "github.com/thxcode/gguf-parser-go"
|
|
)
|
|
|
|
type UtilCMD struct {
|
|
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
|
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
|
}
|
|
|
|
type GGUFInfoCMD struct {
|
|
Args []string `arg:"" optional:"" name:"args" help:"Arguments to pass to the utility command"`
|
|
Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
|
|
}
|
|
|
|
type HFScanCMD struct {
|
|
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
|
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
|
ToScan []string `arg:""`
|
|
}
|
|
|
|
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
|
if u.Args == nil || len(u.Args) == 0 {
|
|
return fmt.Errorf("no GGUF file provided")
|
|
}
|
|
// We try to guess only if we don't have a template defined already
|
|
f, err := gguf.ParseGGUFFile(u.Args[0])
|
|
if err != nil {
|
|
// Only valid for gguf files
|
|
log.Error().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
|
|
return err
|
|
}
|
|
|
|
log.Info().
|
|
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
|
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
|
Any("modelName", f.Model().Name).
|
|
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
|
|
|
|
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
|
|
log.Info().Any("architecture", fmt.Sprintf("%+v", f.Architecture())).Msg("Architecture")
|
|
|
|
v, exists := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
|
if exists {
|
|
log.Info().Msgf("chat_template: %s", v.ValueString())
|
|
}
|
|
|
|
if u.Header {
|
|
for _, metadata := range f.Header.MetadataKV {
|
|
log.Info().Msgf("%s: %+v", metadata.Key, metadata.Value)
|
|
}
|
|
// log.Info().Any("header", fmt.Sprintf("%+v", f.Header)).Msg("Header")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
|
|
log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!")
|
|
if len(hfscmd.ToScan) == 0 {
|
|
log.Info().Msg("Checking all installed models against galleries")
|
|
var galleries []config.Gallery
|
|
if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil {
|
|
log.Error().Err(err).Msg("unable to load galleries")
|
|
}
|
|
|
|
err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath)
|
|
if err == nil {
|
|
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
|
|
} else {
|
|
log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!")
|
|
}
|
|
return err
|
|
} else {
|
|
var errs error = nil
|
|
for _, uri := range hfscmd.ToScan {
|
|
log.Info().Str("uri", uri).Msg("scanning specific uri")
|
|
scanResults, err := downloader.HuggingFaceScan(uri)
|
|
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
|
|
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
|
|
errs = errors.Join(errs, err)
|
|
}
|
|
}
|
|
if errs != nil {
|
|
return errs
|
|
}
|
|
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
|
|
return nil
|
|
}
|
|
}
|