LocalAI/core/cli/soundgeneration.go

package cli

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/mudler/LocalAI/core/backend"
	cliContext "github.com/mudler/LocalAI/core/cli/context"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/rs/zerolog/log"
)

// SoundGenerationCMD holds the positional arguments and flags of the
// sound-generation CLI command; its Run method performs the generation.
//
// Text is the positional prompt (multiple words are joined with a single
// space by Run). Duration, Temperature and InputFileSampleDivisor are kept as
// strings and converted by Run via parseToFloat32Ptr / parseToInt32Ptr, which
// yield nil for values that do not parse (including the empty string).
//
// NOTE(review): the struct tags (arg, short, env, default, group, ...) look
// like alecthomas/kong tags — confirm against the CLI wiring, which is not
// visible in this file.
type SoundGenerationCMD struct {
	Text []string `arg:""`

	Backend                string   `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
	Model                  string   `short:"m" required:"" help:"Model name to run the SoundGeneration"`
	Duration               string   `short:"d" help:"If specified, the length of audio to generate in seconds"`
	Temperature            string   `short:"t" help:"If specified, the temperature of the generation"`
	InputFile              string   `short:"i" help:"If specified, the input file to condition generation upon"`
	InputFileSampleDivisor string   `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
	DoSample               bool     `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
	OutputFile             string   `short:"o" type:"path" help:"The path to write the output wav file"`
	ModelsPath             string   `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
	BackendAssetsPath      string   `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
	ExternalGRPCBackends   []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
}

// parseToFloat32Ptr converts input into a float32 and hands back a pointer to
// the converted value. Anything strconv.ParseFloat rejects at 32-bit
// precision (the empty string, non-numeric text, out-of-range magnitudes)
// produces a nil pointer instead of an error.
func parseToFloat32Ptr(input string) *float32 {
	var result *float32
	if parsed, parseErr := strconv.ParseFloat(input, 32); parseErr == nil {
		narrowed := float32(parsed)
		result = &narrowed
	}
	return result
}

// parseToInt32Ptr reads input as a base-10 integer that must fit in 32 bits
// and returns a pointer to it. A nil pointer signals that strconv.ParseInt
// could not parse the text (empty, non-integer, or out of int32 range).
func parseToInt32Ptr(input string) *int32 {
	parsed, parseErr := strconv.ParseInt(input, 10, 32)
	if parseErr == nil {
		value := int32(parsed)
		return &value
	}
	return nil
}

// Run executes the sound-generation command.
//
// It joins the positional Text arguments with spaces, builds an application
// config from the path/backend flags, calls backend.SoundGeneration with the
// requested model, and prints the path of the resulting file. When OutputFile
// is set, the generated file is renamed to that path; otherwise the path
// returned by the backend is reported as-is.
//
// Duration, Temperature and InputFileSampleDivisor are forwarded as nil when
// they are empty or do not parse (see parseToFloat32Ptr / parseToInt32Ptr).
//
// An error is returned when an ExternalGRPCBackends entry is not of the form
// "name:uri", when generation fails, or when the output file cannot be moved
// into place. All gRPC processes started through the model loader are stopped
// before Run returns.
func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
	outputFile := t.OutputFile
	// Use the requested output's directory as AudioDir when one is given.
	// NOTE(review): presumably the backend writes the generated file under
	// AudioDir, which keeps the os.Rename below on one filesystem — confirm.
	outputDir := t.BackendAssetsPath
	if outputFile != "" {
		outputDir = filepath.Dir(outputFile)
	}

	text := strings.Join(t.Text, " ")

	externalBackends := make(map[string]string, len(t.ExternalGRPCBackends))
	// Each entry is "name:uri"; split on the first ':' only so that the URI
	// itself may contain further colons (e.g. host:port).
	for _, v := range t.ExternalGRPCBackends {
		sep := strings.IndexByte(v, ':')
		if sep < 0 {
			// Slicing with -1 would panic; report the malformed entry instead.
			return fmt.Errorf("invalid external grpc backend %q: expected format name:uri", v)
		}
		name, uri := v[:sep], v[sep+1:]
		externalBackends[name] = uri
		log.Debug().Str("backend", name).Str("uri", uri).Msg("registered external grpc backend")
	}

	opts := &config.ApplicationConfig{
		ModelPath:            t.ModelsPath,
		Context:              context.Background(),
		AudioDir:             outputDir,
		AssetsDestination:    t.BackendAssetsPath,
		ExternalGRPCBackends: externalBackends,
	}
	ml := model.NewModelLoader(opts.ModelPath)

	// Best-effort cleanup: a failure to stop backends is logged, not returned,
	// so it never masks the result of the generation itself.
	defer func() {
		err := ml.StopAllGRPC()
		if err != nil {
			log.Error().Err(err).Msg("unable to stop all grpc processes")
		}
	}()

	options := config.BackendConfig{}
	options.SetDefaults()
	options.Backend = t.Backend

	// A nil inputFile tells the backend that no conditioning file was given.
	var inputFile *string
	if t.InputFile != "" {
		inputFile = &t.InputFile
	}

	filePath, _, err := backend.SoundGeneration(t.Model, text,
		parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
		inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
	if err != nil {
		return fmt.Errorf("generating sound with model %q: %w", t.Model, err)
	}

	if outputFile == "" {
		fmt.Printf("Generate file %s\n", filePath)
		return nil
	}

	if err := os.Rename(filePath, outputFile); err != nil {
		return fmt.Errorf("moving generated file %q to %q: %w", filePath, outputFile, err)
	}
	fmt.Printf("Generate file %s\n", outputFile)
	return nil
}
feat: elevenlabs `sound-generation` api (#3355) * initial version of elevenlabs compatible soundgeneration api and cli command Signed-off-by: Dave Lee <dave@gray101.com> * minor cleanup Signed-off-by: Dave Lee <dave@gray101.com> * restore TTS, add test Signed-off-by: Dave Lee <dave@gray101.com> * remove stray s Signed-off-by: Dave Lee <dave@gray101.com> * fix Signed-off-by: Dave Lee <dave@gray101.com> --------- Signed-off-by: Dave Lee <dave@gray101.com> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2024-08-24 00:20:28 +00:00			`package cli`

			`import (`
			`"context"`
			`"fmt"`
			`"os"`
			`"path/filepath"`
			`"strconv"`
			`"strings"`

			`"github.com/mudler/LocalAI/core/backend"`
			`cliContext "github.com/mudler/LocalAI/core/cli/context"`
			`"github.com/mudler/LocalAI/core/config"`
			`"github.com/mudler/LocalAI/pkg/model"`
			`"github.com/rs/zerolog/log"`
			`)`

			`type SoundGenerationCMD struct {`
			Text []string `arg:""`

			Backend string `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
			Model string `short:"m" required:"" help:"Model name to run the SoundGeneration"`
			Duration string `short:"d" help:"If specified, the length of audio to generate in seconds"`
			Temperature string `short:"t" help:"If specified, the temperature of the generation"`
			InputFile string `short:"i" help:"If specified, the input file to condition generation upon"`
			InputFileSampleDivisor string `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
			DoSample bool `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
			OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"`
			ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
			BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
			ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
			`}`

			`func parseToFloat32Ptr(input string) *float32 {`
			`f, err := strconv.ParseFloat(input, 32)`
			`if err != nil {`
			`return nil`
			`}`
			`f2 := float32(f)`
			`return &f2`
			`}`

			`func parseToInt32Ptr(input string) *int32 {`
			`i, err := strconv.ParseInt(input, 10, 32)`
			`if err != nil {`
			`return nil`
			`}`
			`i2 := int32(i)`
			`return &i2`
			`}`

			`func (t SoundGenerationCMD) Run(ctx cliContext.Context) error {`
			`outputFile := t.OutputFile`
			`outputDir := t.BackendAssetsPath`
			`if outputFile != "" {`
			`outputDir = filepath.Dir(outputFile)`
			`}`

			`text := strings.Join(t.Text, " ")`

			`externalBackends := make(map[string]string)`
			`// split ":" to get backend name and the uri`
			`for _, v := range t.ExternalGRPCBackends {`
			`backend := v[:strings.IndexByte(v, ':')]`
			`uri := v[strings.IndexByte(v, ':')+1:]`
			`externalBackends[backend] = uri`
			`fmt.Printf("TMP externalBackends[%q]=%q\n\n", backend, uri)`
			`}`

			`opts := &config.ApplicationConfig{`
			`ModelPath: t.ModelsPath,`
			`Context: context.Background(),`
			`AudioDir: outputDir,`
			`AssetsDestination: t.BackendAssetsPath,`
			`ExternalGRPCBackends: externalBackends,`
			`}`
			`ml := model.NewModelLoader(opts.ModelPath)`

			`defer func() {`
			`err := ml.StopAllGRPC()`
			`if err != nil {`
			`log.Error().Err(err).Msg("unable to stop all grpc processes")`
			`}`
			`}()`

			`options := config.BackendConfig{}`
			`options.SetDefaults()`
feat: track internally started models by ID (#3693) * chore(refactor): track internally started models by ID Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Just extend options, no need to copy Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Improve debugging for rerankers failures Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify model loading with rerankers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Be more consistent when generating model options Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Uncommitted code Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Make deleteProcess more idiomatic Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt CLI for sound generation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup threads definition Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Handle corner case where c.Seed is nil Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Consistently use ModelOptions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt new code to refactoring Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Dave <dave@gray101.com> 2024-10-02 06:55:58 +00:00			`options.Backend = t.Backend`
feat: elevenlabs `sound-generation` api (#3355) * initial version of elevenlabs compatible soundgeneration api and cli command Signed-off-by: Dave Lee <dave@gray101.com> * minor cleanup Signed-off-by: Dave Lee <dave@gray101.com> * restore TTS, add test Signed-off-by: Dave Lee <dave@gray101.com> * remove stray s Signed-off-by: Dave Lee <dave@gray101.com> * fix Signed-off-by: Dave Lee <dave@gray101.com> --------- Signed-off-by: Dave Lee <dave@gray101.com> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2024-08-24 00:20:28 +00:00
			`var inputFile *string`
			`if t.InputFile != "" {`
			`inputFile = &t.InputFile`
			`}`

feat: track internally started models by ID (#3693) * chore(refactor): track internally started models by ID Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Just extend options, no need to copy Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Improve debugging for rerankers failures Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify model loading with rerankers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Be more consistent when generating model options Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Uncommitted code Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Make deleteProcess more idiomatic Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt CLI for sound generation Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup threads definition Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Handle corner case where c.Seed is nil Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Consistently use ModelOptions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt new code to refactoring Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Dave <dave@gray101.com> 2024-10-02 06:55:58 +00:00			`filePath, _, err := backend.SoundGeneration(t.Model, text,`
feat: elevenlabs `sound-generation` api (#3355) * initial version of elevenlabs compatible soundgeneration api and cli command Signed-off-by: Dave Lee <dave@gray101.com> * minor cleanup Signed-off-by: Dave Lee <dave@gray101.com> * restore TTS, add test Signed-off-by: Dave Lee <dave@gray101.com> * remove stray s Signed-off-by: Dave Lee <dave@gray101.com> * fix Signed-off-by: Dave Lee <dave@gray101.com> --------- Signed-off-by: Dave Lee <dave@gray101.com> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com> 2024-08-24 00:20:28 +00:00			`parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,`
			`inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)`

			`if err != nil {`
			`return err`
			`}`
			`if outputFile != "" {`
			`if err := os.Rename(filePath, outputFile); err != nil {`
			`return err`
			`}`
			`fmt.Printf("Generate file %s\n", outputFile)`
			`} else {`
			`fmt.Printf("Generate file %s\n", filePath)`
			`}`
			`return nil`
			`}`