2024-01-05 12:04:46 -05:00
|
|
|
package localai
|
|
|
|
|
|
|
|
import (
|
2024-06-23 01:24:36 -07:00
|
|
|
"github.com/mudler/LocalAI/core/backend"
|
|
|
|
"github.com/mudler/LocalAI/core/config"
|
2025-02-10 06:06:16 -05:00
|
|
|
"github.com/mudler/LocalAI/core/http/middleware"
|
2024-06-23 01:24:36 -07:00
|
|
|
"github.com/mudler/LocalAI/pkg/model"
|
2024-01-05 12:04:46 -05:00
|
|
|
|
|
|
|
"github.com/gofiber/fiber/v2"
|
2024-06-23 01:24:36 -07:00
|
|
|
"github.com/mudler/LocalAI/core/schema"
|
2024-03-01 10:19:53 -05:00
|
|
|
"github.com/rs/zerolog/log"
|
2024-11-02 20:13:35 +01:00
|
|
|
|
|
|
|
"github.com/mudler/LocalAI/pkg/utils"
|
2024-01-05 12:04:46 -05:00
|
|
|
)
|
|
|
|
|
2024-03-29 22:29:33 +01:00
|
|
|
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
|
2024-11-02 20:13:35 +01:00
|
|
|
//
|
|
|
|
// @Summary Generates audio from the input text.
|
|
|
|
// @Accept json
|
|
|
|
// @Produce audio/x-wav
|
|
|
|
// @Param request body schema.TTSRequest true "query params"
|
|
|
|
// @Success 200 {string} binary "generated audio/wav file"
|
|
|
|
// @Router /v1/audio/speech [post]
|
|
|
|
// @Router /tts [post]
|
2024-04-17 23:33:49 +02:00
|
|
|
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
2024-01-05 12:04:46 -05:00
|
|
|
return func(c *fiber.Ctx) error {
|
2025-02-10 06:06:16 -05:00
|
|
|
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
|
|
|
|
if !ok || input.Model == "" {
|
|
|
|
return fiber.ErrBadRequest
|
2024-01-05 12:04:46 -05:00
|
|
|
}
|
|
|
|
|
2025-02-10 06:06:16 -05:00
|
|
|
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
|
|
|
if !ok || cfg == nil {
|
|
|
|
return fiber.ErrBadRequest
|
2024-04-17 23:33:49 +02:00
|
|
|
}
|
|
|
|
|
2025-02-10 06:06:16 -05:00
|
|
|
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
|
2024-04-17 23:33:49 +02:00
|
|
|
|
2025-02-10 06:06:16 -05:00
|
|
|
if cfg.Backend == "" {
|
|
|
|
if input.Backend != "" {
|
|
|
|
cfg.Backend = input.Backend
|
|
|
|
} else {
|
|
|
|
cfg.Backend = model.PiperBackend
|
|
|
|
}
|
2024-02-10 21:37:03 +01:00
|
|
|
}
|
|
|
|
|
2024-06-01 20:26:27 +02:00
|
|
|
if input.Language != "" {
|
|
|
|
cfg.Language = input.Language
|
|
|
|
}
|
|
|
|
|
|
|
|
if input.Voice != "" {
|
|
|
|
cfg.Voice = input.Voice
|
|
|
|
}
|
|
|
|
|
2025-02-10 06:06:16 -05:00
|
|
|
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
2024-04-17 23:33:49 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2024-01-05 12:04:46 -05:00
|
|
|
}
|
2024-11-02 20:13:35 +01:00
|
|
|
|
|
|
|
// Convert generated file to target format
|
|
|
|
filePath, err = utils.AudioConvert(filePath, input.Format)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-04-17 23:33:49 +02:00
|
|
|
return c.Download(filePath)
|
2024-01-05 12:04:46 -05:00
|
|
|
}
|
|
|
|
}
|