feat: add tts with go-piper (#649)

Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2023-06-22 17:53:10 +02:00 committed by GitHub
parent cc31c58235
commit a7bb029d23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 237 additions and 29 deletions

View File

@ -5,8 +5,13 @@ FROM golang:$GO_VERSION as requirements
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG SPDLOG_VERSION="1.11.0"
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ARG GO_TAGS="stablediffusion tts"
RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch
@ -23,6 +28,8 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
WORKDIR /build
# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev
@ -30,19 +37,37 @@ RUN apt-get install -y libopenblas-dev
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# piper requirements
# Use pre-compiled Piper phonemization library (includes onnxruntime)
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
tar -xzvf - && \
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
cd "spdlog-${SPDLOG_VERSION}/build" && \
cmake .. && \
make -j8 && \
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
cd /build && \
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz" | \
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
# \
# ; fi
FROM requirements as builder
ARG GO_TAGS=stablediffusion
ARG GO_TAGS="stablediffusion tts"
ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
WORKDIR /build
COPY . .
RUN make build
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
FROM requirements

View File

@ -11,6 +11,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=f5a8c45396741470583f59b916a2a7641e63bcd0
WHISPER_CPP_VERSION?=57543c169e27312e7546d07ed0d8c6eb806ebc36
BERT_VERSION?=6069103f54b9969c02e789d0fb12a23bd614285f
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
export BUILD_TYPE?=
CGO_LDFLAGS?=
@ -18,8 +19,9 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
GO_TAGS?=
BUILD_ID?=git
LD_FLAGS=?=
LD_FLAGS?=
OPTIONAL_TARGETS?=
ESPEAK_DATA?=
OS := $(shell uname -s)
ARCH := $(shell uname -m)
@ -30,7 +32,7 @@ CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-piper:$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
@ -55,10 +57,15 @@ ifeq ($(STATIC),true)
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(GO_TAGS),stablediffusion)
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
endif
ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
endif
.PHONY: all test build vendor
all: help
@ -82,6 +89,10 @@ gpt4all:
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +
@find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} +
## go-piper
# Clones https://github.com/mudler/go-piper (including submodules) into
# ./go-piper, checks out $(PIPER_VERSION) on a new local branch named "build",
# and then updates the submodules recursively with a depth of 1.
go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
## BERT embeddings
go-bert:
@ -133,6 +144,14 @@ backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
# Creates the backend-assets/espeak-ng-data directory.
# When ESPEAK_DATA is set to a non-empty value, the contents of that directory
# are copied into it; otherwise only a placeholder file named "keep" is created.
backend-assets/espeak-ng-data:
mkdir -p backend-assets/espeak-ng-data
ifdef ESPEAK_DATA
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
else
@touch backend-assets/espeak-ng-data/keep
endif
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
@ -172,6 +191,9 @@ go-llama:
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
# Builds the libpiper_binding.a and example/main targets inside the go-piper
# checkout. The go-piper prerequisite makes sure the sources are cloned first,
# so this target also works when invoked directly or under parallel make
# (same pattern as go-llama/libbinding.a: go-llama).
go-piper/libpiper_binding.a: go-piper
	$(MAKE) -C go-piper libpiper_binding.a example/main
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
@ -181,8 +203,9 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
prepare-sources: go-llama go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download
## GENERIC
@ -195,6 +218,7 @@ rebuild: ## Rebuilds the project
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) -C go-piper clean
$(MAKE) build
prepare: prepare-sources backend-assets/gpt4all $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
@ -210,6 +234,7 @@ clean: ## Remove build related file
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf ./go-piper
rm -rf $(BINARY_NAME)
rm -rf release/

View File

@ -128,6 +128,7 @@ func App(opts ...AppOption) (*fiber.App, error) {
// audio
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
app.Post("/tts", ttsEndpoint(cm, options))
// images
app.Post("/v1/images/generations", imageEndpoint(cm, options))
@ -136,6 +137,10 @@ func App(opts ...AppOption) (*fiber.App, error) {
app.Static("/generated-images", options.imageDir)
}
if options.audioDir != "" {
app.Static("/generated-audio", options.audioDir)
}
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}

78
api/localai.go Normal file
View File

@ -0,0 +1,78 @@
package api
import (
"fmt"
"os"
"path/filepath"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/tts"
"github.com/go-skynet/LocalAI/pkg/utils"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
)
// TTSRequest is the request body accepted by the text-to-speech endpoint.
type TTSRequest struct {
// Model is the model name; the handler joins it onto the loader's model path.
Model string `json:"model" yaml:"model"`
// Input is the text handed to the TTS backend.
Input string `json:"input" yaml:"input"`
}
// generateUniqueFileName returns a file name (not a full path) built from
// baseName and ext for which os.Stat reports no existing entry in dir.
//
// The first candidate is baseName+ext. If that is taken, the candidates
// baseName_2+ext, baseName_3+ext, ... are tried in order (numbering starts
// at 2).
//
// The search stops at the first candidate whose Stat fails for any reason.
// A "not exist" error is the normal success case. Other errors (dir is not a
// directory, permission denied, ...) cannot be fixed by trying another
// suffix, so the candidate is returned and the caller's subsequent write
// reports the real problem instead of this function spinning forever.
//
// The check and the later file creation are not atomic; concurrent callers
// may receive the same name.
func generateUniqueFileName(dir, baseName, ext string) string {
	fileName := baseName + ext
	for counter := 2; ; counter++ {
		if _, err := os.Stat(filepath.Join(dir, fileName)); err != nil {
			return fileName
		}
		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
	}
}
// ttsEndpoint returns the fiber handler registered for POST /tts.
//
// The handler:
//  1. parses a TTSRequest from the request body;
//  2. checks that the requested model name resolves inside the loader's
//     model directory;
//  3. loads the piper backend through the model loader;
//  4. synthesizes input.Input into a newly named WAV file under o.audioDir
//     (the directory is created if missing);
//  5. sends that file to the client with c.Download.
//
// Any failure is returned as an error to fiber. cm is currently unused.
func ttsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		input := new(TTSRequest)
		// Get input data from the request body
		if err := c.BodyParser(input); err != nil {
			return err
		}

		// Validate the client-supplied model name before it is used for
		// anything. VerifyPath joins its first argument onto the base path
		// itself, so it must receive the raw name: passing an already-joined
		// path would re-root it under the base and let "../" names through.
		if err := utils.VerifyPath(input.Model, o.loader.ModelPath); err != nil {
			return err
		}
		modelPath := filepath.Join(o.loader.ModelPath, input.Model)

		piperModel, err := o.loader.BackendLoader(model.PiperBackend, input.Model, []llama.ModelOption{}, uint32(0), o.assetsDestination)
		if err != nil {
			return err
		}

		if piperModel == nil {
			return fmt.Errorf("could not load piper model")
		}

		w, ok := piperModel.(*tts.Piper)
		if !ok {
			// w is a nil *tts.Piper when the assertion fails; report the
			// object the loader actually returned.
			return fmt.Errorf("loader returned non-piper object %+v", piperModel)
		}

		if err := os.MkdirAll(o.audioDir, 0755); err != nil {
			return err
		}

		fileName := generateUniqueFileName(o.audioDir, "piper", ".wav")
		filePath := filepath.Join(o.audioDir, fileName)

		if err := w.TTS(input.Input, modelPath, filePath); err != nil {
			return err
		}

		return c.Download(filePath)
	}
}

View File

@ -15,6 +15,7 @@ type Option struct {
f16 bool
debug, disableMessage bool
imageDir string
audioDir string
cors bool
preloadJSONModels string
preloadModelsFromPath string
@ -130,6 +131,12 @@ func WithDisableMessage(disableMessage bool) AppOption {
}
}
// WithAudioDir returns an AppOption that stores audioDir in the Option's
// audioDir field.
func WithAudioDir(audioDir string) AppOption {
	setAudioDir := func(opts *Option) {
		opts.audioDir = audioDir
	}
	return setAudioDir
}
func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.imageDir = imageDir

View File

@ -5,7 +5,7 @@ cd /build
if [ "$REBUILD" != "false" ]; then
rm -rf ./local-ai
make build
ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
fi
./local-ai "$@"

3
go.mod
View File

@ -1,6 +1,6 @@
module github.com/go-skynet/LocalAI
go 1.19
go 1.20
require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674
@ -52,6 +52,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mudler/go-piper v0.0.0-00010101000000-000000000000 // indirect
github.com/otiai10/mint v1.5.1 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect

View File

@ -78,7 +78,13 @@ func main() {
Name: "image-path",
Usage: "Image directory",
EnvVars: []string{"IMAGE_PATH"},
Value: "",
Value: "/tmp/generated/images",
},
&cli.StringFlag{
Name: "audio-path",
Usage: "audio directory",
EnvVars: []string{"AUDIO_PATH"},
Value: "/tmp/generated/audio",
},
&cli.StringFlag{
Name: "backend-assets-path",
@ -125,6 +131,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
api.WithContextSize(ctx.Int("context-size")),
api.WithDebug(ctx.Bool("debug")),
api.WithImageDir(ctx.String("image-path")),
api.WithAudioDir(ctx.String("audio-path")),
api.WithF16(ctx.Bool("f16")),
api.WithDisableMessage(false),
api.WithCors(ctx.Bool("cors")),

View File

@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/imdario/mergo"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
@ -80,21 +81,6 @@ func ReadConfigFile(filePath string) (*Config, error) {
return &config, nil
}
// inTrustedRoot reports, via a nil error, that path lies strictly below
// trustedRoot. The comparison is purely lexical (no symlink resolution).
//
// Both arguments are cleaned by filepath.Rel, so a trusted root written with
// a trailing separator still matches. The root itself is not considered to
// be inside the root. Relative paths are supported and always terminate; a
// path that has to climb out of trustedRoot with ".." is rejected.
func inTrustedRoot(path string, trustedRoot string) error {
	rel, err := filepath.Rel(trustedRoot, path)
	if err != nil {
		// One path is absolute and the other relative: nothing to compare.
		return fmt.Errorf("path is outside of trusted root")
	}

	parentPrefix := ".." + string(filepath.Separator)
	escapes := rel == ".." || (len(rel) >= len(parentPrefix) && rel[:len(parentPrefix)] == parentPrefix)
	if rel == "." || escapes {
		return fmt.Errorf("path is outside of trusted root")
	}
	return nil
}
// verifyPath joins path onto basePath, cleans the result and checks it with
// inTrustedRoot against basePath.
func verifyPath(path, basePath string) error {
	joined := filepath.Join(basePath, path)
	candidate := filepath.Clean(joined)
	return inTrustedRoot(candidate, basePath)
}
func Apply(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
// Create base path if it doesn't exist
err := os.MkdirAll(basePath, 0755)
@ -110,7 +96,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
for _, file := range config.Files {
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
if err := verifyPath(file.Filename, basePath); err != nil {
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
return err
}
// Create file path
@ -196,7 +182,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
// Write prompt template contents to separate files
for _, template := range config.PromptTemplates {
if err := verifyPath(template.Name+".tmpl", basePath); err != nil {
if err := utils.VerifyPath(template.Name+".tmpl", basePath); err != nil {
return err
}
// Create file path
@ -221,7 +207,7 @@ func Apply(basePath, nameOverride string, config *Config, configOverrides map[st
name = nameOverride
}
if err := verifyPath(name+".yaml", basePath); err != nil {
if err := utils.VerifyPath(name+".yaml", basePath); err != nil {
return err
}

View File

@ -9,6 +9,7 @@ import (
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-skynet/LocalAI/pkg/langchain"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/LocalAI/pkg/tts"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
@ -39,6 +40,7 @@ const (
RwkvBackend = "rwkv"
WhisperBackend = "whisper"
StableDiffusionBackend = "stablediffusion"
PiperBackend = "piper"
LCHuggingFaceBackend = "langchain-huggingface"
)
@ -103,6 +105,12 @@ var stableDiffusion = func(assetDir string) (interface{}, error) {
return stablediffusion.New(assetDir)
}
// piperTTS builds the loader callback used for the piper backend. The
// returned function ignores its string argument and constructs the engine
// with tts.New(assetDir).
func piperTTS(assetDir string) func(s string) (interface{}, error) {
	loader := func(_ string) (interface{}, error) {
		return tts.New(assetDir)
	}
	return loader
}
// whisperModel is the loader callback for the whisper backend; it passes the
// given model file to whisper.New and returns the result.
var whisperModel = func(path string) (interface{}, error) {
	return whisper.New(path)
}
@ -158,6 +166,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
return ml.LoadModel(modelFile, replit)
case StableDiffusionBackend:
return ml.LoadModel(modelFile, stableDiffusion)
case PiperBackend:
return ml.LoadModel(modelFile, piperTTS(filepath.Join(assetDir, "backend-assets", "espeak-ng-data")))
case StarcoderBackend:
return ml.LoadModel(modelFile, starCoder)
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:

12
pkg/tts/generate.go Normal file
View File

@ -0,0 +1,12 @@
//go:build tts
// +build tts
package tts
import (
piper "github.com/mudler/go-piper"
)
// tts is the implementation compiled when the "tts" build tag is set. It
// forwards all arguments unchanged to piper.TextToWav and returns its error.
func tts(text, model, assetDir, arLib, dst string) error {
	err := piper.TextToWav(text, model, assetDir, arLib, dst)
	return err
}

View File

@ -0,0 +1,10 @@
//go:build !tts
// +build !tts
package tts
import "fmt"
// tts is the stub compiled when the "tts" build tag is NOT set. It ignores
// every argument and always returns an error saying TTS support is missing.
func tts(text, model, assetDir, arLib, dst string) error {
	const msg = "this version of LocalAI was built without the tts tag"
	return fmt.Errorf(msg)
}

20
pkg/tts/piper.go Normal file
View File

@ -0,0 +1,20 @@
package tts
import "os"
// Piper is the text-to-speech backend handle. It only carries the asset
// directory that is passed along on every TTS call.
type Piper struct {
// assetDir is the directory given to New; New checks it with os.Stat.
assetDir string
}
// New returns a Piper bound to assetDir. It fails with the os.Stat error
// when assetDir cannot be stat'ed.
func New(assetDir string) (*Piper, error) {
	_, statErr := os.Stat(assetDir)
	if statErr != nil {
		return nil, statErr
	}

	p := &Piper{assetDir: assetDir}
	return p, nil
}
// TTS calls the package-level tts function with text, model and dst, the
// receiver's asset directory, and an empty string for the arLib argument.
func (p *Piper) TTS(text, model, dst string) error {
	const arLib = ""
	return tts(text, model, p.assetDir, arLib, dst)
}

22
pkg/utils/path.go Normal file
View File

@ -0,0 +1,22 @@
package utils
import (
"fmt"
"path/filepath"
)
// inTrustedRoot reports, via a nil error, that path lies strictly below
// trustedRoot. The comparison is purely lexical (no symlink resolution).
//
// Both arguments are cleaned by filepath.Rel, so a trusted root written with
// a trailing separator still matches. The root itself is not considered to
// be inside the root. Relative paths are supported and always terminate; a
// path that has to climb out of trustedRoot with ".." is rejected.
func inTrustedRoot(path string, trustedRoot string) error {
	rel, err := filepath.Rel(trustedRoot, path)
	if err != nil {
		// One path is absolute and the other relative: nothing to compare.
		return fmt.Errorf("path is outside of trusted root")
	}

	parentPrefix := ".." + string(filepath.Separator)
	escapes := rel == ".." || (len(rel) >= len(parentPrefix) && rel[:len(parentPrefix)] == parentPrefix)
	if rel == "." || escapes {
		return fmt.Errorf("path is outside of trusted root")
	}
	return nil
}
// VerifyPath joins path onto basePath, cleans the result and returns an
// error unless inTrustedRoot accepts it as being below basePath. path is
// expected to be relative to basePath; it is joined, not compared as-is.
func VerifyPath(path, basePath string) error {
	joined := filepath.Join(basePath, path)
	candidate := filepath.Clean(joined)
	return inTrustedRoot(candidate, basePath)
}