LocalAI/backend/go/transcribe/transcript.go

101 lines
2.0 KiB
Go
Raw Normal View History

package main
2023-05-09 09:43:50 +00:00
import (
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
	"github.com/go-skynet/LocalAI/pkg/schema"
)
// sh runs c through "/bin/sh -c" with the current process environment.
// It returns the command's combined stdout and stderr as a string, together
// with the error (if any) reported by the command.
func sh(c string) (string, error) {
	proc := exec.Command("/bin/sh", "-c", c)
	proc.Env = os.Environ()

	raw, runErr := proc.CombinedOutput()
	output := string(raw)
	return output, runErr
}
// audioToWav converts the audio file at src into a 16 kHz, mono, 16-bit PCM
// WAV file at dst by running the ffmpeg binary found on PATH.
//
// ffmpeg is executed directly with an argument vector instead of through a
// shell, so src and dst are always passed as single arguments: paths with
// spaces work, and shell metacharacters in a path are never interpreted.
//
// On failure the returned error wraps the execution error and includes
// ffmpeg's combined stdout/stderr output.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
	// A nil Cmd.Env makes the child inherit the current process environment.
	cmd := exec.Command("ffmpeg",
		"-i", src,
		"-format", "s16le",
		"-ar", "16000",
		"-ac", "1",
		"-acodec", "pcm_s16le",
		dst,
	)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.WhisperResult, error) {
res := schema.WhisperResult{}
2023-05-09 09:43:50 +00:00
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := audioToWav(audiopath, convertedPath); err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := model.NewContext()
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
context.SetThreads(threads)
2023-05-09 09:43:50 +00:00
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
2023-05-09 09:43:50 +00:00
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
for {
s, err := context.NextSegment()
2023-05-09 09:43:50 +00:00
if err != nil {
break
}
var tokens []int
for _, t := range s.Tokens {
tokens = append(tokens, t.Id)
}
segment := schema.WhisperSegment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
res.Segments = append(res.Segments, segment)
res.Text += s.Text
2023-05-09 09:43:50 +00:00
}
return res, nil
2023-05-09 09:43:50 +00:00
}