2024-01-05 17:04:46 +00:00
|
|
|
package backend
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2024-09-02 13:48:53 +00:00
|
|
|
"time"
|
2024-01-05 17:04:46 +00:00
|
|
|
|
2024-06-23 08:24:36 +00:00
|
|
|
"github.com/mudler/LocalAI/core/config"
|
|
|
|
"github.com/mudler/LocalAI/core/schema"
|
2024-01-05 17:04:46 +00:00
|
|
|
|
2024-06-23 08:24:36 +00:00
|
|
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
2024-09-10 06:57:16 +00:00
|
|
|
"github.com/mudler/LocalAI/pkg/model"
|
2024-01-05 17:04:46 +00:00
|
|
|
)
|
|
|
|
|
2024-06-24 17:21:22 +00:00
|
|
|
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
2024-01-05 17:04:46 +00:00
|
|
|
|
2024-10-02 06:55:58 +00:00
|
|
|
if backendConfig.Backend == "" {
|
|
|
|
backendConfig.Backend = model.WhisperBackend
|
|
|
|
}
|
|
|
|
|
|
|
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
2024-01-05 17:04:46 +00:00
|
|
|
|
2024-09-10 06:57:16 +00:00
|
|
|
transcriptionModel, err := ml.BackendLoader(opts...)
|
2024-01-05 17:04:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-09-10 06:57:16 +00:00
|
|
|
if transcriptionModel == nil {
|
|
|
|
return nil, fmt.Errorf("could not load transcription model")
|
2024-01-05 17:04:46 +00:00
|
|
|
}
|
|
|
|
|
2024-09-10 06:57:16 +00:00
|
|
|
r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
|
2024-06-24 17:21:22 +00:00
|
|
|
Dst: audio,
|
|
|
|
Language: language,
|
|
|
|
Translate: translate,
|
|
|
|
Threads: uint32(*backendConfig.Threads),
|
2024-01-05 17:04:46 +00:00
|
|
|
})
|
2024-09-02 13:48:53 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
tr := &schema.TranscriptionResult{
|
|
|
|
Text: r.Text,
|
|
|
|
}
|
|
|
|
for _, s := range r.Segments {
|
|
|
|
var tks []int
|
|
|
|
for _, t := range s.Tokens {
|
|
|
|
tks = append(tks, int(t))
|
|
|
|
}
|
|
|
|
tr.Segments = append(tr.Segments,
|
|
|
|
schema.Segment{
|
|
|
|
Text: s.Text,
|
|
|
|
Id: int(s.Id),
|
|
|
|
Start: time.Duration(s.Start),
|
|
|
|
End: time.Duration(s.End),
|
|
|
|
Tokens: tks,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
return tr, err
|
2024-01-05 17:04:46 +00:00
|
|
|
}
|