fix(vall-e-x): Fix voice cloning (#1696)

This commit is contained in:
Ettore Di Giacinto 2024-02-11 11:20:00 +01:00 committed by GitHub
parent 58cdf97361
commit fd68bf7084
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 3 deletions

View File

@ -55,6 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
print("Preparing models, please wait", file=sys.stderr) print("Preparing models, please wait", file=sys.stderr)
# download and load all models # download and load all models
preload_models() preload_models()
self.clonedVoice = False
# Assume directory from request.ModelFile. # Assume directory from request.ModelFile.
# Only if request.AudioPath is not an absolute path # Only if request.AudioPath is not an absolute path
if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath): if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
@ -65,6 +66,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.AudioPath != "": if request.AudioPath != "":
print("Generating model", file=sys.stderr) print("Generating model", file=sys.stderr)
make_prompt(name=model_name, audio_prompt_path=request.AudioPath) make_prompt(name=model_name, audio_prompt_path=request.AudioPath)
self.clonedVoice = True
### Use given transcript ### Use given transcript
##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav", ##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav",
## transcript="Just, what was that? Paimon thought we were gonna get eaten.") ## transcript="Just, what was that? Paimon thought we were gonna get eaten.")
@ -91,6 +93,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
try: try:
audio_array = None audio_array = None
if model != "": if model != "":
if self.clonedVoice:
model = os.path.basename(request.model)
audio_array = generate_audio(request.text, prompt=model) audio_array = generate_audio(request.text, prompt=model)
else: else:
audio_array = generate_audio(request.text) audio_array = generate_audio(request.text)

View File

@ -145,14 +145,14 @@ parameters:
vall-e: vall-e:
# The path to the audio file to be cloned # The path to the audio file to be cloned
# relative to the models directory # relative to the models directory
audio_path: "path-to-wav-source.wav" # Max 15s
audio_path: "audio-sample.wav"
``` ```
Then you can specify the model name in the requests: Then you can specify the model name in the requests:
``` ```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"model": "cloned-voice", "model": "cloned-voice",
"input":"Hello!" "input":"Hello!"
}' | aplay }' | aplay