From d0975b0c6ad01b4385512755af32fa505c4f3bdb Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Mon, 5 Feb 2024 20:37:39 +0100 Subject: [PATCH] upgraded xtts --- lollms/media.py | 2 +- lollms/server/endpoints/lollms_xtts.py | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/lollms/media.py b/lollms/media.py index 13004c3..0730648 100644 --- a/lollms/media.py +++ b/lollms/media.py @@ -142,7 +142,7 @@ class AudioRecorder: run_async(partial(self.sio.emit,'transcript', result["text"])) return {"text":result["text"], "audio":transcription_fn} else: - return {"text":"", "audio":transcription_fn} + return {"text":""} def update_spectrogram(self): diff --git a/lollms/server/endpoints/lollms_xtts.py b/lollms/server/endpoints/lollms_xtts.py index 0fba47c..bdcd257 100644 --- a/lollms/server/endpoints/lollms_xtts.py +++ b/lollms/server/endpoints/lollms_xtts.py @@ -55,8 +55,14 @@ async def set_voice(request: Request): lollmsElfServer.error(ex) return {"status":False,"error":str(ex)} + +class LollmsText2AudioRequest(BaseModel): + text: str + voice: str = None + fn:str = None + @router.post("/text2Audio") -async def text2Audio(request: Request): +async def text2Audio(request: LollmsText2AudioRequest): """ Executes Python code and returns the output. @@ -65,18 +71,21 @@ async def text2Audio(request: Request): """ try: - data = (await request.json()) # Get the JSON data from the POST request. try: from lollms.services.xtts.lollms_xtts import LollmsXTTS if lollmsElfServer.tts is None: - lollmsElfServer.tts = LollmsXTTS(lollmsElfServer, voice_samples_path=Path(__file__).parent/"voices", xtts_base_url= lollmsElfServer.config.xtts_base_url) + lollmsElfServer.tts = LollmsXTTS( + lollmsElfServer, + voice_samples_path=Path(__file__).parent/"voices", + xtts_base_url= lollmsElfServer.config.xtts_base_url + ) except: return {"url": None} - voice=data.get("voice",lollmsElfServer.config.current_voice) + voice=lollmsElfServer.config.current_voice if request.voice is None else request.voice index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav") - output_fn=data.get("fn",f"voice_sample_{index}.wav") + output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn if voice is None: voice = "main_voice" lollmsElfServer.info("Starting to build voice") @@ -91,7 +100,7 @@ async def text2Audio(request: Request): voices_folder = Path(__file__).parent.parent/"voices" lollmsElfServer.tts.set_speaker_folder(voices_folder) url = f"audio/{output_fn}" - preprocessed_text= add_period(data['text']) + preprocessed_text= add_period(request.text) lollmsElfServer.tts.tts_to_file(preprocessed_text, f"{voice}.wav", f"{output_fn}", language=language) lollmsElfServer.info("Voice file ready")