Upgraded functions in TTS and STT (renamed tts_to_audio/tts_to_file to tts_audio/tts_file)

This commit is contained in:
Saifeddine ALOUI 2024-05-20 17:27:09 +02:00
parent 2b4f4cf7f6
commit 14ff02b6d2
7 changed files with 29 additions and 77 deletions

View File

@ -384,7 +384,7 @@ class RTCom:
ASCIIColors.red(" -------------------------------------------------")
self.lc.info("Talking")
ASCIIColors.green("<<TALKING>>")
self.lc.tts.tts_to_audio(lollms_text, speaker=self.voice, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wave")
self.lc.tts.tts_audio(lollms_text, speaker=self.voice, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wave")
except Exception as ex:
trace_exception(ex)
self.block_listening = False

View File

@ -99,7 +99,7 @@ async def text2Audio(request: LollmsAudio2TextRequest):
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
if len(voice_file)==0:
return {"status":False,"error":"Voice not found"}
lollmsElfServer.asr.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
lollmsElfServer.asr.tts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
else:
lollmsElfServer.InfoMessage("asr is not up yet.\nPlease wait for it to load then try again. This may take some time.")
return {"status":False, "error":"Service not ready yet"}

View File

@ -121,14 +121,16 @@ async def text2Audio(request: LollmsText2AudioRequest):
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
if request.fn:
request.fn = os.path.realpath(str((lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn))
request.fn = sanitize_path(request.fn)
request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
else:
request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
try:
if lollmsElfServer.tts is None:
return {"url": None, "error":f"No TTS service is on"}
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn)
return response
else:
return {"url": None, "error":f"TTS service is not ready yet"}
@ -152,70 +154,20 @@ async def text2Wav(request: LollmsText2AudioRequest):
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
if request.fn:
request.fn = os.path.realpath(str((lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn))
request.fn = sanitize_path(request.fn)
request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
else:
request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
try:
# Get the JSON data from the POST request.
try:
from lollms.services.xtts.lollms_xtts import LollmsXTTS
voice=lollmsElfServer.config.xtts_current_voice
if lollmsElfServer.tts is None:
voice=lollmsElfServer.config.xtts_current_voice
if voice!="main_voice":
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
else:
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
return response
else:
return {"url": None, "error":f"TTS service is not ready yet"}
lollmsElfServer.tts = LollmsXTTS(
lollmsElfServer,
voices_folder=voices_folder,
voice_samples_path=Path(__file__).parent/"voices",
xtts_base_url= lollmsElfServer.config.xtts_base_url,
use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
)
except Exception as ex:
return {"url": None, "error":f"{ex}"}
voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
if voice is None:
voice = "main_voice"
lollmsElfServer.info("Starting to build voice")
try:
from lollms.services.xtts.lollms_xtts import LollmsXTTS
if voice!="main_voice":
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
else:
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
if lollmsElfServer.tts is None:
lollmsElfServer.tts = LollmsXTTS(
lollmsElfServer,
voices_folder=voices_folder,
voice_samples_path=Path(__file__).parent/"voices",
xtts_base_url= lollmsElfServer.config.xtts_base_url,
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
)
if lollmsElfServer.tts.ready:
language = lollmsElfServer.config.xtts_current_language# convert_language_name()
lollmsElfServer.tts.set_speaker_folder(voices_folder)
url = f"audio/{output_fn}"
preprocessed_text= add_period(request.text)
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
if len(voice_file)==0:
return {"status":False,"error":"Voice not found"}
lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
lollmsElfServer.info(f"Voice file ready at {url}")
else:
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
return {"status":False, "error":"Service not ready yet"}
return {"url": url}
except Exception as ex:
trace_exception(ex)
return {"url": None}
except Exception as ex:
trace_exception(ex)
lollmsElfServer.error(ex)

View File

@ -186,7 +186,7 @@ class LollmsASR:
if self.voices_folder is not None:
print("Generating sample audio.")
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
self.tts_to_audio("asr is ready",voice_file[0].name)
self.tts_audio("asr is ready",voice_file[0].name)
print("Service is available.")
if self.app is not None:
self.app.success("asr Service is now available.")

View File

@ -73,7 +73,7 @@ class LollmsOpenAITTS(LollmsTTS):
self.ready = True
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
def tts_file(self, text, speaker, file_name_or_path, language="en"):
speech_file_path = file_name_or_path
response = self.client.audio.speech.create(
model=self.model,
@ -85,7 +85,7 @@ class LollmsOpenAITTS(LollmsTTS):
response.write_to_file(speech_file_path)
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
speech_file_path = file_name_or_path
response = self.client.audio.speech.create(
model=self.model,

View File

@ -220,7 +220,7 @@ class LollmsXTTS(LollmsTTS):
if self.voices_folder is not None:
print("Generating sample audio.")
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
self.tts_to_audio("x t t s is ready",voice_file[0].stem)
self.tts_audio("x t t s is ready",voice_file[0].stem)
print("Service is available.")
if self.app is not None:
self.app.success("XTTS Service is now available.")
@ -259,8 +259,8 @@ class LollmsXTTS(LollmsTTS):
print("Request failed with status code:", response.status_code)
return False
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
url = f"{self.xtts_base_url}/tts_to_file"
def tts_file(self, text, speaker, file_name_or_path, language="en"):
url = f"{self.xtts_base_url}/tts_file"
# Define the request body
payload = {
@ -284,7 +284,7 @@ class LollmsXTTS(LollmsTTS):
else:
print("Request failed with status code:", response.status_code)
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
voice=self.app.config.xtts_current_voice if speaker is None else speaker
index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
@ -307,13 +307,13 @@ class LollmsXTTS(LollmsTTS):
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
if len(voice_file)==0:
return {"status":False,"error":"Voice not found"}
self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
except Exception as ex:
trace_exception(ex)
return {"status":False,"error":f"{ex}"}
def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
# Remove HTML tags
text = re.sub(r'<.*?>', '', text)
# Remove code blocks (assuming they're enclosed in backticks or similar markers)
@ -323,7 +323,7 @@ class LollmsXTTS(LollmsTTS):
text = re.sub(r'[\{\}\[\]\(\)<>]', '', text)
text = text.replace("\\","")
def tts2_audio_th(thread_uid=None):
url = f"{self.xtts_base_url}/tts_to_audio"
url = f"{self.xtts_base_url}/tts_audio"
# Define the request body
payload = {

View File

@ -62,7 +62,7 @@ class LollmsTTS:
self.voices = [] # To be filled by the child class
self.models = [] # To be filled by the child class
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
def tts_file(self, text, speaker, file_name_or_path, language="en"):
"""
Converts the given text to speech and saves it to a file.
@ -74,7 +74,7 @@ class LollmsTTS:
"""
pass
def tts_to_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
def tts_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
"""
Converts the given text to speech and returns the audio data.