mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 04:37:54 +00:00
upgraded functions in tts and stt
This commit is contained in:
parent
2b4f4cf7f6
commit
14ff02b6d2
@ -384,7 +384,7 @@ class RTCom:
|
||||
ASCIIColors.red(" -------------------------------------------------")
|
||||
self.lc.info("Talking")
|
||||
ASCIIColors.green("<<TALKING>>")
|
||||
self.lc.tts.tts_to_audio(lollms_text, speaker=self.voice, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wave")
|
||||
self.lc.tts.tts_audio(lollms_text, speaker=self.voice, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wave")
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.block_listening = False
|
||||
|
@ -99,7 +99,7 @@ async def text2Audio(request: LollmsAudio2TextRequest):
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
lollmsElfServer.asr.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
lollmsElfServer.asr.tts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
else:
|
||||
lollmsElfServer.InfoMessage("asr is not up yet.\nPlease wait for it to load then try again. This may take some time.")
|
||||
return {"status":False, "error":"Service not ready yet"}
|
||||
|
@ -121,14 +121,16 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
if request.fn:
|
||||
request.fn = os.path.realpath(str((lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn))
|
||||
request.fn = sanitize_path(request.fn)
|
||||
request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
|
||||
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
|
||||
|
||||
else:
|
||||
request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
|
||||
try:
|
||||
if lollmsElfServer.tts is None:
|
||||
return {"url": None, "error":f"No TTS service is on"}
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
|
||||
response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
@ -152,70 +154,20 @@ async def text2Wav(request: LollmsText2AudioRequest):
|
||||
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
if request.fn:
|
||||
request.fn = os.path.realpath(str((lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn))
|
||||
request.fn = sanitize_path(request.fn)
|
||||
request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
|
||||
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
|
||||
|
||||
else:
|
||||
request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
|
||||
|
||||
try:
|
||||
# Get the JSON data from the POST request.
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if lollmsElfServer.tts is None:
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
|
||||
use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
|
||||
)
|
||||
except Exception as ex:
|
||||
return {"url": None, "error":f"{ex}"}
|
||||
|
||||
voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
|
||||
index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
|
||||
if voice is None:
|
||||
voice = "main_voice"
|
||||
lollmsElfServer.info("Starting to build voice")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
if voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
if lollmsElfServer.tts is None:
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
|
||||
)
|
||||
if lollmsElfServer.tts.ready:
|
||||
language = lollmsElfServer.config.xtts_current_language# convert_language_name()
|
||||
lollmsElfServer.tts.set_speaker_folder(voices_folder)
|
||||
url = f"audio/{output_fn}"
|
||||
preprocessed_text= add_period(request.text)
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
lollmsElfServer.info(f"Voice file ready at {url}")
|
||||
else:
|
||||
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
|
||||
return {"status":False, "error":"Service not ready yet"}
|
||||
return {"url": url}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"url": None}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
lollmsElfServer.error(ex)
|
||||
|
@ -186,7 +186,7 @@ class LollmsASR:
|
||||
if self.voices_folder is not None:
|
||||
print("Generating sample audio.")
|
||||
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
|
||||
self.tts_to_audio("asr is ready",voice_file[0].name)
|
||||
self.tts_audio("asr is ready",voice_file[0].name)
|
||||
print("Service is available.")
|
||||
if self.app is not None:
|
||||
self.app.success("asr Service is now available.")
|
||||
|
@ -73,7 +73,7 @@ class LollmsOpenAITTS(LollmsTTS):
|
||||
self.ready = True
|
||||
|
||||
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
def tts_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
speech_file_path = file_name_or_path
|
||||
response = self.client.audio.speech.create(
|
||||
model=self.model,
|
||||
@ -85,7 +85,7 @@ class LollmsOpenAITTS(LollmsTTS):
|
||||
|
||||
response.write_to_file(speech_file_path)
|
||||
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
speech_file_path = file_name_or_path
|
||||
response = self.client.audio.speech.create(
|
||||
model=self.model,
|
||||
|
@ -220,7 +220,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
if self.voices_folder is not None:
|
||||
print("Generating sample audio.")
|
||||
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
|
||||
self.tts_to_audio("x t t s is ready",voice_file[0].stem)
|
||||
self.tts_audio("x t t s is ready",voice_file[0].stem)
|
||||
print("Service is available.")
|
||||
if self.app is not None:
|
||||
self.app.success("XTTS Service is now available.")
|
||||
@ -259,8 +259,8 @@ class LollmsXTTS(LollmsTTS):
|
||||
print("Request failed with status code:", response.status_code)
|
||||
return False
|
||||
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
url = f"{self.xtts_base_url}/tts_to_file"
|
||||
def tts_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
url = f"{self.xtts_base_url}/tts_file"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
@ -284,7 +284,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
voice=self.app.config.xtts_current_voice if speaker is None else speaker
|
||||
index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
|
||||
@ -307,13 +307,13 @@ class LollmsXTTS(LollmsTTS):
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"status":False,"error":f"{ex}"}
|
||||
|
||||
def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
# Remove HTML tags
|
||||
text = re.sub(r'<.*?>', '', text)
|
||||
# Remove code blocks (assuming they're enclosed in backticks or similar markers)
|
||||
@ -323,7 +323,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
text = re.sub(r'[\{\}\[\]\(\)<>]', '', text)
|
||||
text = text.replace("\\","")
|
||||
def tts2_audio_th(thread_uid=None):
|
||||
url = f"{self.xtts_base_url}/tts_to_audio"
|
||||
url = f"{self.xtts_base_url}/tts_audio"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
|
@ -62,7 +62,7 @@ class LollmsTTS:
|
||||
self.voices = [] # To be filled by the child class
|
||||
self.models = [] # To be filled by the child class
|
||||
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
def tts_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
"""
|
||||
Converts the given text to speech and saves it to a file.
|
||||
|
||||
@ -74,7 +74,7 @@ class LollmsTTS:
|
||||
"""
|
||||
pass
|
||||
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
|
||||
def tts_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
|
||||
"""
|
||||
Converts the given text to speech and returns the audio data.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user