mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-24 06:46:40 +00:00
upgraded
This commit is contained in:
parent
9050181458
commit
78f736ccec
@ -20,6 +20,9 @@ import pipmaster as pm
|
||||
if version.parse(str(pm.get_installed_version("numpy"))) > version.parse(str("1.26.9")):
|
||||
pm.install_version("numpy", "1.26.4")
|
||||
|
||||
if pm.is_installed("pydub"):
|
||||
pm.install("pydub")
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@ -33,6 +36,11 @@ if not PackageManager.check_package_installed("simpleaudio"):
|
||||
if not PackageManager.check_package_installed("wave"):
|
||||
PackageManager.install_or_update("wave")
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from pydub import AudioSegment
|
||||
|
||||
|
||||
import wave
|
||||
from TTS.api import TTS
|
||||
import simpleaudio as sa
|
||||
@ -112,6 +120,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
return potential_speaker_wav
|
||||
|
||||
raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.")
|
||||
|
||||
def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str:
|
||||
speaker_wav = None
|
||||
|
||||
@ -120,9 +129,53 @@ class LollmsXTTS(LollmsTTS):
|
||||
else:
|
||||
speaker_wav = self.get_speaker_wav("main_voice")
|
||||
|
||||
self.tts.tts_to_file(text=text, file_path=file_name_or_path, speaker_wav=speaker_wav, language=language)
|
||||
# Split the text into sentences
|
||||
sentences = re.split('(?<=[.!?])\s+', text)
|
||||
|
||||
# Initialize an empty list to store audio segments
|
||||
audio_segments = []
|
||||
|
||||
# Process sentences in chunks of less than 400 tokens
|
||||
chunk = []
|
||||
chunk_tokens = 0
|
||||
output_path = Path(file_name_or_path)
|
||||
|
||||
for i, sentence in enumerate(sentences):
|
||||
sentence_tokens = len(sentence.split())
|
||||
if chunk_tokens + sentence_tokens > 400:
|
||||
# Process the current chunk
|
||||
chunk_text = " ".join(chunk)
|
||||
temp_file = output_path.with_suffix(f".temp{i}.wav")
|
||||
self.tts.tts_to_file(text=chunk_text, file_path=str(temp_file), speaker_wav=speaker_wav, language=language)
|
||||
audio_segments.append(AudioSegment.from_wav(str(temp_file)))
|
||||
|
||||
# Reset the chunk
|
||||
chunk = [sentence]
|
||||
chunk_tokens = sentence_tokens
|
||||
else:
|
||||
chunk.append(sentence)
|
||||
chunk_tokens += sentence_tokens
|
||||
|
||||
# Process the last chunk if it's not empty
|
||||
if chunk:
|
||||
chunk_text = " ".join(chunk)
|
||||
temp_file = output_path.with_suffix(f".temp{len(sentences)}.wav")
|
||||
self.tts.tts_to_file(text=chunk_text, file_path=str(temp_file), speaker_wav=speaker_wav, language=language)
|
||||
audio_segments.append(AudioSegment.from_wav(str(temp_file)))
|
||||
|
||||
# Combine all audio segments
|
||||
combined_audio = sum(audio_segments)
|
||||
|
||||
# Export the combined audio to the final file
|
||||
combined_audio.export(file_name_or_path, format="wav")
|
||||
|
||||
# Clean up temporary files
|
||||
for temp_file in output_path.parent.glob(f"{output_path.stem}.temp*.wav"):
|
||||
temp_file.unlink()
|
||||
|
||||
return file_name_or_path
|
||||
|
||||
|
||||
def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False):
|
||||
# Split text into sentences
|
||||
sentences = re.split(r'(?<=[.!?]) +', text)
|
||||
|
Loading…
Reference in New Issue
Block a user