upgraded xtts

This commit is contained in:
Saifeddine ALOUI 2024-07-15 00:38:56 +02:00
parent aa21fdfb49
commit 3a00e968fb
15 changed files with 248 additions and 424 deletions

View File

@ -142,10 +142,6 @@ whisper_model: base
tts_output_device: 0
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
xtts_use_deepspeed: false
xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -78,8 +78,6 @@ auto_show_browser: true
copy_to_clipboard_add_all_details: false
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -78,8 +78,6 @@ auto_show_browser: true
copy_to_clipboard_add_all_details: false
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -78,8 +78,6 @@ auto_show_browser: true
copy_to_clipboard_add_all_details: false
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -341,14 +341,10 @@ class LollmsApplication(LoLLMsCom):
self.xtts = LollmsXTTS(
self,
voices_folder=voices_folder,
voice_samples_path=self.lollms_paths.custom_voices_path,
xtts_base_url=self.config.xtts_base_url,
wait_for_service=False,
use_deep_speed=self.config.xtts_use_deepspeed,
use_streaming_mode=self.config.xtts_use_streaming_mode
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
)
except:
except Exception as ex:
trace_exception(ex)
self.warning(f"Couldn't load XTTS")
ASCIIColors.blue("Loading local TTI services")
@ -458,14 +454,10 @@ class LollmsApplication(LoLLMsCom):
self.xtts = LollmsXTTS(
self,
voices_folder=voices_folder,
voice_samples_path=self.lollms_paths.custom_voices_path,
xtts_base_url=self.config.xtts_base_url,
wait_for_service=False,
use_deep_speed=self.config.xtts_use_deepspeed,
use_streaming_mode=self.config.xtts_use_streaming_mode
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
)
except:
except Exception as ex:
trace_exception(ex)
self.warning(f"Couldn't load XTTS")
ASCIIColors.blue("Loading local TTI services")

View File

@ -142,10 +142,6 @@ whisper_model: base
tts_output_device: 0
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
xtts_use_deepspeed: false
xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -72,6 +72,14 @@ class LollmsPaths:
self.personal_outputs_path = self.personal_path / "outputs"
self.personal_user_infos_path = self.personal_path / "user_infos"
self.personal_services_path = self.personal_path / "services"
self.personal_stt_services_path = self.personal_services_path / "stt"
self.personal_tts_services_path = self.personal_services_path / "tts"
self.personal_tti_services_path = self.personal_services_path / "tti"
self.personal_ttm_services_path = self.personal_services_path / "ttm"
self.personal_trainers_path = self.personal_path / "trainers"
self.gptqlora_path = self.personal_trainers_path / "gptqlora"
@ -117,6 +125,25 @@ class LollmsPaths:
ASCIIColors.yellow(f"{self.personal_models_path}")
ASCIIColors.red("personal_user_infos_path:",end="")
ASCIIColors.yellow(f"{self.personal_user_infos_path}")
ASCIIColors.red("personal_services_path:",end="")
ASCIIColors.yellow(f"{self.personal_services_path}")
ASCIIColors.red("personal_stt_services_path:", end="")
ASCIIColors.yellow(f"{self.personal_stt_services_path}")
ASCIIColors.red("personal_tts_services_path:", end="")
ASCIIColors.yellow(f"{self.personal_tts_services_path}")
ASCIIColors.red("personal_tti_services_path:", end="")
ASCIIColors.yellow(f"{self.personal_tti_services_path}")
ASCIIColors.red("personal_ttm_services_path:", end="")
ASCIIColors.yellow(f"{self.personal_ttm_services_path}")
ASCIIColors.red("personal_trainers_path:",end="")
ASCIIColors.yellow(f"{self.personal_trainers_path}")
ASCIIColors.red("personal_trainers_path:",end="")
@ -162,6 +189,12 @@ class LollmsPaths:
"Personal user infos path": self.personal_user_infos_path,
"Personal trainers path": self.personal_trainers_path,
"Personal gptqlora trainer path": self.gptqlora_path,
"Personal services path": self.personal_services_path,
"Personal STT services path": self.personal_stt_services_path,
"Personal TTS services path": self.personal_tts_services_path,
"Personal TTI services path": self.personal_tti_services_path,
"Personal TTM services path": self.personal_ttm_services_path,
}
return "\n".join([f"{category}: {path}" for category, path in directories.items()])
@ -180,6 +213,12 @@ class LollmsPaths:
self.personal_outputs_path.mkdir(parents=True, exist_ok=True)
self.personal_uploads_path.mkdir(parents=True, exist_ok=True)
self.personal_user_infos_path.mkdir(parents=True, exist_ok=True)
self.personal_services_path.mkdir(parents=True, exist_ok=True)
self.personal_stt_services_path.mkdir(parents=True, exist_ok=True)
self.personal_tts_services_path.mkdir(parents=True, exist_ok=True)
self.personal_tti_services_path.mkdir(parents=True, exist_ok=True)
self.personal_ttm_services_path.mkdir(parents=True, exist_ok=True)
self.personal_trainers_path.mkdir(parents=True, exist_ok=True)
self.custom_personalities_path.mkdir(parents=True, exist_ok=True)
self.custom_voices_path.mkdir(parents=True, exist_ok=True)

View File

@ -139,10 +139,6 @@ whisper_model: base
tts_output_device: 0
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
xtts_use_deepspeed: false
xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en

View File

@ -9,7 +9,7 @@ description:
"""
from fastapi import APIRouter, Request
from pydantic import BaseModel, Field
from lollms_webui import LOLLMSWebUI
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from lollms.security import check_access
from starlette.responses import StreamingResponse
@ -24,7 +24,7 @@ import platform
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
# ----------------------- voice ------------------------------

View File

@ -7,7 +7,7 @@ description:
"""
from fastapi import APIRouter, Request
from lollms_webui import LOLLMSWebUI
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
@ -22,7 +22,7 @@ import platform
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
class Identification(BaseModel):
client_id: str

View File

@ -8,7 +8,7 @@ description:
"""
from fastapi import APIRouter, Request
from lollms_webui import LOLLMSWebUI
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
@ -43,7 +43,7 @@ class DeleteDiscussionParameters(BaseModel):
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
@router.get("/list_discussions")

View File

@ -56,6 +56,16 @@ def list_stt_models():
ASCIIColors.yellow("Listing voices")
return {"voices":lollmsElfServer.stt.get_models()}
@router.get("/list_tts_models")
def list_tts_models():
    """
    List the models offered by the active text-to-speech service.

    The request is refused when the server runs in headless mode or is bound
    to anything other than localhost / 127.0.0.1, and when no TTS service has
    been started.

    Returns:
        dict: ``{"voices": <result of tts.get_models()>}`` on success, or
        ``{"status": False, "error": <message>}`` when the request is refused.
    """
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}

    if lollmsElfServer.config.host not in ("localhost", "127.0.0.1"):
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}

    # The TTS service is optional; without this guard the attribute access
    # below would raise instead of returning an error payload.
    if lollmsElfServer.tts is None:
        return {"status":False,"error":"No TTS service is on"}

    ASCIIColors.yellow("Listing voices")
    return {"voices":lollmsElfServer.tts.get_models()}
@router.post("/set_voice")
async def set_voice(request: Request):
@ -134,7 +144,11 @@ async def text2Audio(request: LollmsText2AudioRequest):
if lollmsElfServer.tts is None:
return {"url": None, "error":f"No TTS service is on"}
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn, use_threading=True)
if request.voice:
voice = request.voice
else:
voice = lollmsElfServer.config.xtts_current_voice
response = lollmsElfServer.tts.tts_audio(request.text, voice, file_name_or_path=request.fn, use_threading=True)
return response
else:
return {"url": None, "error":f"TTS service is not ready yet"}
@ -143,8 +157,8 @@ async def text2Audio(request: LollmsText2AudioRequest):
lollmsElfServer.error(ex)
return {"status":False,"error":str(ex)}
@router.post("/text2wav")
async def text2Wav(request: LollmsText2AudioRequest):
@router.post("/text2Wave")
async def text2Wave(request: LollmsText2AudioRequest):
"""
Executes Python code and returns the output.
@ -168,9 +182,14 @@ async def text2Wav(request: LollmsText2AudioRequest):
request.fn.parent.mkdir(exist_ok=True, parents=True)
try:
if request.voice:
voice = request.voice
else:
voice = lollmsElfServer.config.xtts_current_voice
# Get the JSON data from the POST request.
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
return response
else:
return {"url": None, "error":f"TTS service is not ready yet"}
@ -215,11 +234,7 @@ def start_xtts():
lollmsElfServer.tts = LollmsXTTS(
lollmsElfServer,
voices_folder=voices_folder,
voice_samples_path=Path(__file__).parent/"voices",
xtts_base_url= lollmsElfServer.config.xtts_base_url,
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],
)
lollmsElfServer.HideBlockingMessage()
except Exception as ex:

View File

@ -1,404 +1,193 @@
# Title LollmsXTTS
# Licence: MIT
# Author : Paris Neo
# Adapted from the work of daswer123's xtts-api-server
# check it out : https://github.com/daswer123/xtts-api-server
# Here is a copy of the LICENCE https://github.com/daswer123/xtts-api-server/blob/main/LICENSE
# All rights are reserved
"""
project: lollms_tts
file: lollms_tts.py
author: ParisNeo
description:
This file hosts the LollmsXTTS service which provides text-to-speech functionalities using the TTS library.
"""
from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import re
import platform
import threading
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any
import uuid
from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
from lollms.tts import LollmsTTS
import subprocess
import platform
from lollms.utilities import run_pip_in_env
from typing import List
import threading
import numpy as np
# Ensure required packages are installed
if not PackageManager.check_package_installed("TTS"):
PackageManager.install_or_update("TTS")
if not PackageManager.check_package_installed("simpleaudio"):
PackageManager.install_or_update("simpleaudio")
if not PackageManager.check_package_installed("wave"):
PackageManager.install_or_update("wave")
import wave
from TTS.api import TTS
import simpleaudio as sa
import time
from queue import Queue
import re
class LollmsXTTS(LollmsTTS):
def __init__(
self,
app:LollmsApplication,
xtts_base_url=None,
share=False,
max_retries=20,
voices_folder=None,
voice_samples_path="",
wait_for_service=True,
use_deep_speed=False,
use_streaming_mode = True
):
super().__init__("xtts",app)
def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
super().__init__("lollms_xtts", app)
self.generation_threads = {}
self.voices_folder = voices_folder
self.ready = False
if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020":
xtts_base_url = None
# Get the current directory
lollms_paths = app.lollms_paths
root_dir = lollms_paths.personal_path
self.voice_samples_path = voice_samples_path
self.use_deep_speed = use_deep_speed
self.use_streaming_mode = use_streaming_mode
# Store the path to the script
if xtts_base_url is None:
self.xtts_base_url = "http://127.0.0.1:8020"
if not LollmsXTTS.verify(lollms_paths):
LollmsXTTS.install(app)
else:
self.xtts_base_url = xtts_base_url
self.auto_xtts_url = self.xtts_base_url+"/sdapi/v1"
shared_folder = root_dir/"shared"
self.xtts_path = shared_folder / "xtts"
self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
self.stop_event = threading.Event()
# Show a cool LOGO using ASCIIColors
ASCIIColors.red(" __ ___ __ __ __ __ ___ _ ")
ASCIIColors.red(" / / /___\/ / / / /\/\ / _\ \ \/ / |_| |_ ___ ")
ASCIIColors.red(" / / // // / / / / \ \ \ _____\ /| __| __/ __| ")
ASCIIColors.red("/ /___/ \_// /___/ /___/ /\/\ \_\ \_____/ \| |_| |_\__ \ ")
ASCIIColors.red("\____/\___/\____/\____/\/ \/\__/ /_/\_\\__|\__|___/ ")
ASCIIColors.red(" Forked from daswer123's XTTS server")
ASCIIColors.red(" Integration in lollms by ParisNeo using daswer123's webapi")
ASCIIColors.red(" Address :",end="")
ASCIIColors.yellow(f"{self.xtts_base_url}")
self.output_folder = app.lollms_paths.personal_outputs_path/"audio_out"
self.output_folder.mkdir(parents=True, exist_ok=True)
# Load the TTS model
self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
self.tts.to("cuda")
self.wav_queue = Queue()
self.play_obj = None
self.thread = None
self.ready = True
if not self.wait_for_service(1,False):
ASCIIColors.info("Loading lollms_xtts")
# Launch the Flask service using the appropriate script for the platform
self.process = self.run_xtts_api_server()
# Wait until the service is available at http://127.0.0.1:7860/
if wait_for_service:
self.wait_for_service()
else:
self.wait_for_service_in_another_thread(max_retries=max_retries)
def install(lollms_app:LollmsApplication):
ASCIIColors.green("XTTS installation started")
repo_url = "https://github.com/ParisNeo/xtts-api-server"
root_dir = lollms_app.lollms_paths.personal_path
shared_folder = root_dir/"shared"
xtts_path = shared_folder / "xtts"
# Step 1: Clone or update the repository
if os.path.exists(xtts_path):
print("Repository already exists. Pulling latest changes...")
try:
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
except:
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
else:
print("Cloning repository...")
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
# Step 2: Create or update the Conda environment
if environment_exists("xtts"):
print("Conda environment 'xtts' already exists. Updating...")
# Here you might want to update the environment, e.g., update Python or dependencies
# This step is highly dependent on how you manage your Conda environments and might involve
# running `conda update` commands or similar.
else:
print("Creating Conda environment 'xtts'...")
create_conda_env("xtts", "3.10")
# Step 3: Install or update dependencies using your custom function
requirements_path = os.path.join(xtts_path, "requirements.txt")
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
run_pip_in_env("xtts", f"install -e {xtts_path}", cwd=xtts_path)
# Step 4: Launch the server
# Assuming the server can be started with a Python script in the cloned repository
print("Launching XTTS API server...")
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
print("XTTS API server setup and launch completed.")
ASCIIColors.cyan("Done")
ASCIIColors.cyan("Installing xtts-api-server")
ASCIIColors.green("XTTS server installed successfully")
@staticmethod
def install(lollms_app: LollmsApplication):
    """
    Install the Python packages the XTTS service depends on.

    Uses the same ``PackageManager`` helper as the module-level dependency
    check. The previous ``run_pip_in_env("TTS")`` call passed the package name
    in the position where the other call sites pass the environment name, and
    gave no pip command.

    Args:
        lollms_app: The running application (currently unused, kept for
            interface compatibility with the other services).
    """
    ASCIIColors.green("LollmsXTTS installation started")
    for package in ("TTS", "simpleaudio"):
        PackageManager.install_or_update(package)
@staticmethod
def verify(lollms_paths:LollmsPaths)->bool:
# Clone repository
root_dir = lollms_paths.personal_path
shared_folder = root_dir/"shared"
xtts_path = shared_folder / "xtts"
return xtts_path.exists()
@staticmethod
def get(app: LollmsApplication) -> 'LollmsXTTS':
root_dir = app.lollms_paths.personal_path
shared_folder = root_dir/"shared"
xtts_path = shared_folder / "xtts"
xtts_script_path = xtts_path / "lollms_xtts.py"
git_pull(xtts_path)
if xtts_script_path.exists():
ASCIIColors.success("lollms_xtts found.")
ASCIIColors.success("Loading source file...",end="")
# use importlib to load the module from the file path
from lollms.services.xtts.lollms_xtts import LollmsXTTS
ASCIIColors.success("ok")
return LollmsXTTS
def run_xtts_api_server(self):
root_dir = self.app.lollms_paths.personal_path
shared_folder = root_dir/"shared"
xtts_path = shared_folder / "xtts"
# Get the path to the current Python interpreter
ASCIIColors.yellow("Loading XTTS ")
options= ""
if self.use_deep_speed:
options += " --deepspeed"
if self.use_streaming_mode:
options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", cwd=xtts_path, wait= False)
return process
def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True):
thread = threading.Thread(target=self.wait_for_service, args=(max_retries, show_warning))
thread.start()
return thread
def update_settings(self):
def verify(lollms_paths: LollmsPaths) -> bool:
# Verify that the service is installed either by verifying the libraries are installed or that some files or folders exist
try:
settings = {
"stream_chunk_size": int(self.app.config.xtts_stream_chunk_size),
"temperature": float(self.app.config.xtts_temperature),
"speed": float(self.app.config.xtts_speed),
"length_penalty": float(self.app.config.xtts_length_penalty),
"repetition_penalty": float(self.app.config.xtts_repetition_penalty),
"top_p": float(self.app.config.xtts_top_p),
"top_k": int(self.app.config.xtts_top_k),
"enable_text_splitting": bool(self.app.config.xtts_enable_text_splitting)
}
print("set_tts_settings")
print(f"{settings}")
response = requests.post(f"{self.xtts_base_url}/set_tts_settings", settings,headers={
'accept': 'application/json',
'Content-Type': 'application/json'
})
if response.status_code == 200:
ASCIIColors.success("XTTS updated successfully")
except Exception as ex:
trace_exception(ex)
pass
def wait_for_service(self, max_retries = 150, show_warning=True):
print(f"Waiting for xtts service (max_retries={max_retries})")
url = f"{self.xtts_base_url}/languages"
# Adjust this value as needed
retries = 0
while retries < max_retries or max_retries<0:
try:
response = requests.get(url)
if response.status_code == 200:
self.update_settings()
print(f"voices_folder is {self.voices_folder}.")
self.ready = True
if self.voices_folder is not None:
print("Generating sample audio.")
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
try:
self.tts_audio("x t t s is ready",voice_file[0].stem)
except Exception as ex:
return True
print("Service is available.")
if self.app is not None:
self.app.success("XTTS Service is now available.")
return True
except:
pass
retries += 1
ASCIIColors.yellow("Waiting for xtts...")
time.sleep(5)
if show_warning:
print("Service did not become available within the given time.")
if self.app is not None:
self.app.error("XTTS Service did not become available within the given time.")
return False
def set_speaker_folder(self, speaker_folder):
url = f"{self.xtts_base_url}/set_speaker_folder"
# Define the request body
payload = {
"speaker_folder": str(speaker_folder)
}
# Send the POST request
response = requests.post(url, json=payload)
# Check the response status code
if response.status_code == 200:
print("Request successful")
import TTS
import simpleaudio
return True
# You can access the response data using response.json()
else:
print("Request failed with status code:", response.status_code)
except ImportError:
return False
def tts_file(self, text, file_name_or_path, speaker=None, language="en")->str:
text = self.clean_text(text)
url = f"{self.xtts_base_url}/tts_to_file"
# Define the request body
payload = {
"text": text,
"speaker_wav": speaker,
"language": language,
"file_name_or_path": file_name_or_path
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
# Send the POST request
response = requests.post(url, headers=headers, data=json.dumps(payload))
# Check the response status code
if response.status_code == 200:
print("Request successful")
# You can access the response data using response.json()
@staticmethod
def get(app: LollmsApplication) -> 'LollmsXTTS':
# Verify if the service is installed and if true then return an instance of LollmsXTTS
if LollmsXTTS.verify(app.lollms_paths):
return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
else:
print("Request failed with status code:", response.status_code)
return file_name_or_path
def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False):
voice=self.app.config.xtts_current_voice if speaker is None else speaker
index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
if voice is None:
voice = "main_voice"
self.app.info("Starting to build voice")
try:
from lollms.services.xtts.lollms_xtts import LollmsXTTS
# If the personality has a voice, then use it
personality_audio:Path = self.app.personality.personality_package_path/"audio"
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
voices_folder = personality_audio
elif voice!="main_voice":
voices_folder = self.app.lollms_paths.custom_voices_path
else:
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
language = self.app.config.xtts_current_language# convert_language_name()
self.set_speaker_folder(voices_folder)
preprocessed_text= add_period(text)
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
if len(voice_file)==0:
return {"status":False,"error":"Voice not found"}
self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language, use_threading=use_threading)
except Exception as ex:
trace_exception(ex)
return {"status":False,"error":f"{ex}"}
def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
text = self.clean_text(text)
def tts2_audio_th(thread_uid=None):
url = f"{self.xtts_base_url}/tts_to_audio"
# Define the request body
payload = {
"text": text,
"speaker_wav": speaker,
"language": language
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
# Send the POST request
response = requests.post(url, headers=headers, data=json.dumps(payload))
if response.status_code == 200:
print("Request successful")
print("Response headers:", response.headers)
# Basic logging for debugging
print("First 100 bytes of response content:", response.content[:100])
if file_name_or_path is not None:
try:
with open(self.output_folder / file_name_or_path, 'wb') as file:
# Write the binary content to the file
file.write(response.content)
print(f"File {file_name_or_path} written successfully.")
except Exception as e:
print(f"Failed to write the file. Error: {e}")
else:
print("Request failed with status code:", response.status_code)
if thread_uid:
self.generation_threads.pop(thread_uid, None)
if use_threading:
thread_uid = str(uuid.uuid4())
thread = threading.Thread(target=tts2_audio_th, args=(thread_uid,))
self.generation_threads[thread_uid]=thread
self.thread = thread
thread.start()
ASCIIColors.green("Generation started")
return thread
else:
return tts2_audio_th()
def stop(self):
url = f"{self.xtts_base_url}/stop_streaming"
# Define the request body
payload = {
}
headers = {
'accept': 'application/json',
'Content-Type': 'application/json'
}
# Send the POST request
response = requests.post(url, headers=headers, data=json.dumps(payload))
if response.status_code == 200:
print("Request successful")
raise Exception("LollmsXTTS service is not installed properly.")
def get_speaker_wav(self, speaker) -> Path:
    """
    Locate the reference ``.wav`` file for a speaker in the voices folders.

    Folders are searched in the order they appear in ``self.voices_folders``
    and the first match wins.

    :param speaker: The name of the speaker file (without extension). It must
        be a bare file name: values containing directory components are
        rejected, because the name can originate from a client request and
        must not be able to point outside the voices folders.
    :return: The path to the speaker file if found.
    :raises FileNotFoundError: If the speaker name is not a bare file name or
        no matching file exists in any of the folders.
    """
    speaker_name = str(speaker)
    # Path(...).name strips any directory part; a mismatch means the caller
    # supplied something like "../x" or an absolute path.
    if Path(speaker_name).name == speaker_name:
        for folder in self.voices_folders:
            candidate = Path(folder) / f"{speaker_name}.wav"
            if candidate.is_file():
                return candidate
    raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.")
def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str:
    """
    Synthesize ``text`` straight into an audio file.

    :param text: The text to speak.
    :param file_name_or_path: Destination handed to the model's ``tts_to_file``.
    :param speaker: Voice name resolved through ``get_speaker_wav``; a falsy
        value selects ``"main_voice"``.
    :param language: Language code forwarded to the model.
    :return: ``file_name_or_path``, unchanged.
    """
    voice_name = speaker if speaker else "main_voice"
    reference_wav = self.get_speaker_wav(voice_name)
    self.tts.tts_to_file(
        text=text,
        file_path=file_name_or_path,
        speaker_wav=reference_wav,
        language=language,
    )
    return file_name_or_path
def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False):
# Split text into sentences
sentences = re.split(r'(?<=[.!?]) +', text)
if speaker:
speaker_wav = self.get_speaker_wav(speaker)
else:
speaker_wav = self.get_speaker_wav("main_voice")
if use_threading:
self.stop_event.clear()
generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
generator_thread.start()
self.thread = threading.Thread(target=self._play_audio)
self.thread.start()
else:
self.stop_event.clear()
generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
generator_thread.start()
self._play_audio()
def _generate_audio(self, sentences, speaker_wav, language, file_name_or_path):
wav_data = []
for sentence in sentences:
if self.stop_event.is_set():
break
wav = self.tts.tts(text=sentence, speaker_wav=speaker_wav, language=language)
wav_array = np.array(wav, dtype=np.float32)
wav_array = np.int16(wav_array * 32767)
self.wav_queue.put(wav_array)
wav_data.append(wav_array)
self.wav_queue.put(None) # Signal that generation is done
if file_name_or_path:
self._save_wav(wav_data, file_name_or_path)
def _play_audio(self):
    """
    Consume PCM chunks from ``self.wav_queue`` and play them in order.

    Chunks are buffered and played two at a time; when the ``None`` sentinel
    arrives, whatever is still buffered is played and the loop ends. The loop
    also ends once ``self.stop_event`` is set, and a pending batch is
    abandoned as soon as the stop request is seen.

    The playback rate is read from the loaded model's synthesizer
    (``output_sample_rate``) when it exposes one, and falls back to 22050 Hz
    otherwise.
    # NOTE(review): XTTS v2 is expected to report 24000 Hz here — confirm
    # against the installed TTS version.
    """
    synthesizer = getattr(getattr(self, "tts", None), "synthesizer", None)
    sample_rate = int(getattr(synthesizer, "output_sample_rate", None) or 22050)

    def flush(pending):
        # Play each buffered chunk: mono, 2 bytes per sample.
        for pcm in pending:
            if self.stop_event.is_set():
                return
            self.play_obj = sa.play_buffer(pcm.tobytes(), 1, 2, sample_rate)
            self.play_obj.wait_done()
            time.sleep(0.5)  # Pause between sentences

    buffer = []
    while not self.stop_event.is_set():
        wav = self.wav_queue.get()
        if wav is None:
            # End of stream: play any remaining buffered sentences
            flush(buffer)
            ASCIIColors.green("Audio done")
            break
        buffer.append(wav)
        if len(buffer) >= 2:
            flush(buffer)
            buffer = []
def _save_wav(self, wav_data, file_name_or_path):
with wave.open(str(file_name_or_path), 'wb') as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(22050)
for wav in wav_data:
wf.writeframes(wav.tobytes())
def stop(self):
    """
    Interrupt the current speech output.

    Sets ``self.stop_event`` so the generation and playback loops exit, cuts
    the buffer that is currently playing, then waits for the playback thread
    to finish.
    """
    self.stop_event.set()

    # Stop the audible output first: the playback thread may be blocked
    # waiting for the current buffer to finish, so joining it beforehand
    # would delay the stop until that sentence has been played in full.
    if self.play_obj:
        self.play_obj.stop()

    playback_thread = self.thread
    if (
        playback_thread
        and playback_thread.is_alive()
        # Joining the current thread raises RuntimeError.
        and playback_thread is not threading.current_thread()
    ):
        playback_thread.join()
def get_voices(self):
# List voices from the folder
ASCIIColors.yellow("Listing voices")
voices=["main_voice"]
voices_dir:Path=self.app.lollms_paths.custom_voices_path
voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
voices = []
for voices_folder in self.voices_folders:
voices += [v.stem for v in voices_folder.iterdir() if v.suffix == ".wav"]
return voices
if __name__ == "__main__":
    # Manual smoke test: obtain the service from a fresh application and
    # render one sentence to "output.wav" with a named voice.
    application = LollmsApplication()
    xtts_service = LollmsXTTS.get(application)
    xtts_service.tts_file(
        "Hello, this is a test.",
        "output.wav",
        speaker="ParisNeo_Original_voice",
        language="en",
    )

View File

@ -143,7 +143,17 @@ class LollmsTTS:
list: A list of available voices.
"""
return self.voices
def get_models(self):
    """
    Return the TTS models known to this service.

    Returns:
        The instance's ``models`` attribute, unchanged.
    """
    available_models = self.models
    return available_models
def get_devices(self):
devices = sd.query_devices()
@ -152,7 +162,6 @@ class LollmsTTS:
"device_names": [device['name'] for device in devices if device["max_output_channels"]>0],
"device_indexes": [device['index'] for device in devices if device["max_output_channels"]>0]
}
@staticmethod
def clean_text(text):
# Remove HTML tags

View File

@ -78,8 +78,6 @@ auto_show_browser: true
copy_to_clipboard_add_all_details: false
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
auto_read: false
xtts_current_voice: null
xtts_current_language: en