diff --git a/configs/config.yaml b/configs/config.yaml index 2ca4891..3dd9dd6 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -142,10 +142,6 @@ whisper_model: base tts_output_device: 0 # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 -xtts_use_deepspeed: false -xtts_use_streaming_mode: true auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml index 53bec80..b9e36f5 100644 --- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml @@ -78,8 +78,6 @@ auto_show_browser: true copy_to_clipboard_add_all_details: false # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_config.yaml index 1dcddc4..e04f530 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml @@ -78,8 +78,6 @@ auto_show_browser: true copy_to_clipboard_add_all_details: false # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml index 53bec80..b9e36f5 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml @@ -78,8 +78,6 @@ auto_show_browser: true copy_to_clipboard_add_all_details: false # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/lollms/app.py b/lollms/app.py index ce2f972..a616efd 100644 --- a/lollms/app.py +++ b/lollms/app.py @@ -341,14 +341,10 @@ class LollmsApplication(LoLLMsCom): self.xtts = LollmsXTTS( self, - voices_folder=voices_folder, - voice_samples_path=self.lollms_paths.custom_voices_path, - xtts_base_url=self.config.xtts_base_url, - wait_for_service=False, - use_deep_speed=self.config.xtts_use_deepspeed, - use_streaming_mode=self.config.xtts_use_streaming_mode + voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], ) - except: + except Exception as ex: + trace_exception(ex) self.warning(f"Couldn't load XTTS") ASCIIColors.blue("Loading local TTI services") @@ -458,14 +454,10 @@ class LollmsApplication(LoLLMsCom): self.xtts = LollmsXTTS( self, - voices_folder=voices_folder, - voice_samples_path=self.lollms_paths.custom_voices_path, - xtts_base_url=self.config.xtts_base_url, - wait_for_service=False, - use_deep_speed=self.config.xtts_use_deepspeed, - use_streaming_mode=self.config.xtts_use_streaming_mode + voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], ) - except: + except Exception as ex: + trace_exception(ex) self.warning(f"Couldn't load XTTS") ASCIIColors.blue("Loading local TTI services") diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index 2ca4891..3dd9dd6 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -142,10 +142,6 @@ whisper_model: base tts_output_device: 0 # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 -xtts_use_deepspeed: false -xtts_use_streaming_mode: true auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/lollms/paths.py b/lollms/paths.py index d80c280..0ffcd24 100644 --- a/lollms/paths.py +++ b/lollms/paths.py @@ -72,6 +72,14 @@ class LollmsPaths: self.personal_outputs_path = self.personal_path / "outputs" self.personal_user_infos_path = self.personal_path / "user_infos" + + self.personal_services_path = self.personal_path / "services" + self.personal_stt_services_path = self.personal_services_path / "stt" + self.personal_tts_services_path = self.personal_services_path / "tts" + self.personal_tti_services_path = self.personal_services_path / "tti" + self.personal_ttm_services_path = self.personal_services_path / "ttm" + + self.personal_trainers_path = self.personal_path / "trainers" self.gptqlora_path = self.personal_trainers_path / "gptqlora" @@ -117,6 +125,25 @@ class LollmsPaths: ASCIIColors.yellow(f"{self.personal_models_path}") ASCIIColors.red("personal_user_infos_path:",end="") ASCIIColors.yellow(f"{self.personal_user_infos_path}") + + ASCIIColors.red("personal_services_path:",end="") + ASCIIColors.yellow(f"{self.personal_services_path}") + + + ASCIIColors.red("personal_stt_services_path:", end="") + ASCIIColors.yellow(f"{self.personal_stt_services_path}") + + ASCIIColors.red("personal_tts_services_path:", end="") + ASCIIColors.yellow(f"{self.personal_tts_services_path}") + + ASCIIColors.red("personal_tti_services_path:", end="") + ASCIIColors.yellow(f"{self.personal_tti_services_path}") + + ASCIIColors.red("personal_ttm_services_path:", end="") + ASCIIColors.yellow(f"{self.personal_ttm_services_path}") + + + ASCIIColors.red("personal_trainers_path:",end="") ASCIIColors.yellow(f"{self.personal_trainers_path}") ASCIIColors.red("personal_trainers_path:",end="") @@ -162,6 +189,12 @@ class LollmsPaths: "Personal user infos path": self.personal_user_infos_path, "Personal trainers path": self.personal_trainers_path, "Personal gptqlora trainer path": self.gptqlora_path, + + "Personal services path": self.personal_services_path, + "Personal STT services path": self.personal_stt_services_path, + "Personal TTS services path": self.personal_tts_services_path, + "Personal TTI services path": self.personal_tti_services_path, + "Personal TTM services path": self.personal_ttm_services_path, } return "\n".join([f"{category}: {path}" for category, path in directories.items()]) @@ -180,6 +213,12 @@ class LollmsPaths: self.personal_outputs_path.mkdir(parents=True, exist_ok=True) self.personal_uploads_path.mkdir(parents=True, exist_ok=True) self.personal_user_infos_path.mkdir(parents=True, exist_ok=True) + + self.personal_services_path.mkdir(parents=True, exist_ok=True) + self.personal_stt_services_path.mkdir(parents=True, exist_ok=True) + self.personal_tts_services_path.mkdir(parents=True, exist_ok=True) + self.personal_tti_services_path.mkdir(parents=True, exist_ok=True) + self.personal_ttm_services_path.mkdir(parents=True, exist_ok=True) self.personal_trainers_path.mkdir(parents=True, exist_ok=True) self.custom_personalities_path.mkdir(parents=True, exist_ok=True) self.custom_voices_path.mkdir(parents=True, exist_ok=True) diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml index e08c9a7..a7c26d1 100644 --- a/lollms/server/configs/config.yaml +++ b/lollms/server/configs/config.yaml @@ -139,10 +139,6 @@ whisper_model: base tts_output_device: 0 # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 -xtts_use_deepspeed: false -xtts_use_streaming_mode: true auto_read: false xtts_current_voice: null xtts_current_language: en diff --git a/lollms/server/endpoints/lollms_comfyui.py b/lollms/server/endpoints/lollms_comfyui.py index 1233ac5..1e90a92 100644 --- a/lollms/server/endpoints/lollms_comfyui.py +++ b/lollms/server/endpoints/lollms_comfyui.py @@ -9,7 +9,7 @@ description: """ from fastapi import APIRouter, Request from pydantic import BaseModel, Field -from lollms_webui import LOLLMSWebUI +from lollms.server.elf_server import LOLLMSElfServer from pydantic import BaseModel from lollms.security import check_access from starlette.responses import StreamingResponse @@ -24,7 +24,7 @@ import platform # ----------------------- Defining router and main class ------------------------------ router = APIRouter() -lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance() +lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance() # ----------------------- voice ------------------------------ diff --git a/lollms/server/endpoints/lollms_diffusers.py b/lollms/server/endpoints/lollms_diffusers.py index 0580625..566f3d6 100644 --- a/lollms/server/endpoints/lollms_diffusers.py +++ b/lollms/server/endpoints/lollms_diffusers.py @@ -7,7 +7,7 @@ description: """ from fastapi import APIRouter, Request -from lollms_webui import LOLLMSWebUI +from lollms.server.elf_server import LOLLMSElfServer from pydantic import BaseModel from starlette.responses import StreamingResponse from lollms.types import MSG_TYPE @@ -22,7 +22,7 @@ import platform # ----------------------- Defining router and main class ------------------------------ router = APIRouter() -lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance() +lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance() class Identification(BaseModel): client_id: str diff --git a/lollms/server/endpoints/lollms_discussion.py b/lollms/server/endpoints/lollms_discussion.py index 50a6355..3efc7dd 100644 --- a/lollms/server/endpoints/lollms_discussion.py +++ b/lollms/server/endpoints/lollms_discussion.py @@ -8,7 +8,7 @@ description: """ from fastapi import APIRouter, Request -from lollms_webui import LOLLMSWebUI +from lollms.server.elf_server import LOLLMSElfServer from pydantic import BaseModel from starlette.responses import StreamingResponse from lollms.types import MSG_TYPE @@ -43,7 +43,7 @@ class DeleteDiscussionParameters(BaseModel): # ----------------------- Defining router and main class ------------------------------ router = APIRouter() -lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance() +lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance() @router.get("/list_discussions") diff --git a/lollms/server/endpoints/lollms_tts.py b/lollms/server/endpoints/lollms_tts.py index d24ece5..2cf71c2 100644 --- a/lollms/server/endpoints/lollms_tts.py +++ b/lollms/server/endpoints/lollms_tts.py @@ -56,6 +56,16 @@ def list_stt_models(): ASCIIColors.yellow("Listing voices") return {"voices":lollmsElfServer.stt.get_models()} +@router.get("/list_tts_models") +def list_tts_models(): + if lollmsElfServer.config.headless_server_mode: + return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"} + + if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1": + return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"} + + ASCIIColors.yellow("Listing voices") + return {"voices":lollmsElfServer.tts.get_models()} @router.post("/set_voice") async def set_voice(request: Request): @@ -134,7 +144,11 @@ async def text2Audio(request: LollmsText2AudioRequest): if lollmsElfServer.tts is None: return {"url": None, "error":f"No TTS service is on"} if lollmsElfServer.tts.ready: - response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn, use_threading=True) + if request.voice: + voice = request.voice + else: + voice = lollmsElfServer.config.xtts_current_voice + response = lollmsElfServer.tts.tts_audio(request.text, voice, file_name_or_path=request.fn, use_threading=True) return response else: return {"url": None, "error":f"TTS service is not ready yet"} @@ -143,8 +157,8 @@ async def text2Audio(request: LollmsText2AudioRequest): lollmsElfServer.error(ex) return {"status":False,"error":str(ex)} -@router.post("/text2wav") -async def text2Wav(request: LollmsText2AudioRequest): +@router.post("/text2Wave") +async def text2Wave(request: LollmsText2AudioRequest): """ Executes Python code and returns the output. @@ -168,9 +182,14 @@ async def text2Wav(request: LollmsText2AudioRequest): request.fn.parent.mkdir(exist_ok=True, parents=True) try: + if request.voice: + voice = request.voice + else: + voice = lollmsElfServer.config.xtts_current_voice + # Get the JSON data from the POST request. if lollmsElfServer.tts.ready: - response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn) + response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice) return response else: return {"url": None, "error":f"TTS service is not ready yet"} @@ -215,11 +234,7 @@ def start_xtts(): lollmsElfServer.tts = LollmsXTTS( lollmsElfServer, - voices_folder=voices_folder, - voice_samples_path=Path(__file__).parent/"voices", - xtts_base_url= lollmsElfServer.config.xtts_base_url, - use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed, - use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode + voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path], ) lollmsElfServer.HideBlockingMessage() except Exception as ex: diff --git a/lollms/services/xtts/lollms_xtts.py b/lollms/services/xtts/lollms_xtts.py index bbd0cf2..95c9dac 100644 --- a/lollms/services/xtts/lollms_xtts.py +++ b/lollms/services/xtts/lollms_xtts.py @@ -1,404 +1,193 @@ -# Title LollmsXTTS -# Licence: MIT -# Author : Paris Neo -# Adapted from the work of daswer123's xtts-api-server -# check it out : https://github.com/daswer123/xtts-api-server -# Here is a copy of the LICENCE https://github.com/daswer123/xtts-api-server/blob/main/LICENSE -# All rights are reserved +""" +project: lollms_tts +file: lollms_tts.py +author: ParisNeo +description: + This file hosts the LollmsXTTS service which provides text-to-speech functionalities using the TTS library. +""" from pathlib import Path -import sys from lollms.app import LollmsApplication from lollms.paths import LollmsPaths -from lollms.config import TypedConfig, ConfigTemplate, BaseConfig from lollms.utilities import PackageManager, find_first_available_file_index, add_period -import time -import io -import sys -import requests -import os -import base64 -import subprocess -import time -import json -import re -import platform -import threading -from dataclasses import dataclass -from PIL import Image, PngImagePlugin -from enum import Enum -from typing import List, Dict, Any -import uuid - from ascii_colors import ASCIIColors, trace_exception -from lollms.paths import LollmsPaths -from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists from lollms.tts import LollmsTTS -import subprocess -import platform +from lollms.utilities import run_pip_in_env +from typing import List +import threading +import numpy as np +# Ensure required packages are installed +if not PackageManager.check_package_installed("TTS"): + PackageManager.install_or_update("TTS") +if not PackageManager.check_package_installed("simpleaudio"): + PackageManager.install_or_update("simpleaudio") +if not PackageManager.check_package_installed("wave"): + PackageManager.install_or_update("wave") + +import wave +from TTS.api import TTS +import simpleaudio as sa +import time +from queue import Queue +import re class LollmsXTTS(LollmsTTS): - def __init__( - self, - app:LollmsApplication, - xtts_base_url=None, - share=False, - max_retries=20, - voices_folder=None, - voice_samples_path="", - wait_for_service=True, - use_deep_speed=False, - use_streaming_mode = True - ): - super().__init__("xtts",app) + def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]): + super().__init__("lollms_xtts", app) self.generation_threads = {} - self.voices_folder = voices_folder - self.ready = False - if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020": - xtts_base_url = None - # Get the current directory - lollms_paths = app.lollms_paths - root_dir = lollms_paths.personal_path - self.voice_samples_path = voice_samples_path - self.use_deep_speed = use_deep_speed - self.use_streaming_mode = use_streaming_mode - - # Store the path to the script - if xtts_base_url is None: - self.xtts_base_url = "http://127.0.0.1:8020" - if not LollmsXTTS.verify(lollms_paths): - LollmsXTTS.install(app) - else: - self.xtts_base_url = xtts_base_url - - self.auto_xtts_url = self.xtts_base_url+"/sdapi/v1" - shared_folder = root_dir/"shared" - self.xtts_path = shared_folder / "xtts" + self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"] + self.stop_event = threading.Event() + # Show a cool LOGO using ASCIIColors ASCIIColors.red(" __ ___ __ __ __ __ ___ _ ") ASCIIColors.red(" / / /___\/ / / / /\/\ / _\ \ \/ / |_| |_ ___ ") ASCIIColors.red(" / / // // / / / / \ \ \ _____\ /| __| __/ __| ") ASCIIColors.red("/ /___/ \_// /___/ /___/ /\/\ \_\ \_____/ \| |_| |_\__ \ ") ASCIIColors.red("\____/\___/\____/\____/\/ \/\__/ /_/\_\\__|\__|___/ ") - - ASCIIColors.red(" Forked from daswer123's XTTS server") - ASCIIColors.red(" Integration in lollms by ParisNeo using daswer123's webapi") - ASCIIColors.red(" Address :",end="") - ASCIIColors.yellow(f"{self.xtts_base_url}") - self.output_folder = app.lollms_paths.personal_outputs_path/"audio_out" - self.output_folder.mkdir(parents=True, exist_ok=True) + # Load the TTS model + self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2") + self.tts.to("cuda") + self.wav_queue = Queue() + self.play_obj = None + self.thread = None + self.ready = True - if not self.wait_for_service(1,False): - ASCIIColors.info("Loading lollms_xtts") - # Launch the Flask service using the appropriate script for the platform - self.process = self.run_xtts_api_server() - - # Wait until the service is available at http://127.0.0.1:7860/ - if wait_for_service: - self.wait_for_service() - else: - self.wait_for_service_in_another_thread(max_retries=max_retries) - - def install(lollms_app:LollmsApplication): - ASCIIColors.green("XTTS installation started") - repo_url = "https://github.com/ParisNeo/xtts-api-server" - root_dir = lollms_app.lollms_paths.personal_path - shared_folder = root_dir/"shared" - xtts_path = shared_folder / "xtts" - - # Step 1: Clone or update the repository - if os.path.exists(xtts_path): - print("Repository already exists. Pulling latest changes...") - try: - subprocess.run(["git", "-C", xtts_path, "pull"], check=True) - except: - subprocess.run(["git", "clone", repo_url, xtts_path], check=True) - - else: - print("Cloning repository...") - subprocess.run(["git", "clone", repo_url, xtts_path], check=True) - - # Step 2: Create or update the Conda environment - if environment_exists("xtts"): - print("Conda environment 'xtts' already exists. Updating...") - # Here you might want to update the environment, e.g., update Python or dependencies - # This step is highly dependent on how you manage your Conda environments and might involve - # running `conda update` commands or similar. - else: - print("Creating Conda environment 'xtts'...") - create_conda_env("xtts", "3.10") - - # Step 3: Install or update dependencies using your custom function - requirements_path = os.path.join(xtts_path, "requirements.txt") - run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path) - run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path) - run_pip_in_env("xtts", f"install -e {xtts_path}", cwd=xtts_path) - - # Step 4: Launch the server - # Assuming the server can be started with a Python script in the cloned repository - print("Launching XTTS API server...") - run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path) - - print("XTTS API server setup and launch completed.") - ASCIIColors.cyan("Done") - ASCIIColors.cyan("Installing xtts-api-server") - ASCIIColors.green("XTTS server installed successfully") + def install(lollms_app: LollmsApplication): + ASCIIColors.green("LollmsXTTS installation started") + # Here you can perform installation of needed things, or create configuration files or download needed assets etc. + run_pip_in_env("TTS") + run_pip_in_env("simpleaudio") @staticmethod - def verify(lollms_paths:LollmsPaths)->bool: - # Clone repository - root_dir = lollms_paths.personal_path - shared_folder = root_dir/"shared" - xtts_path = shared_folder / "xtts" - return xtts_path.exists() - - @staticmethod - def get(app: LollmsApplication) -> 'LollmsXTTS': - root_dir = app.lollms_paths.personal_path - shared_folder = root_dir/"shared" - xtts_path = shared_folder / "xtts" - xtts_script_path = xtts_path / "lollms_xtts.py" - git_pull(xtts_path) - - if xtts_script_path.exists(): - ASCIIColors.success("lollms_xtts found.") - ASCIIColors.success("Loading source file...",end="") - # use importlib to load the module from the file path - from lollms.services.xtts.lollms_xtts import LollmsXTTS - ASCIIColors.success("ok") - return LollmsXTTS - - def run_xtts_api_server(self): - root_dir = self.app.lollms_paths.personal_path - shared_folder = root_dir/"shared" - xtts_path = shared_folder / "xtts" - - # Get the path to the current Python interpreter - ASCIIColors.yellow("Loading XTTS ") - options= "" - if self.use_deep_speed: - options += " --deepspeed" - if self.use_streaming_mode: - options += " --streaming-mode --streaming-mode-improve --stream-play-sync" - process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", cwd=xtts_path, wait= False) - return process - - def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True): - thread = threading.Thread(target=self.wait_for_service, args=(max_retries, show_warning)) - thread.start() - return thread - - def update_settings(self): + def verify(lollms_paths: LollmsPaths) -> bool: + # Verify that the service is installed either by verifying the libraries are installed or that some files or folders exist try: - settings = { - "stream_chunk_size": int(self.app.config.xtts_stream_chunk_size), - "temperature": float(self.app.config.xtts_temperature), - "speed": float(self.app.config.xtts_speed), - "length_penalty": float(self.app.config.xtts_length_penalty), - "repetition_penalty": float(self.app.config.xtts_repetition_penalty), - "top_p": float(self.app.config.xtts_top_p), - "top_k": int(self.app.config.xtts_top_k), - "enable_text_splitting": bool(self.app.config.xtts_enable_text_splitting) - } - print("set_tts_settings") - print(f"{settings}") - response = requests.post(f"{self.xtts_base_url}/set_tts_settings", settings,headers={ - 'accept': 'application/json', - 'Content-Type': 'application/json' - }) - if response.status_code == 200: - ASCIIColors.success("XTTS updated successfully") - except Exception as ex: - trace_exception(ex) - pass - - def wait_for_service(self, max_retries = 150, show_warning=True): - print(f"Waiting for xtts service (max_retries={max_retries})") - url = f"{self.xtts_base_url}/languages" - # Adjust this value as needed - retries = 0 - - while retries < max_retries or max_retries<0: - try: - response = requests.get(url) - if response.status_code == 200: - self.update_settings() - print(f"voices_folder is {self.voices_folder}.") - self.ready = True - if self.voices_folder is not None: - print("Generating sample audio.") - voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"] - try: - self.tts_audio("x t t s is ready",voice_file[0].stem) - except Exception as ex: - return True - print("Service is available.") - if self.app is not None: - self.app.success("XTTS Service is now available.") - return True - except: - pass - - retries += 1 - ASCIIColors.yellow("Waiting for xtts...") - time.sleep(5) - - if show_warning: - print("Service did not become available within the given time.") - if self.app is not None: - self.app.error("XTTS Service did not become available within the given time.") - return False - - def set_speaker_folder(self, speaker_folder): - url = f"{self.xtts_base_url}/set_speaker_folder" - - # Define the request body - payload = { - "speaker_folder": str(speaker_folder) - } - - # Send the POST request - response = requests.post(url, json=payload) - - # Check the response status code - if response.status_code == 200: - print("Request successful") + import TTS + import simpleaudio return True - # You can access the response data using response.json() - else: - print("Request failed with status code:", response.status_code) + except ImportError: return False - def tts_file(self, text, file_name_or_path, speaker=None, language="en")->str: - text = self.clean_text(text) - url = f"{self.xtts_base_url}/tts_to_file" - - # Define the request body - payload = { - "text": text, - "speaker_wav": speaker, - "language": language, - "file_name_or_path": file_name_or_path - } - headers = { - 'accept': 'application/json', - 'Content-Type': 'application/json' - } - - # Send the POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) - - # Check the response status code - if response.status_code == 200: - print("Request successful") - # You can access the response data using response.json() + @staticmethod + def get(app: LollmsApplication) -> 'LollmsXTTS': + # Verify if the service is installed and if true then return an instance of LollmsXTTS + if LollmsXTTS.verify(app.lollms_paths): + return LollmsXTTS(app, app.lollms_paths.custom_voices_path) else: - print("Request failed with status code:", response.status_code) - - return file_name_or_path - - def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False): - voice=self.app.config.xtts_current_voice if speaker is None else speaker - index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav") - output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path - if voice is None: - voice = "main_voice" - self.app.info("Starting to build voice") - try: - from lollms.services.xtts.lollms_xtts import LollmsXTTS - # If the personality has a voice, then use it - personality_audio:Path = self.app.personality.personality_package_path/"audio" - if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0: - voices_folder = personality_audio - elif voice!="main_voice": - voices_folder = self.app.lollms_paths.custom_voices_path - else: - voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices" - language = self.app.config.xtts_current_language# convert_language_name() - self.set_speaker_folder(voices_folder) - preprocessed_text= add_period(text) - voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"] - if len(voice_file)==0: - return {"status":False,"error":"Voice not found"} - self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language, use_threading=use_threading) - - except Exception as ex: - trace_exception(ex) - return {"status":False,"error":f"{ex}"} - - def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False): - text = self.clean_text(text) - def tts2_audio_th(thread_uid=None): - url = f"{self.xtts_base_url}/tts_to_audio" - - # Define the request body - payload = { - "text": text, - "speaker_wav": speaker, - "language": language - } - headers = { - 'accept': 'application/json', - 'Content-Type': 'application/json' - } - - # Send the POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) - - if response.status_code == 200: - print("Request successful") - print("Response headers:", response.headers) - - # Basic logging for debugging - print("First 100 bytes of response content:", response.content[:100]) - - if file_name_or_path is not None: - try: - with open(self.output_folder / file_name_or_path, 'wb') as file: - # Write the binary content to the file - file.write(response.content) - print(f"File {file_name_or_path} written successfully.") - except Exception as e: - print(f"Failed to write the file. Error: {e}") - else: - print("Request failed with status code:", response.status_code) - if thread_uid: - self.generation_threads.pop(thread_uid, None) - if use_threading: - thread_uid = str(uuid.uuid4()) - thread = threading.Thread(target=tts2_audio_th, args=(thread_uid,)) - self.generation_threads[thread_uid]=thread - self.thread = thread - thread.start() - ASCIIColors.green("Generation started") - return thread - else: - return tts2_audio_th() - - def stop(self): - url = f"{self.xtts_base_url}/stop_streaming" - - # Define the request body - payload = { - } - headers = { - 'accept': 'application/json', - 'Content-Type': 'application/json' - } - - # Send the POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) - - if response.status_code == 200: - print("Request successful") - + raise Exception("LollmsXTTS service is not installed properly.") + def get_speaker_wav(self, speaker) -> Path: + """ + Searches for the speaker file in the specified folders. + :param speaker: The name of the speaker file (without extension). + :return: The path to the speaker file if found. + :raises FileNotFoundError: If the speaker file is not found in any of the folders. + """ + for folder in self.voices_folders: + potential_speaker_wav = Path(folder) / f"{speaker}.wav" + if potential_speaker_wav.exists(): + return potential_speaker_wav + + raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.") + def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str: + speaker_wav = None + + if speaker: + speaker_wav = self.get_speaker_wav(speaker) + else: + speaker_wav = self.get_speaker_wav("main_voice") + + self.tts.tts_to_file(text=text, file_path=file_name_or_path, speaker_wav=speaker_wav, language=language) + return file_name_or_path + + def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False): + # Split text into sentences + sentences = re.split(r'(?<=[.!?]) +', text) + + if speaker: + speaker_wav = self.get_speaker_wav(speaker) + else: + speaker_wav = self.get_speaker_wav("main_voice") + + if use_threading: + self.stop_event.clear() + generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path)) + generator_thread.start() + self.thread = threading.Thread(target=self._play_audio) + self.thread.start() + else: + self.stop_event.clear() + generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path)) + generator_thread.start() + self._play_audio() + + def _generate_audio(self, sentences, speaker_wav, language, file_name_or_path): + wav_data = [] + for sentence in sentences: + if self.stop_event.is_set(): + break + wav = self.tts.tts(text=sentence, speaker_wav=speaker_wav, language=language) + wav_array = np.array(wav, dtype=np.float32) + wav_array = np.int16(wav_array * 32767) + self.wav_queue.put(wav_array) + wav_data.append(wav_array) + self.wav_queue.put(None) # Signal that generation is done + + if file_name_or_path: + self._save_wav(wav_data, file_name_or_path) + + def _play_audio(self): + buffered_sentences = 0 + buffer = [] + while not self.stop_event.is_set(): + wav = self.wav_queue.get() + if wav is None: + # Play any remaining buffered sentences + for buffered_wav in buffer: + self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050) + self.play_obj.wait_done() + time.sleep(0.5) # Pause between sentences + ASCIIColors.green("Audio done") + break + buffer.append(wav) + buffered_sentences += 1 + if buffered_sentences >= 2: + for buffered_wav in buffer: + self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050) + self.play_obj.wait_done() + time.sleep(0.5) # Pause between sentences + buffer = [] + buffered_sentences = 0 + + def _save_wav(self, wav_data, file_name_or_path): + with wave.open(str(file_name_or_path), 'wb') as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(22050) + for wav in wav_data: + wf.writeframes(wav.tobytes()) + + def stop(self): + self.stop_event.set() + if self.thread and self.thread.is_alive(): + self.thread.join() + if self.play_obj: + self.play_obj.stop() + def get_voices(self): + # List voices from the folder ASCIIColors.yellow("Listing voices") - voices=["main_voice"] - voices_dir:Path=self.app.lollms_paths.custom_voices_path - voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"] + voices = [] + for voices_folder in self.voices_folders: + voices += [v.stem for v in voices_folder.iterdir() if v.suffix == ".wav"] return voices + + +if __name__ == "__main__": + # Here do some example + app = LollmsApplication() + lollms_xtts_service = LollmsXTTS.get(app) + lollms_xtts_service.tts_file("Hello, this is a test.", "output.wav", speaker="ParisNeo_Original_voice", language="en") diff --git a/lollms/tts.py b/lollms/tts.py index 6daff60..372de6b 100644 --- a/lollms/tts.py +++ b/lollms/tts.py @@ -143,7 +143,17 @@ class LollmsTTS: list: A list of available voices. """ return self.voices - + + def get_models(self): + """ + Retrieves the available models for TTS. + + Returns: + list: A list of available models. + """ + return self.models + + def get_devices(self): devices = sd.query_devices() @@ -152,7 +162,6 @@ class LollmsTTS: "device_names": [device['name'] for device in devices if device["max_output_channels"]>0], "device_indexes": [device['index'] for device in devices if device["max_output_channels"]>0] } - @staticmethod def clean_text(text): # Remove HTML tags diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml index 53bec80..b9e36f5 100644 --- a/personal_data/configs/lollms_discord_local_config.yaml +++ b/personal_data/configs/lollms_discord_local_config.yaml @@ -78,8 +78,6 @@ auto_show_browser: true copy_to_clipboard_add_all_details: false # Voice service -xtts_enable: false -xtts_base_url: http://localhost:8020 auto_read: false xtts_current_voice: null xtts_current_language: en