upgraded xtts

2024-12-19 20:57:58 +00:00 · 2024-07-15 00:38:56 +02:00 · 2024-07-15 00:38:56 +02:00 · 3a00e968fb
commit 3a00e968fb
parent aa21fdfb49
15 changed files with 248 additions and 424 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -142,10 +142,6 @@ whisper_model: base
 tts_output_device: 0
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/app.py
+++ b/lollms/app.py
@ -341,14 +341,10 @@ class LollmsApplication(LoLLMsCom):
                self.xtts = LollmsXTTS(
                                        self,
-                                        voices_folder=voices_folder,
+                                        voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
                                        voice_samples_path=self.lollms_paths.custom_voices_path, 
                                        xtts_base_url=self.config.xtts_base_url,
                                        wait_for_service=False,
                                        use_deep_speed=self.config.xtts_use_deepspeed,
                                        use_streaming_mode=self.config.xtts_use_streaming_mode
                                    )
-            except:
+            except Exception as ex:
                trace_exception(ex)
                self.warning(f"Couldn't load XTTS")
        ASCIIColors.blue("Loading local TTI services")
@ -458,14 +454,10 @@ class LollmsApplication(LoLLMsCom):
                    self.xtts = LollmsXTTS(
                                            self,
-                                            voices_folder=voices_folder,
+                                            voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
                                            voice_samples_path=self.lollms_paths.custom_voices_path, 
                                            xtts_base_url=self.config.xtts_base_url,
                                            wait_for_service=False,
                                            use_deep_speed=self.config.xtts_use_deepspeed,
                                            use_streaming_mode=self.config.xtts_use_streaming_mode
                                        )
-                except:
+                except Exception as ex:
                    trace_exception(ex)
                    self.warning(f"Couldn't load XTTS")
            ASCIIColors.blue("Loading local TTI services")
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -142,10 +142,6 @@ whisper_model: base
 tts_output_device: 0
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/paths.py
+++ b/lollms/paths.py
@ -72,6 +72,14 @@ class LollmsPaths:
        self.personal_outputs_path          = self.personal_path / "outputs"
        self.personal_user_infos_path       = self.personal_path / "user_infos"
        self.personal_services_path         = self.personal_path / "services"
        self.personal_stt_services_path     = self.personal_services_path / "stt"
        self.personal_tts_services_path     = self.personal_services_path / "tts"
        self.personal_tti_services_path     = self.personal_services_path / "tti"
        self.personal_ttm_services_path     = self.personal_services_path / "ttm"
        self.personal_trainers_path         = self.personal_path / "trainers"
        self.gptqlora_path                  = self.personal_trainers_path / "gptqlora"
@ -117,6 +125,25 @@ class LollmsPaths:
        ASCIIColors.yellow(f"{self.personal_models_path}")
        ASCIIColors.red("personal_user_infos_path:",end="")
        ASCIIColors.yellow(f"{self.personal_user_infos_path}")
        ASCIIColors.red("personal_services_path:",end="")
        ASCIIColors.yellow(f"{self.personal_services_path}")
        ASCIIColors.red("personal_stt_services_path:", end="")
        ASCIIColors.yellow(f"{self.personal_stt_services_path}")
        ASCIIColors.red("personal_tts_services_path:", end="")
        ASCIIColors.yellow(f"{self.personal_tts_services_path}")
        ASCIIColors.red("personal_tti_services_path:", end="")
        ASCIIColors.yellow(f"{self.personal_tti_services_path}")
        ASCIIColors.red("personal_ttm_services_path:", end="")
        ASCIIColors.yellow(f"{self.personal_ttm_services_path}")
        ASCIIColors.red("personal_trainers_path:",end="")
        ASCIIColors.yellow(f"{self.personal_trainers_path}")
        ASCIIColors.red("personal_trainers_path:",end="")
@ -162,6 +189,12 @@ class LollmsPaths:
            "Personal user infos path": self.personal_user_infos_path,
            "Personal trainers path": self.personal_trainers_path,
            "Personal gptqlora trainer path": self.gptqlora_path,
            "Personal services path": self.personal_services_path,
            "Personal STT services path": self.personal_stt_services_path,
            "Personal TTS services path": self.personal_tts_services_path,
            "Personal TTI services path": self.personal_tti_services_path,
            "Personal TTM services path": self.personal_ttm_services_path,            
        }
        return "\n".join([f"{category}: {path}" for category, path in directories.items()])
@ -180,6 +213,12 @@ class LollmsPaths:
        self.personal_outputs_path.mkdir(parents=True, exist_ok=True)
        self.personal_uploads_path.mkdir(parents=True, exist_ok=True)
        self.personal_user_infos_path.mkdir(parents=True, exist_ok=True)
        self.personal_services_path.mkdir(parents=True, exist_ok=True)
        self.personal_stt_services_path.mkdir(parents=True, exist_ok=True)
        self.personal_tts_services_path.mkdir(parents=True, exist_ok=True)
        self.personal_tti_services_path.mkdir(parents=True, exist_ok=True)
        self.personal_ttm_services_path.mkdir(parents=True, exist_ok=True)        
        self.personal_trainers_path.mkdir(parents=True, exist_ok=True)
        self.custom_personalities_path.mkdir(parents=True, exist_ok=True)
        self.custom_voices_path.mkdir(parents=True, exist_ok=True)
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -139,10 +139,6 @@ whisper_model: base
 tts_output_device: 0
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/server/endpoints/lollms_comfyui.py
+++ b/lollms/server/endpoints/lollms_comfyui.py
@ -9,7 +9,7 @@ description:
 """
 from fastapi import APIRouter, Request
 from pydantic import BaseModel, Field
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from lollms.security import check_access
 from starlette.responses import StreamingResponse
@ -24,7 +24,7 @@ import platform
 # ----------------------- Defining router and main class ------------------------------
 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
 # ----------------------- voice ------------------------------
--- a/lollms/server/endpoints/lollms_diffusers.py
+++ b/lollms/server/endpoints/lollms_diffusers.py
@ -7,7 +7,7 @@ description:
 """
 from fastapi import APIRouter, Request
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
@ -22,7 +22,7 @@ import platform
 # ----------------------- Defining router and main class ------------------------------
 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
 class Identification(BaseModel):
    client_id: str
--- a/lollms/server/endpoints/lollms_discussion.py
+++ b/lollms/server/endpoints/lollms_discussion.py
@ -8,7 +8,7 @@ description:
 """
 from fastapi import APIRouter, Request
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
@ -43,7 +43,7 @@ class DeleteDiscussionParameters(BaseModel):
 # ----------------------- Defining router and main class ------------------------------
 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
@router.get("/list_discussions")
--- a/lollms/server/endpoints/lollms_tts.py
+++ b/lollms/server/endpoints/lollms_tts.py
@ -56,6 +56,16 @@ def list_stt_models():
    ASCIIColors.yellow("Listing voices")
    return {"voices":lollmsElfServer.stt.get_models()}
@router.get("/list_tts_models")
def list_tts_models():
    """
    List the models exposed by the currently loaded TTS service.

    Returns:
        dict: ``{"voices": <result of tts.get_models()>}`` on success, or
        ``{"status": False, "error": <message>}`` when the request is refused
        (headless mode, server not bound to localhost) or when no TTS service
        is loaded.
    """
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}

    if lollmsElfServer.config.host not in ("localhost", "127.0.0.1"):
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}

    # Without this guard a missing TTS service surfaces as an AttributeError
    # (HTTP 500) instead of a readable error payload.
    if lollmsElfServer.tts is None:
        return {"status":False,"error":"No TTS service is on"}

    ASCIIColors.yellow("Listing voices")
    return {"voices":lollmsElfServer.tts.get_models()}
@router.post("/set_voice")
 async def set_voice(request: Request):
@ -134,7 +144,11 @@ async def text2Audio(request: LollmsText2AudioRequest):
        if lollmsElfServer.tts is None:
            return {"url": None, "error":f"No TTS service is on"}
        if lollmsElfServer.tts.ready:
-            response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn, use_threading=True)
+            if request.voice:
                voice = request.voice
            else:
                voice = lollmsElfServer.config.xtts_current_voice
            response = lollmsElfServer.tts.tts_audio(request.text, voice, file_name_or_path=request.fn, use_threading=True)
            return response
        else:
            return {"url": None, "error":f"TTS service is not ready yet"}
@ -143,8 +157,8 @@ async def text2Audio(request: LollmsText2AudioRequest):
        lollmsElfServer.error(ex)
        return {"status":False,"error":str(ex)}
-@router.post("/text2wav")
+@router.post("/text2Wave")
-async def text2Wav(request: LollmsText2AudioRequest):
+async def text2Wave(request: LollmsText2AudioRequest):
    """
    Executes Python code and returns the output.
@ -168,9 +182,14 @@ async def text2Wav(request: LollmsText2AudioRequest):
    request.fn.parent.mkdir(exist_ok=True, parents=True)
    try:
        if request.voice:
            voice = request.voice
        else:
            voice = lollmsElfServer.config.xtts_current_voice
        # Get the JSON data from the POST request.
        if lollmsElfServer.tts.ready:
-            response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
+            response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
            return response
        else:
            return {"url": None, "error":f"TTS service is not ready yet"}
@ -215,11 +234,7 @@ def start_xtts():
            lollmsElfServer.tts = LollmsXTTS(
                lollmsElfServer, 
-                voices_folder=voices_folder,
+                voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],                                        
                voice_samples_path=Path(__file__).parent/"voices", 
                xtts_base_url= lollmsElfServer.config.xtts_base_url,
                use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
                use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode                                                    
            )
        lollmsElfServer.HideBlockingMessage()
    except Exception as ex:
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -1,404 +1,193 @@
-# Title LollmsXTTS
+"""
-# Licence: MIT
+project: lollms_tts
-# Author : Paris Neo
+file: lollms_tts.py 
-# Adapted from the work of daswer123's xtts-api-server
+author: ParisNeo
-# check it out : https://github.com/daswer123/xtts-api-server
+description: 
-# Here is a copy of the LICENCE https://github.com/daswer123/xtts-api-server/blob/main/LICENSE
+    This file hosts the LollmsXTTS service which provides text-to-speech functionalities using the TTS library.
-# All rights are reserved
+"""
 from pathlib import Path
 import sys
 from lollms.app import LollmsApplication
 from lollms.paths import LollmsPaths
 from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
 from lollms.utilities import PackageManager, find_first_available_file_index, add_period
 import time
 import io
 import sys
 import requests
 import os
 import base64
 import subprocess
 import time
 import json
 import re
 import platform
 import threading
 from dataclasses import dataclass
 from PIL import Image, PngImagePlugin
 from enum import Enum
 from typing import List, Dict, Any
 import uuid
 from ascii_colors import ASCIIColors, trace_exception
 from lollms.paths import LollmsPaths
 from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
 from lollms.tts import LollmsTTS
-import subprocess
+from lollms.utilities import run_pip_in_env
-import platform
+from typing import List
 import threading
 import numpy as np
 # Ensure required packages are installed
 if not PackageManager.check_package_installed("TTS"):
    PackageManager.install_or_update("TTS")
 if not PackageManager.check_package_installed("simpleaudio"):
    PackageManager.install_or_update("simpleaudio")
 if not PackageManager.check_package_installed("wave"):
    PackageManager.install_or_update("wave")
 import wave
 from TTS.api import TTS
 import simpleaudio as sa
 import time
 from queue import Queue
 import re
 class LollmsXTTS(LollmsTTS):
-    def __init__(
+    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
-                    self, 
+        super().__init__("lollms_xtts", app)
                    app:LollmsApplication, 
                    xtts_base_url=None,
                    share=False,
                    max_retries=20,
                    voices_folder=None,
                    voice_samples_path="",
                    wait_for_service=True,
                    use_deep_speed=False,
                    use_streaming_mode = True
                ):
        super().__init__("xtts",app)
        self.generation_threads = {}
-        self.voices_folder = voices_folder
+        self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
-        self.ready = False
+        self.stop_event = threading.Event()
        if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020":
            xtts_base_url = None
        # Get the current directory
        lollms_paths = app.lollms_paths
        root_dir = lollms_paths.personal_path
        self.voice_samples_path = voice_samples_path
        self.use_deep_speed = use_deep_speed
        self.use_streaming_mode = use_streaming_mode
        # Store the path to the script
        if xtts_base_url is None:
            self.xtts_base_url = "http://127.0.0.1:8020"
            if not LollmsXTTS.verify(lollms_paths):
                LollmsXTTS.install(app)
        else:
            self.xtts_base_url = xtts_base_url
        self.auto_xtts_url = self.xtts_base_url+"/sdapi/v1"
        shared_folder = root_dir/"shared"
        self.xtts_path = shared_folder / "xtts"
        # Show a cool LOGO using ASCIIColors
        ASCIIColors.red("   __    ___  __    __          __     __  ___   _        ")
        ASCIIColors.red("  / /   /___\/ /   / /   /\/\  / _\    \ \/ / |_| |_ ___  ")
        ASCIIColors.red(" / /   //  // /   / /   /    \ \ \ _____\  /| __| __/ __| ")
        ASCIIColors.red("/ /___/ \_// /___/ /___/ /\/\ \_\ \_____/  \| |_| |_\__ \ ")
        ASCIIColors.red("\____/\___/\____/\____/\/    \/\__/    /_/\_\\__|\__|___/ ")
        ASCIIColors.red(" Forked from daswer123's XTTS server")
        ASCIIColors.red(" Integration in lollms by ParisNeo using daswer123's webapi")
        ASCIIColors.red(" Address :",end="")
        ASCIIColors.yellow(f"{self.xtts_base_url}")
-        self.output_folder = app.lollms_paths.personal_outputs_path/"audio_out"
+        # Load the TTS model
-        self.output_folder.mkdir(parents=True, exist_ok=True)
+        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
        self.tts.to("cuda")
        self.wav_queue = Queue()
        self.play_obj = None
        self.thread = None        
        self.ready = True
-        if not self.wait_for_service(1,False):
+    def install(lollms_app: LollmsApplication):
-            ASCIIColors.info("Loading lollms_xtts")
+        ASCIIColors.green("LollmsXTTS installation started")
-            # Launch the Flask service using the appropriate script for the platform
+        # Here you can perform installation of needed things, or create configuration files or download needed assets etc.
-            self.process = self.run_xtts_api_server()
+        run_pip_in_env("TTS")
-
+        run_pip_in_env("simpleaudio")
        # Wait until the service is available at http://127.0.0.1:7860/
        if wait_for_service:
            self.wait_for_service()
        else:
            self.wait_for_service_in_another_thread(max_retries=max_retries)
    def install(lollms_app:LollmsApplication):
        """
        Install or update the XTTS API server in the personal shared folder.

        The function is declared without ``self`` and is invoked on the class
        (``LollmsXTTS.install(app)``). It performs four steps:

        1. clone ``ParisNeo/xtts-api-server`` into ``<personal_path>/shared/xtts``,
           or pull it when the folder already exists;
        2. create the ``xtts`` conda environment when it does not exist yet;
        3. install the requirements, the CUDA 11.8 torch/torchaudio wheels and
           the repository itself (editable) with pip inside that environment;
        4. run ``xtts_api_server`` inside that environment.

        Args:
            lollms_app: application whose ``lollms_paths.personal_path`` locates
                the shared folder.

        Raises:
            subprocess.CalledProcessError: when ``git clone`` fails.
        """
        ASCIIColors.green("XTTS installation started")
        repo_url = "https://github.com/ParisNeo/xtts-api-server"
        root_dir = lollms_app.lollms_paths.personal_path
        shared_folder = root_dir/"shared"
        xtts_path = shared_folder / "xtts"

        # Step 1: Clone or update the repository
        if os.path.exists(xtts_path):
            print("Repository already exists. Pulling latest changes...")
            try:
                subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
            except (subprocess.CalledProcessError, OSError):
                # The folder exists but could not be pulled (for instance it is
                # not a git checkout): fall back to cloning. Interrupts such as
                # Ctrl+C are deliberately no longer swallowed here.
                subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
        else:
            print("Cloning repository...")
            subprocess.run(["git", "clone", repo_url, xtts_path], check=True)

        # Step 2: Create or update the Conda environment
        if environment_exists("xtts"):
            print("Conda environment 'xtts' already exists. Updating...")
            # Here you might want to update the environment, e.g., update Python or dependencies
            # This step is highly dependent on how you manage your Conda environments and might involve
            # running `conda update` commands or similar.
        else:
            print("Creating Conda environment 'xtts'...")
            create_conda_env("xtts", "3.10")

        # Step 3: Install or update dependencies using your custom function
        requirements_path = os.path.join(xtts_path, "requirements.txt")
        run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
        run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
        run_pip_in_env("xtts", f"install -e {xtts_path}", cwd=xtts_path)

        # Step 4: Launch the server
        # Assuming the server can be started with a Python script in the cloned repository
        print("Launching XTTS API server...")
        run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
        print("XTTS API server setup and launch completed.")
        ASCIIColors.cyan("Done")
        ASCIIColors.cyan("Installing xtts-api-server")
        ASCIIColors.green("XTTS server installed successfully")
    @staticmethod
-    def verify(lollms_paths:LollmsPaths)->bool:
+    def verify(lollms_paths: LollmsPaths) -> bool:
-        # Clone repository
+        # Verify that the service is installed either by verifying the libraries are installed or that some files or folders exist
        root_dir = lollms_paths.personal_path
        shared_folder = root_dir/"shared"
        xtts_path = shared_folder / "xtts"
        return xtts_path.exists()
    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsXTTS':
        """
        Refresh the local XTTS checkout and hand back the service class.

        ``git_pull`` is always run on ``<personal_path>/shared/xtts``. The
        ``LollmsXTTS`` class is returned only when ``lollms_xtts.py`` exists in
        that folder; otherwise the function returns ``None``.
        """
        checkout = app.lollms_paths.personal_path / "shared" / "xtts"
        git_pull(checkout)

        if not (checkout / "lollms_xtts.py").exists():
            return None

        ASCIIColors.success("lollms_xtts found.")
        ASCIIColors.success("Loading source file...",end="")
        from lollms.services.xtts.lollms_xtts import LollmsXTTS
        ASCIIColors.success("ok")
        return LollmsXTTS
    def run_xtts_api_server(self):
        """
        Start ``xtts_api_server`` in the ``xtts`` environment without waiting.

        The command line is built from the instance settings: optional
        deepspeed / streaming flags, the output folder, the voice samples
        folder and the port taken from the last ``:``-separated part of
        ``xtts_base_url``.

        Returns:
            Whatever ``run_python_script_in_env`` returns for the started script.
        """
        server_dir = self.app.lollms_paths.personal_path / "shared" / "xtts"

        ASCIIColors.yellow("Loading XTTS ")

        flags = []
        if self.use_deep_speed:
            flags.append(" --deepspeed")
        if self.use_streaming_mode:
            flags.append(" --streaming-mode --streaming-mode-improve --stream-play-sync")
        options = "".join(flags)

        port = self.xtts_base_url.split(':')[-1].replace('/','')
        command = f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {port}"
        return run_python_script_in_env("xtts", command, cwd=server_dir, wait= False)
    def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True):
        """
        Run ``wait_for_service`` in a background thread.

        Both arguments are forwarded positionally to ``wait_for_service``.

        Returns:
            threading.Thread: the already started thread.
        """
        poller = threading.Thread(
            target=self.wait_for_service,
            args=(max_retries, show_warning),
        )
        poller.start()
        return poller
    def update_settings(self):
        try:
-            settings = {
+            import TTS
-                "stream_chunk_size": int(self.app.config.xtts_stream_chunk_size),
+            import simpleaudio
                "temperature": float(self.app.config.xtts_temperature),
                "speed": float(self.app.config.xtts_speed),
                "length_penalty": float(self.app.config.xtts_length_penalty),
                "repetition_penalty": float(self.app.config.xtts_repetition_penalty),
                "top_p": float(self.app.config.xtts_top_p),
                "top_k": int(self.app.config.xtts_top_k),
                "enable_text_splitting": bool(self.app.config.xtts_enable_text_splitting)
            } 
            print("set_tts_settings")       
            print(f"{settings}")       
            response = requests.post(f"{self.xtts_base_url}/set_tts_settings", settings,headers={
                'accept': 'application/json',
                'Content-Type': 'application/json'
            })
            if response.status_code == 200:
                ASCIIColors.success("XTTS updated successfully")
        except Exception as ex:
            trace_exception(ex)
            pass
    def wait_for_service(self, max_retries = 150, show_warning=True):
        """
        Poll the XTTS server until it answers, then configure it.

        A GET request is sent to ``<xtts_base_url>/languages``; after every
        failed attempt the method sleeps 5 seconds. On the first HTTP 200 the
        settings are pushed with ``update_settings``, ``self.ready`` is set to
        True and, when ``self.voices_folder`` is set, a short sample is
        requested with the first ``.wav`` voice found in that folder.

        Args:
            max_retries: number of attempts; a negative value polls forever.
            show_warning: when the service never answered, print a message and
                report it through ``self.app.error`` (if an app is attached).

        Returns:
            bool: True as soon as the service answered, False otherwise.
        """
        print(f"Waiting for xtts service (max_retries={max_retries})")
        url = f"{self.xtts_base_url}/languages"
        # Adjust this value as needed
        retries = 0

        while retries < max_retries or max_retries<0:
            try:
                # The timeout keeps a stalled connection from blocking the
                # polling loop; a timeout simply counts as a failed attempt.
                response = requests.get(url, timeout=10)
                if response.status_code == 200:
                    self.update_settings()
                    print(f"voices_folder is {self.voices_folder}.")
                    self.ready = True
                    if self.voices_folder is not None:
                        print("Generating sample audio.")
                        try:
                            voice_file = next((v for v in self.voices_folder.iterdir() if v.suffix==".wav"), None)
                            if voice_file is not None:
                                self.tts_audio("x t t s is ready",voice_file.stem)
                        except Exception as ex:
                            # The sample is only a courtesy: the service is up,
                            # so log the problem and still report availability.
                            trace_exception(ex)
                    print("Service is available.")
                    if self.app is not None:
                        self.app.success("XTTS Service is now available.")
                    return True
            except Exception:
                # Service not reachable yet (or setup failed): retry below.
                pass

            retries += 1
            ASCIIColors.yellow("Waiting for xtts...")
            time.sleep(5)

        if show_warning:
            print("Service did not become available within the given time.")
            if self.app is not None:
                self.app.error("XTTS Service did not become available within the given time.")
        return False
    def set_speaker_folder(self, speaker_folder):
        url = f"{self.xtts_base_url}/set_speaker_folder"
        # Define the request body
        payload = {
            "speaker_folder": str(speaker_folder)
        }
        # Send the POST request
        response = requests.post(url, json=payload)
        # Check the response status code
        if response.status_code == 200:
            print("Request successful")
            return True
-            # You can access the response data using response.json()
+        except ImportError:
        else:
            print("Request failed with status code:", response.status_code)
            return False
-    def tts_file(self, text, file_name_or_path, speaker=None, language="en")->str:
+    @staticmethod
-        text = self.clean_text(text)
+    def get(app: LollmsApplication) -> 'LollmsXTTS':
-        url = f"{self.xtts_base_url}/tts_to_file"
+        # Verify if the service is installed and if true then return an instance of LollmsXTTS
-
+        if LollmsXTTS.verify(app.lollms_paths):
-        # Define the request body
+            return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
        payload = {
            "text": text,
            "speaker_wav": speaker,
            "language": language,
            "file_name_or_path": file_name_or_path
        }
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        # Send the POST request
        response =  requests.post(url, headers=headers, data=json.dumps(payload))
        # Check the response status code
        if response.status_code == 200:
            print("Request successful")
            # You can access the response data using response.json()
        else:
-            print("Request failed with status code:", response.status_code)
+            raise Exception("LollmsXTTS service is not installed properly.")
-
+    def get_speaker_wav(self, speaker) -> Path:
-        return file_name_or_path
+        """
-
+        Searches for the speaker file in the specified folders.
    def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False):
        """
        Pick a voice folder and voice file, then request audio through ``xtts_audio``.

        Args:
            text: text to speak; it is passed through ``add_period`` first.
            speaker: voice name (file stem of a ``.wav``). When None, the
                configured ``xtts_current_voice`` is used, and "main_voice" if
                that is None as well.
            file_name_or_path: output file name; when None a
                ``voice_sample_<index>.wav`` name is generated.
            language: overwritten inside the method by the configured
                ``xtts_current_language``, so the passed value is not used.
            use_threading: forwarded to ``xtts_audio``.

        Returns:
            None when the request was handed to ``xtts_audio`` (its return
            value is discarded), or ``{"status": False, "error": ...}`` when
            the voice file is missing or an exception was raised.
        """
        voice=self.app.config.xtts_current_voice if speaker is None else speaker
        # Index used to build a default output name that is not taken yet
        index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
        output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
        if voice is None:
            voice = "main_voice"
        self.app.info("Starting to build voice")
        try:
            from lollms.services.xtts.lollms_xtts import LollmsXTTS
            # If the personality has a voice, then use it
            personality_audio:Path = self.app.personality.personality_package_path/"audio"
            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
                voices_folder = personality_audio
            elif voice!="main_voice":
                # A named voice is looked up in the user's custom voices
                voices_folder = self.app.lollms_paths.custom_voices_path
            else:
                # Default voice: the voices folder under services/xtts
                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
            language = self.app.config.xtts_current_language# convert_language_name()
            # The server is pointed at the chosen folder before the request
            self.set_speaker_folder(voices_folder)
            preprocessed_text= add_period(text)
            voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
            if len(voice_file)==0:
                return {"status":False,"error":"Voice not found"}
            self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language, use_threading=use_threading)
        except Exception as ex:
            trace_exception(ex)
            return {"status":False,"error":f"{ex}"}
    def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        """
        Post a synthesis request to ``<xtts_base_url>/tts_to_audio``.

        Args:
            text: text to speak; it goes through ``self.clean_text`` first.
            speaker: value sent as ``speaker_wav`` in the request body.
            file_name_or_path: when not None, the response bytes are written to
                ``self.output_folder / file_name_or_path``.
            language: value sent as ``language`` in the request body.
            use_threading: run the request in a new thread instead of inline.

        Returns:
            The started ``threading.Thread`` when ``use_threading`` is true,
            otherwise the result of the inline call (None).
        """
        text = self.clean_text(text)
        def tts2_audio_th(thread_uid=None):
            # Worker doing the HTTP request; thread_uid is only set in threaded mode
            url = f"{self.xtts_base_url}/tts_to_audio"

            # Define the request body
            payload = {
                "text": text,
                "speaker_wav": speaker,
                "language": language
            }
            headers = {
                'accept': 'application/json',
                'Content-Type': 'application/json'
            }

            # Send the POST request
            response =  requests.post(url, headers=headers, data=json.dumps(payload))

            if response.status_code == 200:
                print("Request successful")
                print("Response headers:", response.headers)

                # Basic logging for debugging
                print("First 100 bytes of response content:", response.content[:100])

                if file_name_or_path is not None:
                    try:
                        with open(self.output_folder / file_name_or_path, 'wb') as file:
                            # Write the binary content to the file
                            file.write(response.content)
                        print(f"File {file_name_or_path} written successfully.")
                    except Exception as e:
                        print(f"Failed to write the file. Error: {e}")
            else:
                print("Request failed with status code:", response.status_code)
            # Unregister the worker once its request is finished
            if thread_uid:
                self.generation_threads.pop(thread_uid, None)
        if use_threading:
            # Register the worker under a fresh id so it can remove itself later
            thread_uid =  str(uuid.uuid4())       
            thread = threading.Thread(target=tts2_audio_th, args=(thread_uid,))
            self.generation_threads[thread_uid]=thread
            self.thread = thread
            thread.start()
            ASCIIColors.green("Generation started")
            return thread
        else:
            return tts2_audio_th()
    def stop(self):
        """
        Send a POST with an empty JSON body to ``<xtts_base_url>/stop_streaming``.

        A confirmation is printed on HTTP 200; any other status is ignored.
        """
        reply = requests.post(
            f"{self.xtts_base_url}/stop_streaming",
            headers={
                'accept': 'application/json',
                'Content-Type': 'application/json',
            },
            data=json.dumps({}),
        )
        if reply.status_code == 200:
            print("Request successful")
        :param speaker: The name of the speaker file (without extension).
        :return: The path to the speaker file if found.
        :raises FileNotFoundError: If the speaker file is not found in any of the folders.
        """
        for folder in self.voices_folders:
            potential_speaker_wav = Path(folder) / f"{speaker}.wav"
            if potential_speaker_wav.exists():
                return potential_speaker_wav
        raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.")
    def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str:
        """
        Synthesize ``text`` directly into an audio file.

        :param text: The text to speak.
        :param file_name_or_path: Destination handed to the TTS engine as ``file_path``.
        :param speaker: Name of the reference voice (without extension); a falsy
            value selects ``"main_voice"``.
        :param language: Language code forwarded to the TTS engine.
        :return: ``file_name_or_path``, unchanged.
        """
        # A missing/empty speaker falls back to the default reference voice.
        voice_name = speaker if speaker else "main_voice"
        reference_wav = self.get_speaker_wav(voice_name)
        self.tts.tts_to_file(
            text=text,
            file_path=file_name_or_path,
            speaker_wav=reference_wav,
            language=language,
        )
        return file_name_or_path
    def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False):
        # Split text into sentences
        sentences = re.split(r'(?<=[.!?]) +', text)
        if speaker:
            speaker_wav = self.get_speaker_wav(speaker)
        else:
            speaker_wav = self.get_speaker_wav("main_voice")
        if use_threading:
            self.stop_event.clear()
            generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
            generator_thread.start()
            self.thread = threading.Thread(target=self._play_audio)
            self.thread.start()
        else:
            self.stop_event.clear()
            generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
            generator_thread.start()
            self._play_audio()
    def _generate_audio(self, sentences, speaker_wav, language, file_name_or_path):
        wav_data = []
        for sentence in sentences:
            if self.stop_event.is_set():
                break
            wav = self.tts.tts(text=sentence, speaker_wav=speaker_wav, language=language)
            wav_array = np.array(wav, dtype=np.float32)
            wav_array = np.int16(wav_array * 32767)
            self.wav_queue.put(wav_array)
            wav_data.append(wav_array)
        self.wav_queue.put(None)  # Signal that generation is done
        if file_name_or_path:
            self._save_wav(wav_data, file_name_or_path)
    def _play_audio(self):
        buffered_sentences = 0
        buffer = []
        while not self.stop_event.is_set():
            wav = self.wav_queue.get()
            if wav is None:
                # Play any remaining buffered sentences
                for buffered_wav in buffer:
                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
                    self.play_obj.wait_done()
                    time.sleep(0.5)  # Pause between sentences
                ASCIIColors.green("Audio done")
                break
            buffer.append(wav)
            buffered_sentences += 1
            if buffered_sentences >= 2:
                for buffered_wav in buffer:
                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
                    self.play_obj.wait_done()
                    time.sleep(0.5)  # Pause between sentences
                buffer = []
                buffered_sentences = 0
    def _save_wav(self, wav_data, file_name_or_path):
        with wave.open(str(file_name_or_path), 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(22050)
            for wav in wav_data:
                wf.writeframes(wav.tobytes())
    def stop(self):
        """
        Stop the current speech generation and playback.

        Sets ``self.stop_event`` so the generator and playback loops exit,
        interrupts the clip that is currently playing, then waits for the
        playback thread (if one is running) to finish.
        """
        self.stop_event.set()
        # Interrupt the running clip *before* joining: the playback thread is
        # blocked in wait_done() until the clip ends, so joining first would
        # make stop() wait for the audio to finish on its own.
        if self.play_obj:
            self.play_obj.stop()
        if self.thread and self.thread.is_alive():
            self.thread.join()
    def get_voices(self):
        """
        List the voices available in the configured voices folders.

        Every ``.wav`` file found directly inside one of ``self.voices_folders``
        contributes its file name without extension. Folders that do not exist
        are skipped, and a name present in several folders is listed once, in
        the position of its first occurrence.

        :return: A list of voice names.
        """
        # List voices from the folders
        ASCIIColors.yellow("Listing voices")
        voices = []
        seen = set()
        for voices_folder in self.voices_folders:
            # Entries may be plain strings; get_speaker_wav wraps them the same way.
            folder = Path(voices_folder)
            if not folder.is_dir():
                continue
            for candidate in folder.iterdir():
                if candidate.suffix == ".wav" and candidate.stem not in seen:
                    seen.add(candidate.stem)
                    voices.append(candidate.stem)
        return voices
 if __name__ == "__main__":
    # Usage example: build the application, fetch the XTTS service through
    # LollmsXTTS.get and synthesize a short test sentence into "output.wav"
    # using the "ParisNeo_Original_voice" reference voice.
    app = LollmsApplication()
    lollms_xtts_service = LollmsXTTS.get(app)
    lollms_xtts_service.tts_file("Hello, this is a test.", "output.wav", speaker="ParisNeo_Original_voice", language="en")
--- a/lollms/tts.py
+++ b/lollms/tts.py
@ -143,7 +143,17 @@ class LollmsTTS:
            list: A list of available voices.
        """
        return self.voices
-    
+
    def get_models(self):
        """
        List the models this TTS service offers.

        Returns:
            list: The ``models`` attribute of this instance, returned as-is
            (not copied).
        """
        available_models = self.models
        return available_models
    def get_devices(self):
        devices =  sd.query_devices()
@ -152,7 +162,6 @@ class LollmsTTS:
            "device_names": [device['name'] for device in devices if device["max_output_channels"]>0],
            "device_indexes": [device['index'] for device in devices if device["max_output_channels"]>0]
        }
    @staticmethod
    def clean_text(text):
        # Remove HTML tags
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en