upgraded xtts

2025-04-16 06:56:33 +00:00 · 2024-07-15 00:38:56 +02:00 · 2024-07-15 00:38:56 +02:00 · 3a00e968fb
commit 3a00e968fb
parent aa21fdfb49
15 changed files with 248 additions and 424 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -142,10 +142,6 @@ whisper_model: base
 tts_output_device: 0

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
-xtts_use_deepspeed: false
-xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/app.py
+++ b/lollms/app.py
@ -341,14 +341,10 @@ class LollmsApplication(LoLLMsCom):

                self.xtts = LollmsXTTS(
                                        self,
-                                        voices_folder=voices_folder,
-                                        voice_samples_path=self.lollms_paths.custom_voices_path, 
-                                        xtts_base_url=self.config.xtts_base_url,
-                                        wait_for_service=False,
-                                        use_deep_speed=self.config.xtts_use_deepspeed,
-                                        use_streaming_mode=self.config.xtts_use_streaming_mode
+                                        voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
                                    )
-            except:
+            except Exception as ex:
+                trace_exception(ex)
                self.warning(f"Couldn't load XTTS")

        ASCIIColors.blue("Loading local TTI services")
@ -458,14 +454,10 @@ class LollmsApplication(LoLLMsCom):

                    self.xtts = LollmsXTTS(
                                            self,
-                                            voices_folder=voices_folder,
-                                            voice_samples_path=self.lollms_paths.custom_voices_path, 
-                                            xtts_base_url=self.config.xtts_base_url,
-                                            wait_for_service=False,
-                                            use_deep_speed=self.config.xtts_use_deepspeed,
-                                            use_streaming_mode=self.config.xtts_use_streaming_mode
+                                            voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
                                        )
-                except:
+                except Exception as ex:
+                    trace_exception(ex)
                    self.warning(f"Couldn't load XTTS")

            ASCIIColors.blue("Loading local TTI services")
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -142,10 +142,6 @@ whisper_model: base
 tts_output_device: 0

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
-xtts_use_deepspeed: false
-xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/paths.py
+++ b/lollms/paths.py
@ -72,6 +72,14 @@ class LollmsPaths:
        self.personal_outputs_path          = self.personal_path / "outputs"
        self.personal_user_infos_path       = self.personal_path / "user_infos"

+
+        self.personal_services_path         = self.personal_path / "services"
+        self.personal_stt_services_path     = self.personal_services_path / "stt"
+        self.personal_tts_services_path     = self.personal_services_path / "tts"
+        self.personal_tti_services_path     = self.personal_services_path / "tti"
+        self.personal_ttm_services_path     = self.personal_services_path / "ttm"
+
+
        self.personal_trainers_path         = self.personal_path / "trainers"
        self.gptqlora_path                  = self.personal_trainers_path / "gptqlora"

@ -117,6 +125,25 @@ class LollmsPaths:
        ASCIIColors.yellow(f"{self.personal_models_path}")
        ASCIIColors.red("personal_user_infos_path:",end="")
        ASCIIColors.yellow(f"{self.personal_user_infos_path}")
+
+        ASCIIColors.red("personal_services_path:",end="")
+        ASCIIColors.yellow(f"{self.personal_services_path}")
+
+
+        ASCIIColors.red("personal_stt_services_path:", end="")
+        ASCIIColors.yellow(f"{self.personal_stt_services_path}")
+
+        ASCIIColors.red("personal_tts_services_path:", end="")
+        ASCIIColors.yellow(f"{self.personal_tts_services_path}")
+
+        ASCIIColors.red("personal_tti_services_path:", end="")
+        ASCIIColors.yellow(f"{self.personal_tti_services_path}")
+
+        ASCIIColors.red("personal_ttm_services_path:", end="")
+        ASCIIColors.yellow(f"{self.personal_ttm_services_path}")
+
+
+
        ASCIIColors.red("personal_trainers_path:",end="")
        ASCIIColors.yellow(f"{self.personal_trainers_path}")
        ASCIIColors.red("personal_trainers_path:",end="")
@ -162,6 +189,12 @@ class LollmsPaths:
            "Personal user infos path": self.personal_user_infos_path,
            "Personal trainers path": self.personal_trainers_path,
            "Personal gptqlora trainer path": self.gptqlora_path,
+
+            "Personal services path": self.personal_services_path,
+            "Personal STT services path": self.personal_stt_services_path,
+            "Personal TTS services path": self.personal_tts_services_path,
+            "Personal TTI services path": self.personal_tti_services_path,
+            "Personal TTM services path": self.personal_ttm_services_path,            
        }
        return "\n".join([f"{category}: {path}" for category, path in directories.items()])

@ -180,6 +213,12 @@ class LollmsPaths:
        self.personal_outputs_path.mkdir(parents=True, exist_ok=True)
        self.personal_uploads_path.mkdir(parents=True, exist_ok=True)
        self.personal_user_infos_path.mkdir(parents=True, exist_ok=True)
+
+        self.personal_services_path.mkdir(parents=True, exist_ok=True)
+        self.personal_stt_services_path.mkdir(parents=True, exist_ok=True)
+        self.personal_tts_services_path.mkdir(parents=True, exist_ok=True)
+        self.personal_tti_services_path.mkdir(parents=True, exist_ok=True)
+        self.personal_ttm_services_path.mkdir(parents=True, exist_ok=True)        
        self.personal_trainers_path.mkdir(parents=True, exist_ok=True)
        self.custom_personalities_path.mkdir(parents=True, exist_ok=True)
        self.custom_voices_path.mkdir(parents=True, exist_ok=True)
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -139,10 +139,6 @@ whisper_model: base
 tts_output_device: 0

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
-xtts_use_deepspeed: false
-xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
--- a/lollms/server/endpoints/lollms_comfyui.py
+++ b/lollms/server/endpoints/lollms_comfyui.py
@ -9,7 +9,7 @@ description:
 """
 from fastapi import APIRouter, Request
 from pydantic import BaseModel, Field
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from lollms.security import check_access
 from starlette.responses import StreamingResponse
@ -24,7 +24,7 @@ import platform
 # ----------------------- Defining router and main class ------------------------------

 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()


 # ----------------------- voice ------------------------------
--- a/lollms/server/endpoints/lollms_diffusers.py
+++ b/lollms/server/endpoints/lollms_diffusers.py
@ -7,7 +7,7 @@ description:

 """
 from fastapi import APIRouter, Request
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
@ -22,7 +22,7 @@ import platform
 # ----------------------- Defining router and main class ------------------------------

 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()

 class Identification(BaseModel):
    client_id: str
--- a/lollms/server/endpoints/lollms_discussion.py
+++ b/lollms/server/endpoints/lollms_discussion.py
@ -8,7 +8,7 @@ description:

 """
 from fastapi import APIRouter, Request
-from lollms_webui import LOLLMSWebUI
+from lollms.server.elf_server import LOLLMSElfServer
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
@ -43,7 +43,7 @@ class DeleteDiscussionParameters(BaseModel):
 # ----------------------- Defining router and main class ------------------------------

 router = APIRouter()
-lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
+lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()


@router.get("/list_discussions")
--- a/lollms/server/endpoints/lollms_tts.py
+++ b/lollms/server/endpoints/lollms_tts.py
@ -56,6 +56,16 @@ def list_stt_models():
    ASCIIColors.yellow("Listing voices")
    return {"voices":lollmsElfServer.stt.get_models()}

+@router.get("/list_tts_models")
+def list_tts_models():
+    """
+    Lists the models exposed by the active TTS service.
+
+    The endpoint is refused in headless mode and when the server is not bound to
+    localhost / 127.0.0.1.
+
+    :return: {"voices": [...]} on success, or {"status": False, "error": "..."} when the
+             request is refused or no TTS service is loaded.
+    """
+    if lollmsElfServer.config.headless_server_mode:
+        return {"status":False,"error":"Listing TTS models is blocked when in headless mode for obvious security reasons!"}
+
+    if lollmsElfServer.config.host not in ("localhost", "127.0.0.1"):
+        return {"status":False,"error":"Listing TTS models is blocked when the server is exposed outside for very obvious reasons!"}
+
+    # The TTS service is optional; answer with an error instead of raising AttributeError.
+    if lollmsElfServer.tts is None:
+        return {"status":False,"error":"No TTS service is on"}
+
+    ASCIIColors.yellow("Listing TTS models")
+    return {"voices":lollmsElfServer.tts.get_models()}

@router.post("/set_voice")
 async def set_voice(request: Request):
@ -134,7 +144,11 @@ async def text2Audio(request: LollmsText2AudioRequest):
        if lollmsElfServer.tts is None:
            return {"url": None, "error":f"No TTS service is on"}
        if lollmsElfServer.tts.ready:
-            response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn, use_threading=True)
+            if request.voice:
+                voice = request.voice
+            else:
+                voice = lollmsElfServer.config.xtts_current_voice
+            response = lollmsElfServer.tts.tts_audio(request.text, voice, file_name_or_path=request.fn, use_threading=True)
            return response
        else:
            return {"url": None, "error":f"TTS service is not ready yet"}
@ -143,8 +157,8 @@ async def text2Audio(request: LollmsText2AudioRequest):
        lollmsElfServer.error(ex)
        return {"status":False,"error":str(ex)}

-@router.post("/text2wav")
-async def text2Wav(request: LollmsText2AudioRequest):
+@router.post("/text2Wave")
+async def text2Wave(request: LollmsText2AudioRequest):
    """
    Executes Python code and returns the output.

@ -168,9 +182,14 @@ async def text2Wav(request: LollmsText2AudioRequest):
    request.fn.parent.mkdir(exist_ok=True, parents=True)

    try:
+        if request.voice:
+            voice = request.voice
+        else:
+            voice = lollmsElfServer.config.xtts_current_voice
+
        # Get the JSON data from the POST request.
        if lollmsElfServer.tts.ready:
-            response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
+            response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
            return response
        else:
            return {"url": None, "error":f"TTS service is not ready yet"}
@ -215,11 +234,7 @@ def start_xtts():

            lollmsElfServer.tts = LollmsXTTS(
                lollmsElfServer, 
-                voices_folder=voices_folder,
-                voice_samples_path=Path(__file__).parent/"voices", 
-                xtts_base_url= lollmsElfServer.config.xtts_base_url,
-                use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
-                use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode                                                    
+                voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],                                        
            )
        lollmsElfServer.HideBlockingMessage()
    except Exception as ex:
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -1,404 +1,193 @@
-# Title LollmsXTTS
-# Licence: MIT
-# Author : Paris Neo
-# Adapted from the work of daswer123's xtts-api-server
-# check it out : https://github.com/daswer123/xtts-api-server
-# Here is a copy of the LICENCE https://github.com/daswer123/xtts-api-server/blob/main/LICENSE
-# All rights are reserved
+"""
+project: lollms_tts
+file: lollms_xtts.py
+author: ParisNeo
+description: 
+    This file hosts the LollmsXTTS service which provides text-to-speech functionalities using the TTS library.
+"""

 from pathlib import Path
-import sys
 from lollms.app import LollmsApplication
 from lollms.paths import LollmsPaths
-from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
 from lollms.utilities import PackageManager, find_first_available_file_index, add_period
-import time
-import io
-import sys
-import requests
-import os
-import base64
-import subprocess
-import time
-import json
-import re
-import platform
-import threading
-from dataclasses import dataclass
-from PIL import Image, PngImagePlugin
-from enum import Enum
-from typing import List, Dict, Any
-import uuid
-
 from ascii_colors import ASCIIColors, trace_exception
-from lollms.paths import LollmsPaths
-from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
 from lollms.tts import LollmsTTS
-import subprocess
-import platform
+from lollms.utilities import run_pip_in_env
+from typing import List
+import threading
+import numpy as np
+# Ensure the required third-party packages are installed before they are imported below.
+if not PackageManager.check_package_installed("TTS"):
+    PackageManager.install_or_update("TTS")

+if not PackageManager.check_package_installed("simpleaudio"):
+    PackageManager.install_or_update("simpleaudio")

+# NOTE: `wave` is part of the Python standard library, so it is deliberately not
+# checked/installed here; a pip install of that name would fetch an unrelated package.
+
+import wave
+from TTS.api import TTS
+import simpleaudio as sa
+import time
+from queue import Queue
+import re

 class LollmsXTTS(LollmsTTS):
-    def __init__(
-                    self, 
-                    app:LollmsApplication, 
-                    xtts_base_url=None,
-                    share=False,
-                    max_retries=20,
-                    voices_folder=None,
-                    voice_samples_path="",
-                    wait_for_service=True,
-                    use_deep_speed=False,
-                    use_streaming_mode = True
-                ):
-        super().__init__("xtts",app)
+    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
+        super().__init__("lollms_xtts", app)
        self.generation_threads = {}
-        self.voices_folder = voices_folder
-        self.ready = False
-        if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020":
-            xtts_base_url = None
-        # Get the current directory
-        lollms_paths = app.lollms_paths
-        root_dir = lollms_paths.personal_path
-        self.voice_samples_path = voice_samples_path
-        self.use_deep_speed = use_deep_speed
-        self.use_streaming_mode = use_streaming_mode
-        
-        # Store the path to the script
-        if xtts_base_url is None:
-            self.xtts_base_url = "http://127.0.0.1:8020"
-            if not LollmsXTTS.verify(lollms_paths):
-                LollmsXTTS.install(app)
-        else:
-            self.xtts_base_url = xtts_base_url
-
-        self.auto_xtts_url = self.xtts_base_url+"/sdapi/v1"
-        shared_folder = root_dir/"shared"
-        self.xtts_path = shared_folder / "xtts"
+        self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
+        self.stop_event = threading.Event()

+        # Show a cool LOGO using ASCIIColors
        ASCIIColors.red("   __    ___  __    __          __     __  ___   _        ")
        ASCIIColors.red("  / /   /___\/ /   / /   /\/\  / _\    \ \/ / |_| |_ ___  ")
        ASCIIColors.red(" / /   //  // /   / /   /    \ \ \ _____\  /| __| __/ __| ")
        ASCIIColors.red("/ /___/ \_// /___/ /___/ /\/\ \_\ \_____/  \| |_| |_\__ \ ")
        ASCIIColors.red("\____/\___/\____/\____/\/    \/\__/    /_/\_\\__|\__|___/ ")
-                                                         
-        ASCIIColors.red(" Forked from daswer123's XTTS server")
-        ASCIIColors.red(" Integration in lollms by ParisNeo using daswer123's webapi")
-        ASCIIColors.red(" Address :",end="")
-        ASCIIColors.yellow(f"{self.xtts_base_url}")

-        self.output_folder = app.lollms_paths.personal_outputs_path/"audio_out"
-        self.output_folder.mkdir(parents=True, exist_ok=True)
+        # Load the TTS model
+        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
+        self.tts.to("cuda")
+        self.wav_queue = Queue()
+        self.play_obj = None
+        self.thread = None        
+        self.ready = True

-        if not self.wait_for_service(1,False):
-            ASCIIColors.info("Loading lollms_xtts")
-            # Launch the Flask service using the appropriate script for the platform
-            self.process = self.run_xtts_api_server()
-
-        # Wait until the service is available at http://127.0.0.1:7860/
-        if wait_for_service:
-            self.wait_for_service()
-        else:
-            self.wait_for_service_in_another_thread(max_retries=max_retries)
-
-    def install(lollms_app:LollmsApplication):
-        ASCIIColors.green("XTTS installation started")
-        repo_url = "https://github.com/ParisNeo/xtts-api-server"
-        root_dir = lollms_app.lollms_paths.personal_path
-        shared_folder = root_dir/"shared"
-        xtts_path = shared_folder / "xtts"
-
-        # Step 1: Clone or update the repository
-        if os.path.exists(xtts_path):
-            print("Repository already exists. Pulling latest changes...")
-            try:
-                subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
-            except:
-                subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
-
-        else:
-            print("Cloning repository...")
-            subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
-
-        # Step 2: Create or update the Conda environment
-        if environment_exists("xtts"):
-            print("Conda environment 'xtts' already exists. Updating...")
-            # Here you might want to update the environment, e.g., update Python or dependencies
-            # This step is highly dependent on how you manage your Conda environments and might involve
-            # running `conda update` commands or similar.
-        else:
-            print("Creating Conda environment 'xtts'...")
-            create_conda_env("xtts", "3.10")
-
-        # Step 3: Install or update dependencies using your custom function
-        requirements_path = os.path.join(xtts_path, "requirements.txt")
-        run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
-        run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
-        run_pip_in_env("xtts", f"install -e {xtts_path}", cwd=xtts_path)
-
-        # Step 4: Launch the server
-        # Assuming the server can be started with a Python script in the cloned repository
-        print("Launching XTTS API server...")
-        run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
-
-        print("XTTS API server setup and launch completed.")
-        ASCIIColors.cyan("Done")
-        ASCIIColors.cyan("Installing xtts-api-server")
-        ASCIIColors.green("XTTS server installed successfully")
+    @staticmethod
+    def install(lollms_app: LollmsApplication):
+        """
+        Installs the Python packages the LollmsXTTS service depends on.
+
+        :param lollms_app: The lollms application requesting the installation (not used by the installation itself).
+        """
+        ASCIIColors.green("LollmsXTTS installation started")
+        # run_pip_in_env was previously called as (environment name, pip arguments, cwd=...),
+        # so handing it only a package name cannot install anything. Use the same
+        # PackageManager helper as the module-level dependency check instead.
+        PackageManager.install_or_update("TTS")
+        PackageManager.install_or_update("simpleaudio")

    @staticmethod
-    def verify(lollms_paths:LollmsPaths)->bool:
-        # Clone repository
-        root_dir = lollms_paths.personal_path
-        shared_folder = root_dir/"shared"
-        xtts_path = shared_folder / "xtts"
-        return xtts_path.exists()
-    
-    @staticmethod
-    def get(app: LollmsApplication) -> 'LollmsXTTS':
-        root_dir = app.lollms_paths.personal_path
-        shared_folder = root_dir/"shared"
-        xtts_path = shared_folder / "xtts"
-        xtts_script_path = xtts_path / "lollms_xtts.py"
-        git_pull(xtts_path)
-        
-        if xtts_script_path.exists():
-            ASCIIColors.success("lollms_xtts found.")
-            ASCIIColors.success("Loading source file...",end="")
-            # use importlib to load the module from the file path
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            ASCIIColors.success("ok")
-            return LollmsXTTS
-
-    def run_xtts_api_server(self):
-        root_dir = self.app.lollms_paths.personal_path
-        shared_folder = root_dir/"shared"
-        xtts_path = shared_folder / "xtts"
-        
-        # Get the path to the current Python interpreter
-        ASCIIColors.yellow("Loading XTTS ")
-        options= ""
-        if self.use_deep_speed:
-            options += " --deepspeed"
-        if self.use_streaming_mode:
-            options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
-        process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", cwd=xtts_path, wait= False)
-        return process
-
-    def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True):
-        thread = threading.Thread(target=self.wait_for_service, args=(max_retries, show_warning))
-        thread.start()
-        return thread
-
-    def update_settings(self):
+    def verify(lollms_paths: LollmsPaths) -> bool:
+        # Verify that the service is installed either by verifying the libraries are installed or that some files or folders exist
        try:
-            settings = {
-                "stream_chunk_size": int(self.app.config.xtts_stream_chunk_size),
-                "temperature": float(self.app.config.xtts_temperature),
-                "speed": float(self.app.config.xtts_speed),
-                "length_penalty": float(self.app.config.xtts_length_penalty),
-                "repetition_penalty": float(self.app.config.xtts_repetition_penalty),
-                "top_p": float(self.app.config.xtts_top_p),
-                "top_k": int(self.app.config.xtts_top_k),
-                "enable_text_splitting": bool(self.app.config.xtts_enable_text_splitting)
-            } 
-            print("set_tts_settings")       
-            print(f"{settings}")       
-            response = requests.post(f"{self.xtts_base_url}/set_tts_settings", settings,headers={
-                'accept': 'application/json',
-                'Content-Type': 'application/json'
-            })
-            if response.status_code == 200:
-                ASCIIColors.success("XTTS updated successfully")
-        except Exception as ex:
-            trace_exception(ex)
-            pass
-
-    def wait_for_service(self, max_retries = 150, show_warning=True):
-        print(f"Waiting for xtts service (max_retries={max_retries})")
-        url = f"{self.xtts_base_url}/languages"
-        # Adjust this value as needed
-        retries = 0
-
-        while retries < max_retries or max_retries<0:
-            try:
-                response = requests.get(url)
-                if response.status_code == 200:
-                    self.update_settings()
-                    print(f"voices_folder is {self.voices_folder}.")
-                    self.ready = True
-                    if self.voices_folder is not None:
-                        print("Generating sample audio.")
-                        voice_file =  [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
-                        try:
-                            self.tts_audio("x t t s is ready",voice_file[0].stem)
-                        except Exception as ex:
-                            return True
-                    print("Service is available.")
-                    if self.app is not None:
-                        self.app.success("XTTS Service is now available.")
-                    return True
-            except:
-                pass
-
-            retries += 1
-            ASCIIColors.yellow("Waiting for xtts...")
-            time.sleep(5)
-
-        if show_warning:
-            print("Service did not become available within the given time.")
-            if self.app is not None:
-                self.app.error("XTTS Service did not become available within the given time.")
-        return False
-
-    def set_speaker_folder(self, speaker_folder):
-        url = f"{self.xtts_base_url}/set_speaker_folder"
-
-        # Define the request body
-        payload = {
-            "speaker_folder": str(speaker_folder)
-        }
-
-        # Send the POST request
-        response = requests.post(url, json=payload)
-
-        # Check the response status code
-        if response.status_code == 200:
-            print("Request successful")
+            import TTS
+            import simpleaudio
            return True
-            # You can access the response data using response.json()
-        else:
-            print("Request failed with status code:", response.status_code)
+        except ImportError:
            return False

-    def tts_file(self, text, file_name_or_path, speaker=None, language="en")->str:
-        text = self.clean_text(text)
-        url = f"{self.xtts_base_url}/tts_to_file"
-
-        # Define the request body
-        payload = {
-            "text": text,
-            "speaker_wav": speaker,
-            "language": language,
-            "file_name_or_path": file_name_or_path
-        }
-        headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json'
-        }
-
-        # Send the POST request
-        response =  requests.post(url, headers=headers, data=json.dumps(payload))
-
-        # Check the response status code
-        if response.status_code == 200:
-            print("Request successful")
-            # You can access the response data using response.json()
+    @staticmethod
+    def get(app: LollmsApplication) -> 'LollmsXTTS':
+        """
+        Builds a LollmsXTTS instance once the service dependencies are verified.
+
+        :param app: The lollms application that owns the service.
+        :return: A LollmsXTTS instance that looks for voices in the custom voices folder.
+        :raises Exception: If the service dependencies are not installed.
+        """
+        if LollmsXTTS.verify(app.lollms_paths):
+            # __init__ iterates over voices_folders, so the single folder must be wrapped in a list
+            return LollmsXTTS(app, [app.lollms_paths.custom_voices_path])
         else:
-            print("Request failed with status code:", response.status_code)
-
-        return file_name_or_path
-
-    def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False):
-        voice=self.app.config.xtts_current_voice if speaker is None else speaker
-        index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
-        output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
-        if voice is None:
-            voice = "main_voice"
-        self.app.info("Starting to build voice")
-        try:
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            # If the personality has a voice, then use it
-            personality_audio:Path = self.app.personality.personality_package_path/"audio"
-            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
-                voices_folder = personality_audio
-            elif voice!="main_voice":
-                voices_folder = self.app.lollms_paths.custom_voices_path
-            else:
-                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-            language = self.app.config.xtts_current_language# convert_language_name()
-            self.set_speaker_folder(voices_folder)
-            preprocessed_text= add_period(text)
-            voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
-            if len(voice_file)==0:
-                return {"status":False,"error":"Voice not found"}
-            self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language, use_threading=use_threading)
-
-        except Exception as ex:
-            trace_exception(ex)
-            return {"status":False,"error":f"{ex}"}
-
-    def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
-        text = self.clean_text(text)
-        def tts2_audio_th(thread_uid=None):
-            url = f"{self.xtts_base_url}/tts_to_audio"
-
-            # Define the request body
-            payload = {
-                "text": text,
-                "speaker_wav": speaker,
-                "language": language
-            }
-            headers = {
-                'accept': 'application/json',
-                'Content-Type': 'application/json'
-            }
-
-            # Send the POST request
-            response =  requests.post(url, headers=headers, data=json.dumps(payload))
-
-            if response.status_code == 200:
-                print("Request successful")
-                print("Response headers:", response.headers)
-                
-                # Basic logging for debugging
-                print("First 100 bytes of response content:", response.content[:100])
-                
-                if file_name_or_path is not None:
-                    try:
-                        with open(self.output_folder / file_name_or_path, 'wb') as file:
-                            # Write the binary content to the file
-                            file.write(response.content)
-                        print(f"File {file_name_or_path} written successfully.")
-                    except Exception as e:
-                        print(f"Failed to write the file. Error: {e}")
-            else:
-                print("Request failed with status code:", response.status_code)
-            if thread_uid:
-                self.generation_threads.pop(thread_uid, None)
-        if use_threading:
-            thread_uid =  str(uuid.uuid4())       
-            thread = threading.Thread(target=tts2_audio_th, args=(thread_uid,))
-            self.generation_threads[thread_uid]=thread
-            self.thread = thread
-            thread.start()
-            ASCIIColors.green("Generation started")
-            return thread
-        else:
-            return tts2_audio_th()
-    
-    def stop(self):
-        url = f"{self.xtts_base_url}/stop_streaming"
-
-        # Define the request body
-        payload = {
-        }
-        headers = {
-            'accept': 'application/json',
-            'Content-Type': 'application/json'
-        }
-
-        # Send the POST request
-        response =  requests.post(url, headers=headers, data=json.dumps(payload))
-
-        if response.status_code == 200:
-            print("Request successful")
-
+            raise Exception("LollmsXTTS service is not installed properly.")
+    def get_speaker_wav(self, speaker) -> Path:
+        """
+        Resolve a speaker name to its reference ``.wav`` file.
        
+        The folders in ``self.voices_folders`` are probed in order and the
+        first ``<speaker>.wav`` that exists is returned.
+
+        :param speaker: The name of the speaker file (without extension).
+        :return: The path to the first matching speaker file.
+        :raises FileNotFoundError: If no folder contains ``<speaker>.wav``.
+        """
+        wav_name = f"{speaker}.wav"
+        # Lazy generator: probing stops at the first folder that has the file.
+        candidates = (Path(root) / wav_name for root in self.voices_folders)
+        match = next((path for path in candidates if path.exists()), None)
+        if match is None:
+            raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.")
+        return match
+    def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str:
+        """
+        Synthesize ``text`` directly into an audio file.
+
+        :param text: The text to speak.
+        :param file_name_or_path: Destination passed to ``self.tts.tts_to_file``.
+        :param speaker: Voice name resolved through ``get_speaker_wav``; a
+            falsy value selects ``"main_voice"``.
+        :param language: Language code forwarded to the TTS engine.
+        :return: ``file_name_or_path``, unchanged.
+        :raises FileNotFoundError: Propagated from ``get_speaker_wav`` when the
+            voice file cannot be found.
+        """
+        voice_name = speaker if speaker else "main_voice"
+        reference_wav = self.get_speaker_wav(voice_name)
+        self.tts.tts_to_file(
+            text=text,
+            file_path=file_name_or_path,
+            speaker_wav=reference_wav,
+            language=language,
+        )
+        return file_name_or_path
+    
+    def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False):
+        """
+        Synthesize ``text`` sentence by sentence and play it while it is produced.
+
+        A generator thread runs ``_generate_audio`` and feeds ``self.wav_queue``;
+        ``_play_audio`` consumes that queue, either on a second thread
+        (``use_threading=True``, stored in ``self.thread``) or on the calling
+        thread, in which case this method returns once playback has ended.
+
+        :param text: The text to speak.
+        :param speaker: Voice name resolved through ``get_speaker_wav``; a
+            falsy value selects ``"main_voice"``.
+        :param file_name_or_path: Optional destination forwarded to
+            ``_generate_audio``.
+        :param language: Language code forwarded to the TTS engine.
+        :param use_threading: Play on a background thread instead of blocking.
+        :raises FileNotFoundError: Propagated from ``get_speaker_wav``.
+        """
+        import queue  # local import: only needed to recognise an empty queue
+
+        # Split text into sentences
+        sentences = re.split(r'(?<=[.!?]) +', text)
+        speaker_wav = self.get_speaker_wav(speaker if speaker else "main_voice")
+
+        # A run interrupted by stop() can leave unplayed chunks, or its
+        # end-of-stream None, in the shared queue. Without draining, the next
+        # playback would end immediately or start with stale audio.
+        # NOTE(review): assumes self.wav_queue is a queue.Queue - confirm in __init__.
+        try:
+            while True:
+                self.wav_queue.get_nowait()
+        except queue.Empty:
+            pass
+
+        self.stop_event.clear()
+        generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
+        generator_thread.start()
+
+        if use_threading:
+            self.thread = threading.Thread(target=self._play_audio)
+            self.thread.start()
+        else:
+            self._play_audio()
+
+    def _generate_audio(self, sentences, speaker_wav, language, file_name_or_path):
+        """
+        Synthesize each sentence and queue the 16-bit PCM result for playback.
+
+        Every chunk is pushed to ``self.wav_queue`` as an ``int16`` array and a
+        final ``None`` marks the end of the stream. That marker is sent even
+        when synthesis raises, so a consumer blocked on the queue is always
+        released.
+
+        :param sentences: Iterable of sentence strings; blank ones are skipped.
+        :param speaker_wav: Reference voice file handed to ``self.tts.tts``.
+        :param language: Language code handed to ``self.tts.tts``.
+        :param file_name_or_path: When truthy, the generated chunks are also
+            written there through ``_save_wav``.
+        """
+        wav_data = []
+        try:
+            for sentence in sentences:
+                if self.stop_event.is_set():
+                    break
+                if not sentence.strip():
+                    # Splitting empty text yields empty strings; there is
+                    # nothing to synthesize for them.
+                    continue
+                wav = self.tts.tts(text=sentence, speaker_wav=speaker_wav, language=language)
+                # Clip before scaling: a sample outside [-1, 1] would overflow
+                # the int16 range instead of saturating.
+                samples = np.clip(np.asarray(wav, dtype=np.float32), -1.0, 1.0)
+                wav_array = (samples * 32767).astype(np.int16)
+                self.wav_queue.put(wav_array)
+                wav_data.append(wav_array)
+        finally:
+            self.wav_queue.put(None)  # Signal that generation is done
+
+        if file_name_or_path:
+            self._save_wav(wav_data, file_name_or_path)
+
+    def _play_audio(self):
+        """
+        Consume ``self.wav_queue`` and play the queued clips in order.
+
+        Clips are played in groups of two. A ``None`` item marks the end of
+        the stream: whatever is still buffered is played and the loop ends.
+        A stop request (``self.stop_event``) is honoured between clips, so no
+        further buffered clip is started once it is set.
+        """
+        def flush(clips):
+            # Play each clip to completion, unless a stop was requested.
+            for clip in clips:
+                if self.stop_event.is_set():
+                    return
+                # NOTE(review): 1, 2, 22050 mirror the channel count, sample
+                # width and frame rate used by _save_wav - confirm they match
+                # what the TTS model actually outputs.
+                self.play_obj = sa.play_buffer(clip.tobytes(), 1, 2, 22050)
+                self.play_obj.wait_done()
+                time.sleep(0.5)  # Pause between sentences
+
+        buffer = []
+        while not self.stop_event.is_set():
+            wav = self.wav_queue.get()
+            if wav is None:
+                # Play any remaining buffered sentences
+                flush(buffer)
+                ASCIIColors.green("Audio done")
+                break
+            buffer.append(wav)
+            if len(buffer) >= 2:
+                flush(buffer)
+                buffer = []
+
+    def _save_wav(self, wav_data, file_name_or_path):
+        """
+        Write the generated chunks to a WAV file.
+
+        The file is mono, 2 bytes per sample, 22050 frames per second, and
+        the chunks are appended in the order given.
+
+        :param wav_data: Iterable of arrays exposing ``tobytes()``.
+        :param file_name_or_path: Destination; converted with ``str()``.
+        """
+        target = str(file_name_or_path)
+        with wave.open(target, 'wb') as out:
+            # (nchannels, sampwidth, framerate, nframes, comptype, compname);
+            # the frame count is fixed up by the wave module when frames are written.
+            out.setparams((1, 2, 22050, 0, 'NONE', 'not compressed'))
+            for chunk in wav_data:
+                out.writeframes(chunk.tobytes())
+
+    def stop(self):
+        """
+        Interrupt generation and playback, then wait for the player thread.
+
+        The stop event is set first so the worker loops exit, then the clip
+        currently playing is stopped *before* joining. Joining first would
+        block until that clip had finished playing of its own accord. The
+        stop is repeated while the thread is alive because the player may
+        start another buffered clip in the meantime.
+        """
+        self.stop_event.set()
+        while True:
+            if self.play_obj:
+                self.play_obj.stop()
+            if not (self.thread and self.thread.is_alive()):
+                break
+            # Short timeout so a newly started clip is stopped promptly.
+            self.thread.join(timeout=0.05)
+
    def get_voices(self):
+        """
+        List the voice names available in the configured voices folders.
+
+        :return: The stems of the ``.wav`` files found directly inside each
+            existing folder of ``self.voices_folders``, in folder order.
+        """
        ASCIIColors.yellow("Listing voices")
-        voices=["main_voice"]
-        voices_dir:Path=self.app.lollms_paths.custom_voices_path
-        voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
+        voices = []
+        for voices_folder in self.voices_folders:
+            # Wrap in Path, as get_speaker_wav does, so string entries work,
+            # and skip a missing folder instead of aborting the whole listing.
+            folder = Path(voices_folder)
+            if not folder.is_dir():
+                continue
+            voices += [v.stem for v in folder.iterdir() if v.suffix == ".wav"]
        return voices
+
+
+if __name__ == "__main__":
+    # Usage example: synthesize one sentence into a WAV file.
+    lollms_app = LollmsApplication()
+    xtts_service = LollmsXTTS.get(lollms_app)
+    xtts_service.tts_file(
+        "Hello, this is a test.",
+        "output.wav",
+        speaker="ParisNeo_Original_voice",
+        language="en",
+    )
--- a/lollms/tts.py
+++ b/lollms/tts.py
@ -143,7 +143,17 @@ class LollmsTTS:
            list: A list of available voices.
        """
        return self.voices
-    
+
+    def get_models(self):
+        """
+        Return the TTS models known to this service.
+
+        Returns:
+            list: The value of ``self.models``, returned as-is (no copy).
+        """
+        return self.models
+
+
    def get_devices(self):
        devices =  sd.query_devices()

@ -152,7 +162,6 @@ class LollmsTTS:
            "device_names": [device['name'] for device in devices if device["max_output_channels"]>0],
            "device_indexes": [device['index'] for device in devices if device["max_output_channels"]>0]
        }
-    
    @staticmethod
    def clean_text(text):
        # Remove HTML tags
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -78,8 +78,6 @@ auto_show_browser: true
 copy_to_clipboard_add_all_details: false

 # Voice service
-xtts_enable: false
-xtts_base_url: http://localhost:8020
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en