mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 20:57:58 +00:00
upgraded xtts
This commit is contained in:
parent
aa21fdfb49
commit
3a00e968fb
@ -142,10 +142,6 @@ whisper_model: base
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
xtts_use_deepspeed: false
|
||||
xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -78,8 +78,6 @@ auto_show_browser: true
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -78,8 +78,6 @@ auto_show_browser: true
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -78,8 +78,6 @@ auto_show_browser: true
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -341,14 +341,10 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=self.lollms_paths.custom_voices_path,
|
||||
xtts_base_url=self.config.xtts_base_url,
|
||||
wait_for_service=False,
|
||||
use_deep_speed=self.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=self.config.xtts_use_streaming_mode
|
||||
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
|
||||
)
|
||||
except:
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load XTTS")
|
||||
|
||||
ASCIIColors.blue("Loading local TTI services")
|
||||
@ -458,14 +454,10 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=self.lollms_paths.custom_voices_path,
|
||||
xtts_base_url=self.config.xtts_base_url,
|
||||
wait_for_service=False,
|
||||
use_deep_speed=self.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=self.config.xtts_use_streaming_mode
|
||||
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
|
||||
)
|
||||
except:
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load XTTS")
|
||||
|
||||
ASCIIColors.blue("Loading local TTI services")
|
||||
|
@ -142,10 +142,6 @@ whisper_model: base
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
xtts_use_deepspeed: false
|
||||
xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -72,6 +72,14 @@ class LollmsPaths:
|
||||
self.personal_outputs_path = self.personal_path / "outputs"
|
||||
self.personal_user_infos_path = self.personal_path / "user_infos"
|
||||
|
||||
|
||||
self.personal_services_path = self.personal_path / "services"
|
||||
self.personal_stt_services_path = self.personal_services_path / "stt"
|
||||
self.personal_tts_services_path = self.personal_services_path / "tts"
|
||||
self.personal_tti_services_path = self.personal_services_path / "tti"
|
||||
self.personal_ttm_services_path = self.personal_services_path / "ttm"
|
||||
|
||||
|
||||
self.personal_trainers_path = self.personal_path / "trainers"
|
||||
self.gptqlora_path = self.personal_trainers_path / "gptqlora"
|
||||
|
||||
@ -117,6 +125,25 @@ class LollmsPaths:
|
||||
ASCIIColors.yellow(f"{self.personal_models_path}")
|
||||
ASCIIColors.red("personal_user_infos_path:",end="")
|
||||
ASCIIColors.yellow(f"{self.personal_user_infos_path}")
|
||||
|
||||
ASCIIColors.red("personal_services_path:",end="")
|
||||
ASCIIColors.yellow(f"{self.personal_services_path}")
|
||||
|
||||
|
||||
ASCIIColors.red("personal_stt_services_path:", end="")
|
||||
ASCIIColors.yellow(f"{self.personal_stt_services_path}")
|
||||
|
||||
ASCIIColors.red("personal_tts_services_path:", end="")
|
||||
ASCIIColors.yellow(f"{self.personal_tts_services_path}")
|
||||
|
||||
ASCIIColors.red("personal_tti_services_path:", end="")
|
||||
ASCIIColors.yellow(f"{self.personal_tti_services_path}")
|
||||
|
||||
ASCIIColors.red("personal_ttm_services_path:", end="")
|
||||
ASCIIColors.yellow(f"{self.personal_ttm_services_path}")
|
||||
|
||||
|
||||
|
||||
ASCIIColors.red("personal_trainers_path:",end="")
|
||||
ASCIIColors.yellow(f"{self.personal_trainers_path}")
|
||||
ASCIIColors.red("personal_trainers_path:",end="")
|
||||
@ -162,6 +189,12 @@ class LollmsPaths:
|
||||
"Personal user infos path": self.personal_user_infos_path,
|
||||
"Personal trainers path": self.personal_trainers_path,
|
||||
"Personal gptqlora trainer path": self.gptqlora_path,
|
||||
|
||||
"Personal services path": self.personal_services_path,
|
||||
"Personal STT services path": self.personal_stt_services_path,
|
||||
"Personal TTS services path": self.personal_tts_services_path,
|
||||
"Personal TTI services path": self.personal_tti_services_path,
|
||||
"Personal TTM services path": self.personal_ttm_services_path,
|
||||
}
|
||||
return "\n".join([f"{category}: {path}" for category, path in directories.items()])
|
||||
|
||||
@ -180,6 +213,12 @@ class LollmsPaths:
|
||||
self.personal_outputs_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_uploads_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_user_infos_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.personal_services_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_stt_services_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_tts_services_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_tti_services_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_ttm_services_path.mkdir(parents=True, exist_ok=True)
|
||||
self.personal_trainers_path.mkdir(parents=True, exist_ok=True)
|
||||
self.custom_personalities_path.mkdir(parents=True, exist_ok=True)
|
||||
self.custom_voices_path.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -139,10 +139,6 @@ whisper_model: base
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
xtts_use_deepspeed: false
|
||||
xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
@ -9,7 +9,7 @@ description:
|
||||
"""
|
||||
from fastapi import APIRouter, Request
|
||||
from pydantic import BaseModel, Field
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from lollms.server.elf_server import LOLLMSElfServer
|
||||
from pydantic import BaseModel
|
||||
from lollms.security import check_access
|
||||
from starlette.responses import StreamingResponse
|
||||
@ -24,7 +24,7 @@ import platform
|
||||
# ----------------------- Defining router and main class ------------------------------
|
||||
|
||||
router = APIRouter()
|
||||
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
|
||||
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
|
||||
|
||||
|
||||
# ----------------------- voice ------------------------------
|
||||
|
@ -7,7 +7,7 @@ description:
|
||||
|
||||
"""
|
||||
from fastapi import APIRouter, Request
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from lollms.server.elf_server import LOLLMSElfServer
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import StreamingResponse
|
||||
from lollms.types import MSG_TYPE
|
||||
@ -22,7 +22,7 @@ import platform
|
||||
# ----------------------- Defining router and main class ------------------------------
|
||||
|
||||
router = APIRouter()
|
||||
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
|
||||
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
|
||||
|
||||
class Identification(BaseModel):
|
||||
client_id: str
|
||||
|
@ -8,7 +8,7 @@ description:
|
||||
|
||||
"""
|
||||
from fastapi import APIRouter, Request
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from lollms.server.elf_server import LOLLMSElfServer
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import StreamingResponse
|
||||
from lollms.types import MSG_TYPE
|
||||
@ -43,7 +43,7 @@ class DeleteDiscussionParameters(BaseModel):
|
||||
# ----------------------- Defining router and main class ------------------------------
|
||||
|
||||
router = APIRouter()
|
||||
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
|
||||
lollmsElfServer:LOLLMSElfServer = LOLLMSElfServer.get_instance()
|
||||
|
||||
|
||||
@router.get("/list_discussions")
|
||||
|
@ -56,6 +56,16 @@ def list_stt_models():
|
||||
ASCIIColors.yellow("Listing voices")
|
||||
return {"voices":lollmsElfServer.stt.get_models()}
|
||||
|
||||
@router.get("/list_tts_models")
def list_tts_models():
    """
    List the models exposed by the active TTS service.

    Returns:
        dict: ``{"voices": <result of tts.get_models()>}`` on success (the key
        name is left unchanged), or ``{"status": False, "error": <message>}``
        when the server runs in headless mode, is not bound to
        localhost/127.0.0.1, or has no TTS service loaded.
    """
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}

    if lollmsElfServer.config.host not in ("localhost", "127.0.0.1"):
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}

    # Same guard text2Audio applies: without it a missing service surfaces as
    # an AttributeError (HTTP 500) instead of a structured error.
    if lollmsElfServer.tts is None:
        return {"status":False,"error":"No TTS service is on"}

    ASCIIColors.yellow("Listing voices")
    return {"voices":lollmsElfServer.tts.get_models()}
|
||||
|
||||
@router.post("/set_voice")
|
||||
async def set_voice(request: Request):
|
||||
@ -134,7 +144,11 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
if lollmsElfServer.tts is None:
|
||||
return {"url": None, "error":f"No TTS service is on"}
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_audio(request.text, request.voice, file_name_or_path=request.fn, use_threading=True)
|
||||
if request.voice:
|
||||
voice = request.voice
|
||||
else:
|
||||
voice = lollmsElfServer.config.xtts_current_voice
|
||||
response = lollmsElfServer.tts.tts_audio(request.text, voice, file_name_or_path=request.fn, use_threading=True)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
@ -143,8 +157,8 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
lollmsElfServer.error(ex)
|
||||
return {"status":False,"error":str(ex)}
|
||||
|
||||
@router.post("/text2wav")
|
||||
async def text2Wav(request: LollmsText2AudioRequest):
|
||||
@router.post("/text2Wave")
|
||||
async def text2Wave(request: LollmsText2AudioRequest):
|
||||
"""
|
||||
Generates a wav file from the given text using the active TTS service.
|
||||
|
||||
@ -168,9 +182,14 @@ async def text2Wav(request: LollmsText2AudioRequest):
|
||||
request.fn.parent.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
try:
|
||||
if request.voice:
|
||||
voice = request.voice
|
||||
else:
|
||||
voice = lollmsElfServer.config.xtts_current_voice
|
||||
|
||||
# Get the JSON data from the POST request.
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_file(request.text, request.voice, file_name_or_path=request.fn)
|
||||
response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
@ -215,11 +234,7 @@ def start_xtts():
|
||||
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
|
||||
voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],
|
||||
)
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
except Exception as ex:
|
||||
|
@ -1,404 +1,193 @@
|
||||
# Title LollmsXTTS
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
# Adapted from the work of daswer123's xtts-api-server
|
||||
# check it out : https://github.com/daswer123/xtts-api-server
|
||||
# Here is a copy of the LICENCE https://github.com/daswer123/xtts-api-server/blob/main/LICENSE
|
||||
# All rights are reserved
|
||||
"""
|
||||
project: lollms_tts
|
||||
file: lollms_tts.py
|
||||
author: ParisNeo
|
||||
description:
|
||||
This file hosts the LollmsXTTS service which provides text-to-speech functionalities using the TTS library.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import re
|
||||
import platform
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
import uuid
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import platform
|
||||
from lollms.utilities import run_pip_in_env
|
||||
from typing import List
|
||||
import threading
|
||||
import numpy as np
|
||||
# Ensure required packages are installed
|
||||
if not PackageManager.check_package_installed("TTS"):
|
||||
PackageManager.install_or_update("TTS")
|
||||
|
||||
if not PackageManager.check_package_installed("simpleaudio"):
|
||||
PackageManager.install_or_update("simpleaudio")
|
||||
|
||||
if not PackageManager.check_package_installed("wave"):
|
||||
PackageManager.install_or_update("wave")
|
||||
|
||||
import wave
|
||||
from TTS.api import TTS
|
||||
import simpleaudio as sa
|
||||
import time
|
||||
from queue import Queue
|
||||
import re
|
||||
|
||||
class LollmsXTTS(LollmsTTS):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
xtts_base_url=None,
|
||||
share=False,
|
||||
max_retries=20,
|
||||
voices_folder=None,
|
||||
voice_samples_path="",
|
||||
wait_for_service=True,
|
||||
use_deep_speed=False,
|
||||
use_streaming_mode = True
|
||||
):
|
||||
super().__init__("xtts",app)
|
||||
def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
|
||||
super().__init__("lollms_xtts", app)
|
||||
self.generation_threads = {}
|
||||
self.voices_folder = voices_folder
|
||||
self.ready = False
|
||||
if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020":
|
||||
xtts_base_url = None
|
||||
# Get the current directory
|
||||
lollms_paths = app.lollms_paths
|
||||
root_dir = lollms_paths.personal_path
|
||||
self.voice_samples_path = voice_samples_path
|
||||
self.use_deep_speed = use_deep_speed
|
||||
self.use_streaming_mode = use_streaming_mode
|
||||
|
||||
# Store the path to the script
|
||||
if xtts_base_url is None:
|
||||
self.xtts_base_url = "http://127.0.0.1:8020"
|
||||
if not LollmsXTTS.verify(lollms_paths):
|
||||
LollmsXTTS.install(app)
|
||||
else:
|
||||
self.xtts_base_url = xtts_base_url
|
||||
|
||||
self.auto_xtts_url = self.xtts_base_url+"/sdapi/v1"
|
||||
shared_folder = root_dir/"shared"
|
||||
self.xtts_path = shared_folder / "xtts"
|
||||
self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
|
||||
self.stop_event = threading.Event()
|
||||
|
||||
# Show a cool LOGO using ASCIIColors
|
||||
ASCIIColors.red(" __ ___ __ __ __ __ ___ _ ")
|
||||
ASCIIColors.red(" / / /___\/ / / / /\/\ / _\ \ \/ / |_| |_ ___ ")
|
||||
ASCIIColors.red(" / / // // / / / / \ \ \ _____\ /| __| __/ __| ")
|
||||
ASCIIColors.red("/ /___/ \_// /___/ /___/ /\/\ \_\ \_____/ \| |_| |_\__ \ ")
|
||||
ASCIIColors.red("\____/\___/\____/\____/\/ \/\__/ /_/\_\\__|\__|___/ ")
|
||||
|
||||
ASCIIColors.red(" Forked from daswer123's XTTS server")
|
||||
ASCIIColors.red(" Integration in lollms by ParisNeo using daswer123's webapi")
|
||||
ASCIIColors.red(" Address :",end="")
|
||||
ASCIIColors.yellow(f"{self.xtts_base_url}")
|
||||
|
||||
self.output_folder = app.lollms_paths.personal_outputs_path/"audio_out"
|
||||
self.output_folder.mkdir(parents=True, exist_ok=True)
|
||||
# Load the TTS model
|
||||
self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
||||
self.tts.to("cuda")
|
||||
self.wav_queue = Queue()
|
||||
self.play_obj = None
|
||||
self.thread = None
|
||||
self.ready = True
|
||||
|
||||
if not self.wait_for_service(1,False):
|
||||
ASCIIColors.info("Loading lollms_xtts")
|
||||
# Launch the Flask service using the appropriate script for the platform
|
||||
self.process = self.run_xtts_api_server()
|
||||
|
||||
# Wait until the service is available at http://127.0.0.1:7860/
|
||||
if wait_for_service:
|
||||
self.wait_for_service()
|
||||
else:
|
||||
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
||||
|
||||
def install(lollms_app:LollmsApplication):
|
||||
ASCIIColors.green("XTTS installation started")
|
||||
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
||||
root_dir = lollms_app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
|
||||
# Step 1: Clone or update the repository
|
||||
if os.path.exists(xtts_path):
|
||||
print("Repository already exists. Pulling latest changes...")
|
||||
try:
|
||||
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
||||
except:
|
||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||
|
||||
else:
|
||||
print("Cloning repository...")
|
||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||
|
||||
# Step 2: Create or update the Conda environment
|
||||
if environment_exists("xtts"):
|
||||
print("Conda environment 'xtts' already exists. Updating...")
|
||||
# Here you might want to update the environment, e.g., update Python or dependencies
|
||||
# This step is highly dependent on how you manage your Conda environments and might involve
|
||||
# running `conda update` commands or similar.
|
||||
else:
|
||||
print("Creating Conda environment 'xtts'...")
|
||||
create_conda_env("xtts", "3.10")
|
||||
|
||||
# Step 3: Install or update dependencies using your custom function
|
||||
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
||||
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
||||
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
||||
run_pip_in_env("xtts", f"install -e {xtts_path}", cwd=xtts_path)
|
||||
|
||||
# Step 4: Launch the server
|
||||
# Assuming the server can be started with a Python script in the cloned repository
|
||||
print("Launching XTTS API server...")
|
||||
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
||||
|
||||
print("XTTS API server setup and launch completed.")
|
||||
ASCIIColors.cyan("Done")
|
||||
ASCIIColors.cyan("Installing xtts-api-server")
|
||||
ASCIIColors.green("XTTS server installed successfully")
|
||||
def install(lollms_app: LollmsApplication):
    """
    Install the Python packages the in-process XTTS service depends on.

    Args:
        lollms_app: The running application. It is not used by the current
            implementation and is kept so the signature stays unchanged.
    """
    ASCIIColors.green("LollmsXTTS installation started")
    # run_pip_in_env is called elsewhere as (environment name, pip command),
    # so passing only a package name would treat "TTS" as an environment.
    # Use the same PackageManager helper the module-level dependency check uses.
    PackageManager.install_or_update("TTS")
    PackageManager.install_or_update("simpleaudio")
|
||||
|
||||
@staticmethod
|
||||
def verify(lollms_paths:LollmsPaths)->bool:
|
||||
# Clone repository
|
||||
root_dir = lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
return xtts_path.exists()
|
||||
|
||||
@staticmethod
|
||||
def get(app: LollmsApplication) -> 'LollmsXTTS':
|
||||
root_dir = app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
xtts_script_path = xtts_path / "lollms_xtts.py"
|
||||
git_pull(xtts_path)
|
||||
|
||||
if xtts_script_path.exists():
|
||||
ASCIIColors.success("lollms_xtts found.")
|
||||
ASCIIColors.success("Loading source file...",end="")
|
||||
# use importlib to load the module from the file path
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
ASCIIColors.success("ok")
|
||||
return LollmsXTTS
|
||||
|
||||
def run_xtts_api_server(self):
|
||||
root_dir = self.app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
|
||||
# Get the path to the current Python interpreter
|
||||
ASCIIColors.yellow("Loading XTTS ")
|
||||
options= ""
|
||||
if self.use_deep_speed:
|
||||
options += " --deepspeed"
|
||||
if self.use_streaming_mode:
|
||||
options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
|
||||
process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", cwd=xtts_path, wait= False)
|
||||
return process
|
||||
|
||||
def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True):
|
||||
thread = threading.Thread(target=self.wait_for_service, args=(max_retries, show_warning))
|
||||
thread.start()
|
||||
return thread
|
||||
|
||||
def update_settings(self):
|
||||
def verify(lollms_paths: LollmsPaths) -> bool:
|
||||
# Verify that the service is installed either by verifying the libraries are installed or that some files or folders exist
|
||||
try:
|
||||
settings = {
|
||||
"stream_chunk_size": int(self.app.config.xtts_stream_chunk_size),
|
||||
"temperature": float(self.app.config.xtts_temperature),
|
||||
"speed": float(self.app.config.xtts_speed),
|
||||
"length_penalty": float(self.app.config.xtts_length_penalty),
|
||||
"repetition_penalty": float(self.app.config.xtts_repetition_penalty),
|
||||
"top_p": float(self.app.config.xtts_top_p),
|
||||
"top_k": int(self.app.config.xtts_top_k),
|
||||
"enable_text_splitting": bool(self.app.config.xtts_enable_text_splitting)
|
||||
}
|
||||
print("set_tts_settings")
|
||||
print(f"{settings}")
|
||||
response = requests.post(f"{self.xtts_base_url}/set_tts_settings", settings,headers={
|
||||
'accept': 'application/json',
|
||||
'Content-Type': 'application/json'
|
||||
})
|
||||
if response.status_code == 200:
|
||||
ASCIIColors.success("XTTS updated successfully")
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
pass
|
||||
|
||||
def wait_for_service(self, max_retries = 150, show_warning=True):
|
||||
print(f"Waiting for xtts service (max_retries={max_retries})")
|
||||
url = f"{self.xtts_base_url}/languages"
|
||||
# Adjust this value as needed
|
||||
retries = 0
|
||||
|
||||
while retries < max_retries or max_retries<0:
|
||||
try:
|
||||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
self.update_settings()
|
||||
print(f"voices_folder is {self.voices_folder}.")
|
||||
self.ready = True
|
||||
if self.voices_folder is not None:
|
||||
print("Generating sample audio.")
|
||||
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
|
||||
try:
|
||||
self.tts_audio("x t t s is ready",voice_file[0].stem)
|
||||
except Exception as ex:
|
||||
return True
|
||||
print("Service is available.")
|
||||
if self.app is not None:
|
||||
self.app.success("XTTS Service is now available.")
|
||||
return True
|
||||
except:
|
||||
pass
|
||||
|
||||
retries += 1
|
||||
ASCIIColors.yellow("Waiting for xtts...")
|
||||
time.sleep(5)
|
||||
|
||||
if show_warning:
|
||||
print("Service did not become available within the given time.")
|
||||
if self.app is not None:
|
||||
self.app.error("XTTS Service did not become available within the given time.")
|
||||
return False
|
||||
|
||||
def set_speaker_folder(self, speaker_folder):
|
||||
url = f"{self.xtts_base_url}/set_speaker_folder"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"speaker_folder": str(speaker_folder)
|
||||
}
|
||||
|
||||
# Send the POST request
|
||||
response = requests.post(url, json=payload)
|
||||
|
||||
# Check the response status code
|
||||
if response.status_code == 200:
|
||||
print("Request successful")
|
||||
import TTS
|
||||
import simpleaudio
|
||||
return True
|
||||
# You can access the response data using response.json()
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
def tts_file(self, text, file_name_or_path, speaker=None, language="en")->str:
|
||||
text = self.clean_text(text)
|
||||
url = f"{self.xtts_base_url}/tts_to_file"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker,
|
||||
"language": language,
|
||||
"file_name_or_path": file_name_or_path
|
||||
}
|
||||
headers = {
|
||||
'accept': 'application/json',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
# Send the POST request
|
||||
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
||||
|
||||
# Check the response status code
|
||||
if response.status_code == 200:
|
||||
print("Request successful")
|
||||
# You can access the response data using response.json()
|
||||
@staticmethod
|
||||
def get(app: LollmsApplication) -> 'LollmsXTTS':
|
||||
# Verify if the service is installed and if true then return an instance of LollmsXTTS
|
||||
if LollmsXTTS.verify(app.lollms_paths):
|
||||
return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
|
||||
return file_name_or_path
|
||||
|
||||
def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
voice=self.app.config.xtts_current_voice if speaker is None else speaker
|
||||
index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
|
||||
if voice is None:
|
||||
voice = "main_voice"
|
||||
self.app.info("Starting to build voice")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
# If the personality has a voice, then use it
|
||||
personality_audio:Path = self.app.personality.personality_package_path/"audio"
|
||||
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
|
||||
voices_folder = personality_audio
|
||||
elif voice!="main_voice":
|
||||
voices_folder = self.app.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
language = self.app.config.xtts_current_language# convert_language_name()
|
||||
self.set_speaker_folder(voices_folder)
|
||||
preprocessed_text= add_period(text)
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
self.xtts_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language, use_threading=use_threading)
|
||||
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"status":False,"error":f"{ex}"}
|
||||
|
||||
def xtts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
text = self.clean_text(text)
|
||||
def tts2_audio_th(thread_uid=None):
|
||||
url = f"{self.xtts_base_url}/tts_to_audio"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker,
|
||||
"language": language
|
||||
}
|
||||
headers = {
|
||||
'accept': 'application/json',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
# Send the POST request
|
||||
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
||||
|
||||
if response.status_code == 200:
|
||||
print("Request successful")
|
||||
print("Response headers:", response.headers)
|
||||
|
||||
# Basic logging for debugging
|
||||
print("First 100 bytes of response content:", response.content[:100])
|
||||
|
||||
if file_name_or_path is not None:
|
||||
try:
|
||||
with open(self.output_folder / file_name_or_path, 'wb') as file:
|
||||
# Write the binary content to the file
|
||||
file.write(response.content)
|
||||
print(f"File {file_name_or_path} written successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to write the file. Error: {e}")
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
if thread_uid:
|
||||
self.generation_threads.pop(thread_uid, None)
|
||||
if use_threading:
|
||||
thread_uid = str(uuid.uuid4())
|
||||
thread = threading.Thread(target=tts2_audio_th, args=(thread_uid,))
|
||||
self.generation_threads[thread_uid]=thread
|
||||
self.thread = thread
|
||||
thread.start()
|
||||
ASCIIColors.green("Generation started")
|
||||
return thread
|
||||
else:
|
||||
return tts2_audio_th()
|
||||
|
||||
def stop(self):
|
||||
url = f"{self.xtts_base_url}/stop_streaming"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
}
|
||||
headers = {
|
||||
'accept': 'application/json',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
# Send the POST request
|
||||
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
||||
|
||||
if response.status_code == 200:
|
||||
print("Request successful")
|
||||
|
||||
raise Exception("LollmsXTTS service is not installed properly.")
|
||||
def get_speaker_wav(self, speaker) -> Path:
    """
    Resolve a speaker name to its reference ``.wav`` file.

    The folders in ``self.voices_folders`` are probed in order and the first
    existing ``<speaker>.wav`` wins.

    :param speaker: The name of the speaker file (without extension).
    :return: The path to the first matching speaker file.
    :raises FileNotFoundError: If no folder contains ``<speaker>.wav``.
    """
    wav_name = f"{speaker}.wav"
    candidates = (Path(directory) / wav_name for directory in self.voices_folders)
    found = next((candidate for candidate in candidates if candidate.exists()), None)
    if found is None:
        raise FileNotFoundError(f"Speaker file '{speaker}.wav' not found in any of the specified folders.")
    return found
|
||||
def tts_file(self, text, file_name_or_path, speaker=None, language="en") -> str:
    """
    Synthesize ``text`` straight into an audio file.

    :param text: The text to synthesize.
    :param file_name_or_path: Destination handed to ``self.tts.tts_to_file``.
    :param speaker: Speaker name resolved through ``get_speaker_wav``; a falsy
        value selects ``"main_voice"``.
    :param language: Language code forwarded to the model.
    :return: ``file_name_or_path``, unchanged.
    :raises FileNotFoundError: Propagated from ``get_speaker_wav`` when the
        speaker file cannot be found.
    """
    voice_name = speaker if speaker else "main_voice"
    reference_wav = self.get_speaker_wav(voice_name)
    self.tts.tts_to_file(
        text=text,
        file_path=file_name_or_path,
        speaker_wav=reference_wav,
        language=language,
    )
    return file_name_or_path
|
||||
|
||||
def tts_audio(self, text, speaker=None, file_name_or_path: Path | str | None = None, language="en", use_threading=False):
|
||||
# Split text into sentences
|
||||
sentences = re.split(r'(?<=[.!?]) +', text)
|
||||
|
||||
if speaker:
|
||||
speaker_wav = self.get_speaker_wav(speaker)
|
||||
else:
|
||||
speaker_wav = self.get_speaker_wav("main_voice")
|
||||
|
||||
if use_threading:
|
||||
self.stop_event.clear()
|
||||
generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
|
||||
generator_thread.start()
|
||||
self.thread = threading.Thread(target=self._play_audio)
|
||||
self.thread.start()
|
||||
else:
|
||||
self.stop_event.clear()
|
||||
generator_thread = threading.Thread(target=self._generate_audio, args=(sentences, speaker_wav, language, file_name_or_path))
|
||||
generator_thread.start()
|
||||
self._play_audio()
|
||||
|
||||
def _generate_audio(self, sentences, speaker_wav, language, file_name_or_path):
    """
    Synthesize each sentence and push the result onto ``self.wav_queue``.

    Every sentence is converted to a 16-bit PCM numpy array and queued; a
    final ``None`` marks the end of the stream for the consumer. Generation
    stops early once ``self.stop_event`` is set.

    :param sentences: Iterable of text fragments to synthesize, in order.
    :param speaker_wav: Reference voice file forwarded to the model.
    :param language: Language code forwarded to the model.
    :param file_name_or_path: When truthy, the generated chunks are also
        written there through ``_save_wav``.
    """
    wav_data = []
    try:
        for sentence in sentences:
            if self.stop_event.is_set():
                break
            wav = self.tts.tts(text=sentence, speaker_wav=speaker_wav, language=language)
            # Clamp before scaling: a sample outside [-1, 1] would overflow
            # the int16 range and wrap around into a loud click.
            samples = np.clip(np.asarray(wav, dtype=np.float32), -1.0, 1.0)
            pcm = (samples * 32767).astype(np.int16)
            self.wav_queue.put(pcm)
            wav_data.append(pcm)
    finally:
        # Signal that generation is done. This must also happen when
        # synthesis raises, otherwise the consumer blocks forever on get().
        self.wav_queue.put(None)

    if file_name_or_path:
        self._save_wav(wav_data, file_name_or_path)
|
||||
|
||||
def _play_audio(self):
    """
    Play the audio chunks delivered through ``self.wav_queue``.

    Chunks are collected two at a time and then played back to back with a
    half-second pause after each one. A ``None`` item ends the stream: any
    chunk still pending is played and the loop exits. Setting
    ``self.stop_event`` ends the loop and also prevents further pending
    chunks from being started.
    """
    # NOTE(review): playback uses a hard-coded 22050 Hz, mono, 16-bit format -
    # confirm this matches the sample rate the loaded model actually outputs.
    def play_pending(pending):
        # Plays each pending chunk in order, unless a stop was requested.
        for chunk in pending:
            if self.stop_event.is_set():
                return
            self.play_obj = sa.play_buffer(chunk.tobytes(), 1, 2, 22050)
            self.play_obj.wait_done()
            time.sleep(0.5)  # Pause between sentences

    pending = []
    while not self.stop_event.is_set():
        wav = self.wav_queue.get()
        if wav is None:
            # Play any remaining buffered sentences
            play_pending(pending)
            ASCIIColors.green("Audio done")
            break
        pending.append(wav)
        if len(pending) >= 2:
            play_pending(pending)
            pending = []
|
||||
|
||||
def _save_wav(self, wav_data, file_name_or_path):
    """
    Write the generated chunks to a single WAV file.

    The file is written as mono, 2 bytes per sample, 22050 Hz.

    :param wav_data: Iterable of arrays exposing ``tobytes()``; their bytes
        are written in order.
    :param file_name_or_path: Destination path (converted with ``str``).
    """
    destination = str(file_name_or_path)
    with wave.open(destination, 'wb') as wav_file:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        wav_file.setparams((1, 2, 22050, 0, 'NONE', 'not compressed'))
        for chunk in wav_data:
            wav_file.writeframes(chunk.tobytes())
|
||||
|
||||
def stop(self):
    """
    Stop the current speech output.

    Sets ``self.stop_event`` (checked by the generator and player loops),
    stops the sound that is currently playing and waits for the player
    thread stored in ``self.thread`` to finish.
    """
    self.stop_event.set()
    worker = self.thread
    # A thread cannot join itself; skip the wait if stop() is invoked from
    # the player thread.
    joinable = worker is not None and worker is not threading.current_thread()
    while True:
        # Silence playback BEFORE waiting: the player thread sits in
        # wait_done(), so joining first would let the audio run to its end.
        # Repeated on every pass in case the player started another chunk.
        if self.play_obj:
            self.play_obj.stop()
        if not (joinable and worker.is_alive()):
            break
        worker.join(timeout=0.1)
|
||||
|
||||
def get_voices(self):
|
||||
# List voices from the folder
|
||||
ASCIIColors.yellow("Listing voices")
|
||||
voices=["main_voice"]
|
||||
voices_dir:Path=self.app.lollms_paths.custom_voices_path
|
||||
voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
|
||||
voices = []
|
||||
for voices_folder in self.voices_folders:
|
||||
voices += [v.stem for v in voices_folder.iterdir() if v.suffix == ".wav"]
|
||||
return voices
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Here do some example
|
||||
app = LollmsApplication()
|
||||
lollms_xtts_service = LollmsXTTS.get(app)
|
||||
lollms_xtts_service.tts_file("Hello, this is a test.", "output.wav", speaker="ParisNeo_Original_voice", language="en")
|
||||
|
@ -143,7 +143,17 @@ class LollmsTTS:
|
||||
list: A list of available voices.
|
||||
"""
|
||||
return self.voices
|
||||
|
||||
|
||||
def get_models(self):
    """
    Return the TTS models known to this service.

    Returns:
        list: The object stored in ``self.models``, handed back as-is
        (no copy is made).
    """
    available_models = self.models
    return available_models
|
||||
|
||||
|
||||
def get_devices(self):
|
||||
devices = sd.query_devices()
|
||||
|
||||
@ -152,7 +162,6 @@ class LollmsTTS:
|
||||
"device_names": [device['name'] for device in devices if device["max_output_channels"]>0],
|
||||
"device_indexes": [device['index'] for device in devices if device["max_output_channels"]>0]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def clean_text(text):
|
||||
# Remove HTML tags
|
||||
|
@ -78,8 +78,6 @@ auto_show_browser: true
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
|
Loading…
Reference in New Issue
Block a user