Added ElevenLabs TTS

This commit is contained in:
Saifeddine ALOUI 2024-07-31 01:08:29 +02:00
parent c68b687e7c
commit 390d0d49da
4 changed files with 157 additions and 6 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 128
version: 129
binding_name: null
model_name: null
model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -161,6 +161,12 @@ openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true

View File

@ -403,7 +403,11 @@ class LollmsApplication(LoLLMsCom):
ASCIIColors.blue("Activating TTS services")
if self.config.active_tts_service == "openai_tts":
if self.config.active_tts_service == "eleven_labs_tts":
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
elif self.config.active_tts_service == "openai_tts":
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
elif self.config.active_tts_service == "xtts" and self.xtts:
@ -517,7 +521,10 @@ class LollmsApplication(LoLLMsCom):
self.tti = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
ASCIIColors.blue("Activating TTS service")
if self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
if self.config.active_tts_service == "eleven_labs_tts":
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
elif self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
elif self.config.active_tts_service == "xtts" and self.xtts:

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 128
version: 129
binding_name: null
model_name: null
model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -161,6 +161,12 @@ openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true

View File

@ -0,0 +1,132 @@
# Title LollmsElevenLabsTTS
# Licence: MIT
# Author : Paris Neo
# Uses the ElevenLabs API to perform text to speech
#
from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any
from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
from lollms.tts import LollmsTTS
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO
from openai import OpenAI
# Make sure the audio playback/decoding packages are available before they
# are imported below; each missing one is installed on the fly, in order.
for _required_package in ("sounddevice", "soundfile"):
    if PackageManager.check_package_installed(_required_package):
        continue
    PackageManager.install_package(_required_package)
import sounddevice as sd
import soundfile as sf
def get_Whisper(lollms_paths: LollmsPaths):
    """Return the TTS service class implemented by this module.

    Args:
        lollms_paths: Accepted for signature compatibility; it is not used.

    Returns:
        The ``LollmsElevenLabsTTS`` class itself (not an instance).
    """
    service_class = LollmsElevenLabsTTS
    return service_class
class LollmsElevenLabsTTS(LollmsTTS):
    """Text-to-speech service backed by the ElevenLabs HTTP API.

    The service posts the text to the ElevenLabs ``text-to-speech`` endpoint
    for the configured voice and stores the returned audio in a file.
    ``tts_file`` only produces the file; ``tts_audio`` also plays it through
    ``sounddevice``.
    """

    # Endpoint prefix; the voice id (and optionally "/stream") is appended.
    API_BASE_URL = "https://api.elevenlabs.io/v1/text-to-speech"
    # (connect, read) timeouts in seconds so a stalled connection cannot
    # block the caller forever.
    REQUEST_TIMEOUT = (10, 120)
    # Number of bytes read per iteration when the streaming endpoint is used.
    STREAM_CHUNK_SIZE = 8192

    def __init__(
        self,
        app: LollmsApplication,
        model_id: str = "eleven_monolingual_v1",
        voice_id: str = "EXAVITQu4vr4xnSDxMaL",
        api_key: str = "",
        output_path: Path | str = None,
        stability: float = 0.5,
        similarity_boost: float = 0.5,
        streaming: bool = False
    ):
        """Store the ElevenLabs settings and mark the service as ready.

        Args:
            app: The running lollms application, forwarded to the base class.
            model_id: ElevenLabs model identifier. The default matches the
                ``elevenlabs_tts_model_id`` default of the configuration file.
            voice_id: ElevenLabs voice identifier used in the request URL.
            api_key: Value sent in the ``xi-api-key`` request header.
            output_path: Folder used for generated files when a call does not
                provide an explicit file name.
            stability: ``voice_settings.stability`` value sent with requests.
            similarity_boost: ``voice_settings.similarity_boost`` value sent
                with requests.
            streaming: When True, the ``/stream`` endpoint is used and the
                audio is written to disk chunk by chunk.
        """
        super().__init__("elevenlabs_tts", app, model_id, voice_id, api_key, output_path)
        self.voice_id = voice_id
        self.model_id = model_id
        self.api_key = api_key
        self.output_path = output_path
        self.stability = stability
        self.similarity_boost = similarity_boost
        self.streaming = streaming
        self.ready = True

    def _resolve_output_file(self, file_name_or_path):
        """Return the file the audio is written to, or raise if none can be chosen."""
        if file_name_or_path is not None:
            return file_name_or_path
        if self.output_path is None:
            raise ValueError(
                "No output file was provided and the service has no output_path configured"
            )
        # Assumes the API's default output format is MP3, hence the
        # extension. TODO confirm against the ElevenLabs API reference.
        return Path(self.output_path) / f"elevenlabs_tts_{int(time.time() * 1000)}.mp3"

    def _synthesize_to_file(self, text, target):
        """Request speech for ``text`` from ElevenLabs and write the audio to ``target``."""
        payload = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": self.stability,
                "similarity_boost": self.similarity_boost
            }
        }
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json"
        }
        url = f"{self.API_BASE_URL}/{self.voice_id}"
        if self.streaming:
            url = f"{url}/stream"

        with requests.post(
            url,
            json=payload,
            headers=headers,
            stream=self.streaming,
            timeout=self.REQUEST_TIMEOUT,
        ) as response:
            # Fail before touching the disk: otherwise an API error body
            # (JSON) would be saved as if it were audio.
            response.raise_for_status()
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            with open(target, "wb") as audio_file:
                if self.streaming:
                    for chunk in response.iter_content(chunk_size=self.STREAM_CHUNK_SIZE):
                        if chunk:
                            audio_file.write(chunk)
                else:
                    audio_file.write(response.content)

    def tts_file(self, text, speaker=None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Synthesize ``text`` and save the audio to a file.

        Args:
            text: The text to convert to speech.
            speaker: Currently ignored; the voice set at construction is used.
            file_name_or_path: Destination file. When None, a file is created
                inside ``output_path``.
            language: Currently ignored.
            use_threading: Currently ignored.

        Returns:
            The path of the written audio file (``file_name_or_path`` itself
            when it was provided).

        Raises:
            ValueError: No destination was given and ``output_path`` is unset.
            requests.RequestException: The request failed, timed out, or the
                API answered with an error status.
        """
        speech_file_path = self._resolve_output_file(file_name_or_path)
        self._synthesize_to_file(text, speech_file_path)
        return speech_file_path

    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Synthesize ``text``, save it to a file and play it.

        The call blocks until playback has finished.

        Args:
            text: The text to convert to speech.
            speaker: Currently ignored; the voice set at construction is used.
            file_name_or_path: Destination file. When None, a file is created
                inside ``output_path``.
            language: Currently ignored.
            use_threading: Currently ignored.

        Raises:
            ValueError: No destination was given and ``output_path`` is unset.
            requests.RequestException: The request failed, timed out, or the
                API answered with an error status.
        """
        speech_file_path = self._resolve_output_file(file_name_or_path)
        self._synthesize_to_file(text, speech_file_path)

        # NOTE(review): decoding relies on soundfile being able to read the
        # format returned by the API - confirm MP3 support of the installed
        # libsndfile.
        data, sample_rate = sf.read(speech_file_path, dtype='float32')
        sd.play(data, sample_rate)
        # Wait until the audio is done playing.
        sd.wait()