mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
Added elevel labs TTS
This commit is contained in:
parent
c68b687e7c
commit
390d0d49da
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 128
|
||||
version: 129
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
@ -161,6 +161,12 @@ openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
|
||||
elevenlabs_tts_key: ""
|
||||
elevenlabs_tts_model_id: "eleven_monolingual_v1"
|
||||
elevenlabs_tts_voice_stability: 0.5
|
||||
elevenlabs_tts_voice_boost: 0.5
|
||||
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
|
@ -403,7 +403,11 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
ASCIIColors.blue("Activating TTS services")
|
||||
|
||||
if self.config.active_tts_service == "openai_tts":
|
||||
|
||||
if self.config.active_tts_service == "eleven_labs_tts":
|
||||
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
|
||||
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
|
||||
elif self.config.active_tts_service == "openai_tts":
|
||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||
@ -517,7 +521,10 @@ class LollmsApplication(LoLLMsCom):
|
||||
self.tti = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
|
||||
|
||||
ASCIIColors.blue("Activating TTS service")
|
||||
if self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
|
||||
if self.config.active_tts_service == "eleven_labs_tts":
|
||||
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
|
||||
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
|
||||
elif self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
|
||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 128
|
||||
version: 129
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
@ -161,6 +161,12 @@ openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
|
||||
elevenlabs_tts_key: ""
|
||||
elevenlabs_tts_model_id: "eleven_monolingual_v1"
|
||||
elevenlabs_tts_voice_stability: 0.5
|
||||
elevenlabs_tts_voice_boost: 0.5
|
||||
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
|
132
lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
Normal file
132
lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
Normal file
@ -0,0 +1,132 @@
|
||||
# Title LollmsElevenLabsTTS
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
# Uses the ElevenLabs API to perform text to speech
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
if not PackageManager.check_package_installed("sounddevice"):
|
||||
PackageManager.install_package("sounddevice")
|
||||
if not PackageManager.check_package_installed("soundfile"):
|
||||
PackageManager.install_package("soundfile")
|
||||
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
|
||||
def get_Whisper(lollms_paths:LollmsPaths):
    """Return the ElevenLabs text-to-speech service class.

    The ``lollms_paths`` argument is accepted but not used by this function.

    NOTE(review): the name looks like a leftover from another service module;
    it is kept unchanged because callers may look it up by this name.
    """
    service_class = LollmsElevenLabsTTS
    return service_class
|
||||
|
||||
class LollmsElevenLabsTTS(LollmsTTS):
    """Text-to-speech service that calls the ElevenLabs HTTP API.

    The service posts the text together with the configured model id and
    voice settings to ``/v1/text-to-speech/<voice_id>`` (or the ``/stream``
    variant when ``streaming`` is enabled) and stores the returned audio
    bytes in a file.
    """

    # Base URL of the text-to-speech endpoint; the voice id is appended to it.
    _API_URL = "https://api.elevenlabs.io/v1/text-to-speech"
    # (connect, read) timeouts in seconds, so a stalled connection cannot
    # block the caller forever.
    _REQUEST_TIMEOUT = (10, 300)
    # Size of the chunks read from the HTTP response in streaming mode.
    _CHUNK_SIZE = 8192

    def __init__(
        self,
        app: LollmsApplication,
        model_id: str = "eleven_monolingual_v2",
        voice_id: str = "EXAVITQu4vr4xnSDxMaL",
        api_key: str = "",
        output_path: Path | str = None,
        stability: float = 0.5,
        similarity_boost: float = 0.5,
        streaming: bool = False
    ):
        """Create the service and mark it as ready.

        Args:
            app: The running lollms application, forwarded to the base class.
            model_id: ElevenLabs model identifier sent with every request.
                NOTE(review): the shipped configuration uses
                "eleven_monolingual_v1"; confirm that this default is a
                model id the API accepts.
            voice_id: Identifier of the ElevenLabs voice to use.
            api_key: API key sent in the ``xi-api-key`` header.
            output_path: Optional folder used for generated files when a call
                does not provide an explicit file path.
            stability: Value sent as ``voice_settings.stability``.
            similarity_boost: Value sent as ``voice_settings.similarity_boost``.
            streaming: When True, the ``/stream`` endpoint is used and the
                audio is written to disk chunk by chunk.
        """
        super().__init__("elevenlabs_tts", app, model_id, voice_id, api_key, output_path)
        self.voice_id = voice_id
        self.model_id = model_id
        self.api_key = api_key
        self.output_path = output_path
        self.stability = stability
        self.similarity_boost = similarity_boost
        self.streaming = streaming
        self.ready = True

    def _resolve_output_file(self, file_name_or_path):
        """Return the file to write to, creating a fresh one when none is given."""
        if file_name_or_path is not None:
            return file_name_or_path

        # Local import: only needed for this fallback path.
        import tempfile

        directory = None
        if self.output_path is not None:
            directory = Path(self.output_path)
            directory.mkdir(parents=True, exist_ok=True)
        # Assumes the API's default MP3 output format -- TODO confirm.
        handle, generated = tempfile.mkstemp(
            prefix="elevenlabs_tts_", suffix=".mp3", dir=directory
        )
        os.close(handle)
        return Path(generated)

    def _synthesize(self, text, file_name_or_path=None):
        """Request speech for ``text`` and write the audio bytes to a file.

        Returns the path of the written file. Raises ``requests.HTTPError``
        when the API answers with an error status, so that an error body is
        never stored as if it were audio.
        """
        payload = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": self.stability,
                "similarity_boost": self.similarity_boost
            }
        }
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json"
        }

        url = f"{self._API_URL}/{self.voice_id}"
        if self.streaming:
            url = f"{url}/stream"

        with requests.post(
            url,
            json=payload,
            headers=headers,
            stream=self.streaming,
            timeout=self._REQUEST_TIMEOUT,
        ) as response:
            response.raise_for_status()
            # Resolve the target only after a successful answer so a failed
            # request does not leave an empty generated file behind.
            target = self._resolve_output_file(file_name_or_path)
            with open(target, 'wb') as audio_file:
                if self.streaming:
                    for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                        if chunk:
                            audio_file.write(chunk)
                else:
                    audio_file.write(response.content)
        return target

    @staticmethod
    def _play_audio(file_path):
        """Play an audio file on the default output device and wait until done."""
        data, fs = sf.read(str(file_path), dtype='float32')
        sd.play(data, fs)
        sd.wait()

    def tts_file(self, text, speaker=None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Convert ``text`` to speech and save it to a file.

        Args:
            text: The text to synthesize.
            speaker: Accepted for interface compatibility; not used here
                (the voice configured on the instance is used).
            file_name_or_path: Destination file. When None, a new file is
                created in ``output_path`` or in the system temp folder.
            language: Accepted for interface compatibility; not used here.
            use_threading: Accepted for interface compatibility; not used here.

        Returns:
            The path of the audio file (``file_name_or_path`` when provided).

        Raises:
            requests.HTTPError: If the API returns an error status.
        """
        return self._synthesize(text, file_name_or_path)

    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Convert ``text`` to speech, save it to a file and play it.

        Takes the same arguments as ``tts_file``. The call blocks until
        playback has finished and returns nothing.

        Raises:
            requests.HTTPError: If the API returns an error status.
        """
        speech_file_path = self._synthesize(text, file_name_or_path)
        self._play_audio(speech_file_path)
|
Loading…
Reference in New Issue
Block a user