mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-20 21:23:17 +00:00
Added ElevenLabs TTS
This commit is contained in:
parent
c68b687e7c
commit
390d0d49da
@ -1,5 +1,5 @@
|
|||||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||||
version: 128
|
version: 129
|
||||||
binding_name: null
|
binding_name: null
|
||||||
model_name: null
|
model_name: null
|
||||||
model_variant: null
|
model_variant: null
|
||||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
|||||||
|
|
||||||
# -------------------- Services global configurations --------------------------
|
# -------------------- Services global configurations --------------------------
|
||||||
# Select the active text to speech, text to image and speech to text services
|
# Select the active text to speech, text to image and speech to text services
|
||||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
|
||||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||||
active_ttm_service: "None" # musicgen (offline)
|
active_ttm_service: "None" # musicgen (offline)
|
||||||
@ -161,6 +161,12 @@ openai_tts_key: ""
|
|||||||
openai_tts_model: "tts-1"
|
openai_tts_model: "tts-1"
|
||||||
openai_tts_voice: "alloy"
|
openai_tts_voice: "alloy"
|
||||||
|
|
||||||
|
|
||||||
|
elevenlabs_tts_key: ""
|
||||||
|
elevenlabs_tts_model_id: "eleven_monolingual_v1"
|
||||||
|
elevenlabs_tts_voice_stability: 0.5
|
||||||
|
elevenlabs_tts_voice_boost: 0.5
|
||||||
|
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||||
# ***************** TTI *****************
|
# ***************** TTI *****************
|
||||||
|
|
||||||
use_negative_prompt: true
|
use_negative_prompt: true
|
||||||
|
@ -403,7 +403,11 @@ class LollmsApplication(LoLLMsCom):
|
|||||||
|
|
||||||
ASCIIColors.blue("Activating TTS services")
|
ASCIIColors.blue("Activating TTS services")
|
||||||
|
|
||||||
if self.config.active_tts_service == "openai_tts":
|
|
||||||
|
if self.config.active_tts_service == "eleven_labs_tts":
|
||||||
|
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
|
||||||
|
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
|
||||||
|
elif self.config.active_tts_service == "openai_tts":
|
||||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||||
@ -517,7 +521,10 @@ class LollmsApplication(LoLLMsCom):
|
|||||||
self.tti = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
|
self.tti = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
|
||||||
|
|
||||||
ASCIIColors.blue("Activating TTS service")
|
ASCIIColors.blue("Activating TTS service")
|
||||||
if self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
|
if self.config.active_tts_service == "eleven_labs_tts":
|
||||||
|
from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
|
||||||
|
self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id, self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
|
||||||
|
elif self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
|
||||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||||
version: 128
|
version: 129
|
||||||
binding_name: null
|
binding_name: null
|
||||||
model_name: null
|
model_name: null
|
||||||
model_variant: null
|
model_variant: null
|
||||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
|||||||
|
|
||||||
# -------------------- Services global configurations --------------------------
|
# -------------------- Services global configurations --------------------------
|
||||||
# Select the active text to speech, text to image and speech to text services
|
# Select the active text to speech, text to image and speech to text services
|
||||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
|
||||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||||
active_ttm_service: "None" # musicgen (offline)
|
active_ttm_service: "None" # musicgen (offline)
|
||||||
@ -161,6 +161,12 @@ openai_tts_key: ""
|
|||||||
openai_tts_model: "tts-1"
|
openai_tts_model: "tts-1"
|
||||||
openai_tts_voice: "alloy"
|
openai_tts_voice: "alloy"
|
||||||
|
|
||||||
|
|
||||||
|
elevenlabs_tts_key: ""
|
||||||
|
elevenlabs_tts_model_id: "eleven_monolingual_v1"
|
||||||
|
elevenlabs_tts_voice_stability: 0.5
|
||||||
|
elevenlabs_tts_voice_boost: 0.5
|
||||||
|
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||||
# ***************** TTI *****************
|
# ***************** TTI *****************
|
||||||
|
|
||||||
use_negative_prompt: true
|
use_negative_prompt: true
|
||||||
|
132
lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
Normal file
132
lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
# Title LollmsElevenLabsTTS
|
||||||
|
# Licence: MIT
|
||||||
|
# Author : Paris Neo
|
||||||
|
# Uses the ElevenLabs API to perform text to speech
|
||||||
|
#
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from lollms.paths import LollmsPaths
|
||||||
|
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||||
|
import time
|
||||||
|
import io
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
import base64
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import platform
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from PIL import Image, PngImagePlugin
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
from ascii_colors import ASCIIColors, trace_exception
|
||||||
|
from lollms.paths import LollmsPaths
|
||||||
|
from lollms.utilities import PackageManager, find_next_available_filename
|
||||||
|
from lollms.tts import LollmsTTS
|
||||||
|
import subprocess
|
||||||
|
import shutil
|
||||||
|
from tqdm import tqdm
|
||||||
|
import threading
|
||||||
|
from io import BytesIO
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Make sure the audio playback/decoding packages are available before they are
# imported below; any missing one is installed through PackageManager.
for _required_package in ("sounddevice", "soundfile"):
    if not PackageManager.check_package_installed(_required_package):
        PackageManager.install_package(_required_package)
|
||||||
|
|
||||||
|
import sounddevice as sd
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
def get_Whisper(lollms_paths: LollmsPaths):
    """Return the ElevenLabs TTS service class (the class itself, not an instance).

    ``lollms_paths`` is accepted but not used by this function.
    """
    # NOTE(review): the function name looks carried over from a Whisper
    # service module; it is kept as-is because callers may rely on it.
    service_class = LollmsElevenLabsTTS
    return service_class
|
||||||
|
|
||||||
|
class LollmsElevenLabsTTS(LollmsTTS):
    """Text-to-speech service that calls the ElevenLabs HTTP API.

    The text is posted to
    ``https://api.elevenlabs.io/v1/text-to-speech/<voice_id>`` (or its
    ``/stream`` variant when ``streaming`` is enabled) and the returned audio
    bytes are stored in a file.
    """

    def __init__(
        self,
        app: LollmsApplication,
        model_id: str = "eleven_monolingual_v2",
        voice_id: str = "EXAVITQu4vr4xnSDxMaL",
        api_key: str = "",
        output_path: Path | str = None,
        stability: float = 0.5,
        similarity_boost: float = 0.5,
        streaming: bool = False
    ):
        """Store the ElevenLabs settings and mark the service as ready.

        Args:
            app: The lollms application, forwarded to ``LollmsTTS``.
            model_id: Model identifier sent as ``model_id`` in the request.
            voice_id: Voice identifier inserted in the request URL.
            api_key: Key sent in the ``xi-api-key`` request header.
            output_path: Forwarded to ``LollmsTTS`` and kept on the instance.
            stability: Sent as ``voice_settings.stability``.
            similarity_boost: Sent as ``voice_settings.similarity_boost``.
            streaming: When true, the ``/stream`` endpoint is used and the
                audio is written to disk chunk by chunk.
        """
        # NOTE(review): the default model_id differs from the
        # "eleven_monolingual_v1" value used in the configuration files;
        # confirm that "eleven_monolingual_v2" is a model the API accepts.
        super().__init__("elevenlabs_tts", app, model_id, voice_id, api_key, output_path)
        self.voice_id = voice_id
        self.model_id = model_id
        self.api_key = api_key
        self.output_path = output_path
        self.stability = stability
        self.similarity_boost = similarity_boost
        self.streaming = streaming
        self.ready = True

    def _synthesize_to_file(self, text, file_name_or_path: Path | str = None):
        """Request speech for ``text`` and write the audio to a file.

        Args:
            text: The text to convert to speech.
            file_name_or_path: Destination file. When ``None`` a temporary
                ``.mp3`` file is created instead of failing in ``open``.

        Returns:
            The path of the file that received the audio.

        Raises:
            requests.HTTPError: If the API answers with an error status, so
                an error body is never saved as if it were audio.
        """
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}"
        if self.streaming:
            url += "/stream"

        payload = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": self.stability,
                "similarity_boost": self.similarity_boost
            }
        }
        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json"
        }

        # stream=True defers the body download so it can be written in chunks.
        with requests.post(url, json=payload, headers=headers, stream=self.streaming) as response:
            response.raise_for_status()

            if file_name_or_path is None:
                # The API's default output format is MP3, hence the suffix.
                import tempfile
                handle, speech_file_path = tempfile.mkstemp(suffix=".mp3")
                os.close(handle)
            else:
                speech_file_path = file_name_or_path

            with open(speech_file_path, 'wb') as audio_file:
                if self.streaming:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            audio_file.write(chunk)
                else:
                    audio_file.write(response.content)

        return speech_file_path

    @staticmethod
    def _play_audio(file_path):
        """Play the audio file at ``file_path`` and block until it finishes."""
        # Read the audio file
        data, fs = sf.read(file_path, dtype='float32')
        # Play the audio file
        sd.play(data, fs)
        # Wait until the file is done playing
        sd.wait()

    def tts_file(self, text, speaker=None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Convert ``text`` to speech and save it to a file.

        ``speaker``, ``language`` and ``use_threading`` are accepted for
        signature compatibility but are not used by this implementation.

        Returns:
            The path of the written audio file (``file_name_or_path`` when it
            was provided).

        Raises:
            requests.HTTPError: If the ElevenLabs API returns an error status.
        """
        return self._synthesize_to_file(text, file_name_or_path)

    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Convert ``text`` to speech, save it to a file and play it.

        ``speaker``, ``language`` and ``use_threading`` are accepted for
        signature compatibility but are not used by this implementation.
        Playback blocks until the audio has finished.

        Raises:
            requests.HTTPError: If the ElevenLabs API returns an error status.
        """
        speech_file_path = self._synthesize_to_file(text, file_name_or_path)
        self._play_audio(speech_file_path)
|
Loading…
Reference in New Issue
Block a user