Added ElevenLabs TTS

2025-04-16 06:56:33 +00:00 · 2024-07-31 01:08:29 +02:00 · 2024-07-31 01:08:29 +02:00 · 390d0d49da
commit 390d0d49da
parent c68b687e7c
4 changed files with 157 additions and 6 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 128
+version: 129
 binding_name: null
 model_name: null
 model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false

 # -------------------- Services global configurations --------------------------
 # Select the active test to speach, text to image and speach to text services
-active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
 active_tti_service: "None" # autosd (offline), dall-e (online)
 active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
 active_ttm_service: "None" # musicgen (offline)
@ -161,6 +161,12 @@ openai_tts_key: ""
 openai_tts_model: "tts-1"
 openai_tts_voice: "alloy"

+
+# ElevenLabs text to speech settings (used when active_tts_service selects the ElevenLabs service)
+elevenlabs_tts_key: ""
+elevenlabs_tts_model_id: "eleven_monolingual_v1"
+elevenlabs_tts_voice_stability: 0.5
+# lollms/app.py reads this value as `elevenlabs_tts_voice_similarity_boost`; the key name must match.
+elevenlabs_tts_voice_similarity_boost: 0.5
+elevenlabs_tts_voice_id: "EXAVITQu4vr4xnSDxMaL"
 # ***************** TTI *****************

 use_negative_prompt: true
--- a/lollms/app.py
+++ b/lollms/app.py
@ -403,7 +403,11 @@ class LollmsApplication(LoLLMsCom):

        ASCIIColors.blue("Activating TTS services")

-        if self.config.active_tts_service == "openai_tts":
+
+        if self.config.active_tts_service == "eleven_labs_tts":
+            from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
+            self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id,  self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
+        elif self.config.active_tts_service == "openai_tts":
            from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
            self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice,  self.config.openai_tts_key)
        elif self.config.active_tts_service == "xtts" and self.xtts:
@ -517,7 +521,10 @@ class LollmsApplication(LoLLMsCom):
                    self.tti = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)

            ASCIIColors.blue("Activating TTS service")
-            if self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
+            if self.config.active_tts_service == "eleven_labs_tts":
+                from lollms.services.eleven_labs_tts.lollms_eleven_labs_tts import LollmsElevenLabsTTS
+                self.tts = LollmsElevenLabsTTS(self, self.config.elevenlabs_tts_model_id, self.config.elevenlabs_tts_voice_id,  self.config.elevenlabs_tts_key, stability=self.config.elevenlabs_tts_voice_stability, similarity_boost=self.config.elevenlabs_tts_voice_similarity_boost)
+            elif self.config.active_tts_service == "openai_tts" and (self.tts is None or self.tts.name!="openai_tts"):
                from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
                self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice,  self.config.openai_tts_key)
            elif self.config.active_tts_service == "xtts" and self.xtts:
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 128
+version: 129
 binding_name: null
 model_name: null
 model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false

 # -------------------- Services global configurations --------------------------
 # Select the active test to speach, text to image and speach to text services
-active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tts_service: "None" # xtts (offline), openai_tts (API key required), eleven_labs_tts (API key required)
 active_tti_service: "None" # autosd (offline), dall-e (online)
 active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
 active_ttm_service: "None" # musicgen (offline)
@ -161,6 +161,12 @@ openai_tts_key: ""
 openai_tts_model: "tts-1"
 openai_tts_voice: "alloy"

+
+# ElevenLabs text to speech settings (used when active_tts_service selects the ElevenLabs service)
+elevenlabs_tts_key: ""
+elevenlabs_tts_model_id: "eleven_monolingual_v1"
+elevenlabs_tts_voice_stability: 0.5
+# lollms/app.py reads this value as `elevenlabs_tts_voice_similarity_boost`; the key name must match.
+elevenlabs_tts_voice_similarity_boost: 0.5
+elevenlabs_tts_voice_id: "EXAVITQu4vr4xnSDxMaL"
 # ***************** TTI *****************

 use_negative_prompt: true
--- a/lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
+++ b/lollms/services/eleven_labs_tts/lollms_eleven_labs_tts.py
@ -0,0 +1,132 @@
+# Title LollmsElevenLabsTTS
+# Licence: MIT
+# Author : Paris Neo
+# Uses the ElevenLabs API to perform text to speech
+# 
+
+from pathlib import Path
+import sys
+from lollms.app import LollmsApplication
+from lollms.paths import LollmsPaths
+from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
+import time
+import io
+import sys
+import requests
+import os
+import base64
+import subprocess
+import time
+import json
+import platform
+from dataclasses import dataclass
+from PIL import Image, PngImagePlugin
+from enum import Enum
+from typing import List, Dict, Any
+
+from ascii_colors import ASCIIColors, trace_exception
+from lollms.paths import LollmsPaths
+from lollms.utilities import PackageManager, find_next_available_filename
+from lollms.tts import LollmsTTS
+import subprocess
+import shutil
+from tqdm import tqdm
+import threading
+from io import BytesIO
+from openai import OpenAI
+
+# Make sure the audio packages used by this module are installed before they
+# are imported: each one is checked through PackageManager and installed when missing.
+if not PackageManager.check_package_installed("sounddevice"):
+    PackageManager.install_package("sounddevice")
+if not PackageManager.check_package_installed("soundfile"):
+    PackageManager.install_package("soundfile")
+
+# sounddevice plays the audio, soundfile reads the generated file back from disk.
+import sounddevice as sd
+import soundfile as sf
+
+def get_Whisper(lollms_paths:LollmsPaths):
+    """Return the LollmsElevenLabsTTS class (the class itself, not an instance).
+
+    NOTE(review): the function name looks copied from another service module,
+    and ``lollms_paths`` is accepted but not used in this body -- confirm
+    whether any caller relies on this entry point.
+    """
+    return LollmsElevenLabsTTS
+
+class LollmsElevenLabsTTS(LollmsTTS):
+    def __init__(
+                    self, 
+                    app:LollmsApplication,
+                    model_id: str = "eleven_monolingual_v2",
+                    voice_id: str = "EXAVITQu4vr4xnSDxMaL",
+                    api_key: str = "",
+                    output_path: Path | str = None,
+                    stability: float = 0.5,
+                    similarity_boost: float = 0.5,
+                    streaming: bool = False
+                    ):
+        super().__init__("elevenlabs_tts", app, model_id, voice_id, api_key, output_path)
+        self.voice_id = voice_id
+        self.model_id = model_id
+        self.api_key = api_key
+        self.output_path = output_path
+        self.stability = stability
+        self.similarity_boost = similarity_boost
+        self.streaming = streaming
+        self.ready = True
+
+    def tts_file(self, text, speaker=None, file_name_or_path: Path | str = None, language="en", use_threading=False):
+        speech_file_path = file_name_or_path
+        payload = {
+            "text": text,
+            "model_id": self.model_id,
+            "voice_settings": {
+                "stability": self.stability,
+                "similarity_boost": self.similarity_boost
+            }
+        }
+        headers = {
+            "xi-api-key": self.api_key,
+            "Content-Type": "application/json"
+        }
+        
+        if self.streaming:
+            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
+            response = requests.post(url, json=payload, headers=headers)
+            # Handle streaming response if needed
+        else:
+            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}"
+            response = requests.post(url, json=payload, headers=headers)
+            with open(speech_file_path, 'wb') as f:
+                f.write(response.content)
+
+        return speech_file_path
+
+    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
+        speech_file_path = file_name_or_path
+        payload = {
+            "text": text,
+            "model_id": self.model_id,
+            "voice_settings": {
+                "stability": self.stability,
+                "similarity_boost": self.similarity_boost
+            }
+        }
+        headers = {
+            "xi-api-key": self.api_key,
+            "Content-Type": "application/json"
+        }
+
+        if self.streaming:
+            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}/stream"
+            response = requests.post(url, json=payload, headers=headers)
+            # Handle streaming response if needed
+        else:
+            url = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}"
+            response = requests.post(url, json=payload, headers=headers)
+            with open(speech_file_path, 'wb') as f:
+                f.write(response.content)
+
+        def play_audio(file_path):
+            # Read the audio file
+            data, fs = sf.read(file_path, dtype='float32')
+            # Play the audio file
+            sd.play(data, fs)
+            # Wait until the file is done playing
+            sd.wait()
+
+        # Example usage
+        play_audio(speech_file_path)