Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-18 20:27:58 +00:00)

Commit 397d21a3be ("upgraded"), parent 42ebabfe0d
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 96
+version: 98
 binding_name: null
 model_name: null
 model_variant: null
@@ -80,10 +80,30 @@ auto_show_browser: true
 # copy to clipboard
 copy_to_clipboard_add_all_details: false
 
+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
 # STT service
 asr_enable: false
 asr_base_url: http://localhost:9000
 
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+# ***************** TTS *****************
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
@@ -101,6 +121,13 @@ xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
 
+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860
@@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
 dall_e_key: ""
 dall_e_generation_engine: "dall-e-3"
 
+# Midjourney service key
+midjourney_key: ""
 
 # Image generation service comfyui
 enable_comfyui_service: false
@@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861
 
+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@@ -202,6 +233,3 @@ show_code_of_conduct: true
 activate_audio_infos: true
 
 
-# whisper configuration
-whisper_activate: false
-whisper_model: base
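The three new active_*_service keys are what the application reads at startup to decide which text to speech, text to image and speech to text back ends to wire up (see start_servers below). As a minimal illustration that is not part of this commit, the keys could be read like this, assuming a plain YAML load of the configuration file:

import yaml  # pip install pyyaml

# Hypothetical illustration: the config file path is an assumption,
# the key names come from the diff above.
with open("config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

print(cfg.get("active_tts_service", "None"))  # "xtts" or "openai_tts"
print(cfg.get("active_tti_service", "None"))  # "autosd", "dall-e" or "midjourney"
print(cfg.get("active_stt_service", "None"))  # "whisper", "asr" or "openai_whisper"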
lollms/app.py (150 changed lines)

@@ -221,7 +221,21 @@ class LollmsApplication(LoLLMsCom):
     def get_uploads_path(self, client_id):
         return self.lollms_paths.personal_uploads_path
 
-    def start_servers( self ):
+    def start_servers(self):
+        self.ollama = None
+        self.vllm = None
+        self.whisper = None
+        self.xtts = None
+        self.sd = None
+        self.comfyui = None
+        self.motion_ctrl = None
+
+        self.tti = None
+        self.tts = None
+        self.stt = None
+
+
+
         if self.config.enable_ollama_service:
             try:
                 from lollms.services.ollama.lollms_ollama import Service
@@ -240,13 +254,11 @@ class LollmsApplication(LoLLMsCom):
 
         if self.config.whisper_activate:
             try:
-                from lollms.media import AudioRecorder
-                self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
-                self.rec.start_recording()
-                time.sleep(1)
-                self.rec.stop_recording()
-            except:
-                pass
+                from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
+            except Exception as ex:
+                trace_exception(ex)
+
         if self.config.xtts_enable:
             try:
                 from lollms.services.xtts.lollms_xtts import LollmsXTTS
@@ -256,7 +268,7 @@ class LollmsApplication(LoLLMsCom):
                 else:
                     voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
 
-                self.tts = LollmsXTTS(
+                self.xtts = LollmsXTTS(
                     self,
                     voices_folder=voices_folder,
                     voice_samples_path=self.lollms_paths.custom_voices_path,
@@ -291,6 +303,126 @@ class LollmsApplication(LoLLMsCom):
                 self.warning(f"Couldn't load Motion control")
 
 
+        if self.config.active_tti_service == "autosd":
+            from lollms.services.sd.lollms_sd import LollmsSD
+            self.tti = LollmsSD(self)
+        elif self.config.active_tti_service == "dall-e":
+            from lollms.services.dalle.lollms_dalle import LollmsDalle
+            self.tti = LollmsDalle(self, self.config.dall_e_key)
+        elif self.config.active_tti_service == "midjourney":
+            from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
+            self.tti = LollmsMidjourney(self, self.config.midjourney_key)
+
+        if self.config.active_tts_service == "openai_tts":
+            from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
+            self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
+        elif self.config.active_tts_service == "xtts" and self.xtts:
+            self.tts = self.xtts
+
+        if self.config.active_stt_service == "openai_whisper":
+            from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
+            self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
+        elif self.config.active_stt_service == "whisper":
+            from lollms.services.whisper.lollms_whisper import LollmsWhisper
+            self.stt = LollmsWhisper(self, self.config.whisper_model)
+
+
+    def verify_servers(self, reload_all=False):
+
+        try:
+            if self.config.enable_ollama_service and self.ollama is None:
+                try:
+                    from lollms.services.ollama.lollms_ollama import Service
+                    self.ollama = Service(self, base_url=self.config.ollama_base_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load Ollama")
+
+            if self.config.enable_vllm_service and self.vllm is None:
+                try:
+                    from lollms.services.vllm.lollms_vllm import Service
+                    self.vllm = Service(self, base_url=self.config.vllm_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load vllm")
+
+            if self.config.whisper_activate and self.whisper is None:
+                try:
+                    from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                    self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
+                except Exception as ex:
+                    trace_exception(ex)
+            if self.config.xtts_enable and self.xtts is None:
+                try:
+                    from lollms.services.xtts.lollms_xtts import LollmsXTTS
+                    voice=self.config.xtts_current_voice
+                    if voice!="main_voice":
+                        voices_folder = self.lollms_paths.custom_voices_path
+                    else:
+                        voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+
+                    self.xtts = LollmsXTTS(
+                        self,
+                        voices_folder=voices_folder,
+                        voice_samples_path=self.lollms_paths.custom_voices_path,
+                        xtts_base_url=self.config.xtts_base_url,
+                        wait_for_service=False,
+                        use_deep_speed=self.config.xtts_use_deepspeed,
+                        use_streaming_mode=self.config.xtts_use_streaming_mode
+                    )
+                except:
+                    self.warning(f"Couldn't load XTTS")
+
+            if self.config.enable_sd_service and self.sd is None:
+                try:
+                    from lollms.services.sd.lollms_sd import LollmsSD
+                    self.sd = LollmsSD(self, auto_sd_base_url=self.config.sd_base_url)
+                except:
+                    self.warning(f"Couldn't load SD")
+
+            if self.config.enable_comfyui_service and self.comfyui is None:
+                try:
+                    from lollms.services.comfyui.lollms_comfyui import LollmsComfyUI
+                    self.comfyui = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
+                except:
+                    self.warning(f"Couldn't load SD")
+
+            if self.config.enable_motion_ctrl_service and self.motion_ctrl is None:
+                try:
+                    from lollms.services.motion_ctrl.lollms_motion_ctrl import Service
+                    self.motion_ctrl = Service(self, base_url=self.config.motion_ctrl_base_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load Motion control")
+
+
+            if self.config.active_tti_service == "autosd":
+                from lollms.services.sd.lollms_sd import LollmsSD
+                self.tti = LollmsSD(self)
+            elif self.config.active_tti_service == "dall-e":
+                from lollms.services.dalle.lollms_dalle import LollmsDalle
+                self.tti = LollmsDalle(self, self.config.dall_e_key)
+            elif self.config.active_tti_service == "midjourney":
+                from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
+                self.tti = LollmsMidjourney(self, self.config.midjourney_key)
+
+            if self.config.active_tts_service == "openai_tts":
+                from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
+                self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
+            elif self.config.active_stt_service == "xtts" and self.xtts:
+                self.tts = self.xtts
+
+            if self.config.active_stt_service == "openai_whisper":
+                from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
+                self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
+            elif self.config.active_stt_service == "whisper":
+                from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                self.stt = LollmsWhisper(self, self.config.whisper_model)
+
+        except Exception as ex:
+            trace_exception(ex)
+
+
     def build_long_term_skills_memory(self):
         discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
         discussion_db_name.mkdir(exist_ok=True, parents=True)
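The shape of the change above is: start_servers() now initializes every service handle (ollama, vllm, whisper, xtts, sd, comfyui, motion_ctrl, plus the generic tti/tts/stt slots) to None, and the new verify_servers() builds only the services that are enabled in the configuration and not yet constructed. A self-contained sketch of that lazy pattern, with illustrative names rather than the real lollms classes:

# Illustrative sketch of the lazy start/verify pattern used above (not lollms code).
class FakeService:
    def __init__(self, base_url):
        self.base_url = base_url

class App:
    def __init__(self, config):
        self.config = config

    def start_servers(self):
        # every handle starts empty, like self.ollama / self.xtts / ... above
        self.ollama = None
        self.xtts = None

    def verify_servers(self):
        # only build what is enabled and not built yet
        if self.config.get("enable_ollama_service") and self.ollama is None:
            self.ollama = FakeService(self.config["ollama_base_url"])
        if self.config.get("xtts_enable") and self.xtts is None:
            self.xtts = FakeService(self.config["xtts_base_url"])

app = App({"enable_ollama_service": True, "ollama_base_url": "http://localhost:11434"})
app.start_servers()
app.verify_servers()                  # builds ollama, leaves xtts as None
print(app.ollama.base_url, app.xtts)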
@@ -186,6 +186,7 @@ async def apply_settings(request: Request):
             lollmsElfServer.config.config[key] = config.get(key, lollmsElfServer.config.config[key])
         ASCIIColors.success("OK")
         lollmsElfServer.rebuild_personalities()
+        lollmsElfServer.verify_servers()
         if lollmsElfServer.config.auto_save:
             lollmsElfServer.config.save_config()
         return {"status":True}
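The added call means that saving settings now also re-verifies the service back ends, so enabling a service from the UI takes effect without a restart. A hedged client-side sketch follows; the base URL, route and payload shape are assumptions, not taken from this diff:

import requests

# Assumed base URL, route and payload wrapping; the keys mirror the config handled above.
payload = {"config": {"xtts_enable": True, "active_tts_service": "xtts"}}
r = requests.post("http://localhost:9600/apply_settings", json=payload)
print(r.json())  # expected {"status": true} when the settings were applied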
@@ -92,8 +92,8 @@ def start_sd(data: Identification):
        return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
 
    lollmsElfServer.ShowBlockingMessage("Starting SD api server\nPlease stand by")
-    from lollms.services.sd.lollms_sd import get_sd
-    lollmsElfServer.sd = get_sd(lollmsElfServer.lollms_paths)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
+    from lollms.services.sd.lollms_sd import LollmsSD
+    lollmsElfServer.sd = LollmsSD.get(lollmsElfServer)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
    ASCIIColors.success("Done")
    lollmsElfServer.HideBlockingMessage()
    return {"status":True}
@@ -8,6 +8,7 @@ description:
 
 """
 from fastapi import APIRouter, Request, UploadFile, File, HTTPException
+from fastapi.responses import PlainTextResponse
 from lollms_webui import LOLLMSWebUI
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
@@ -39,10 +40,7 @@ def list_voices():
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
 
    ASCIIColors.yellow("Listing voices")
-    voices=["main_voice"]
-    voices_dir:Path=lollmsElfServer.lollms_paths.custom_voices_path
-    voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
-    return {"voices":voices}
+    return {"voices":lollmsElfServer.tts.get_voices()}
 
 @router.post("/set_voice")
 async def set_voice(request: Request):
@@ -70,6 +68,24 @@ async def set_voice(request: Request):
        return {"status":False,"error":str(ex)}
 
 
+class LollmsAudio2TextRequest(BaseModel):
+    wave_file_path: str
+    voice: str = None
+    fn:str = None
+
+@router.post("/audio2text")
+async def audio2text(request: LollmsAudio2TextRequest):
+    if lollmsElfServer.config.headless_server_mode:
+        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}
+
+    if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
+        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
+
+    result = lollmsElfServer.whisper.transcribe(str(request.wave_file_path))
+    return PlainTextResponse(result)
+
+
+
 class LollmsText2AudioRequest(BaseModel):
    text: str
    voice: str = None
@@ -94,67 +110,13 @@ async def text2Audio(request: LollmsText2AudioRequest):
    validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
 
    try:
-        # Get the JSON data from the POST request.
-        try:
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            voice=lollmsElfServer.config.xtts_current_voice
-            if lollmsElfServer.tts is None:
-                voice=lollmsElfServer.config.xtts_current_voice
-                if voice!="main_voice":
-                    voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
-                else:
-                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-                lollmsElfServer.tts = LollmsXTTS(
-                    lollmsElfServer,
-                    voices_folder=voices_folder,
-                    voice_samples_path=Path(__file__).parent/"voices",
-                    xtts_base_url= lollmsElfServer.config.xtts_base_url,
-                    use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
-                    use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
-                )
-        except Exception as ex:
-            return {"url": None, "error":f"{ex}"}
-
-        voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
-        index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
-        output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
-        if voice is None:
-            voice = "main_voice"
-        lollmsElfServer.info("Starting to build voice")
-        try:
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            # If the personality has a voice, then use it
-            personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
-            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
-                voices_folder = personality_audio
-            elif voice!="main_voice":
-                voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
-            else:
-                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-            if lollmsElfServer.tts is None:
-                lollmsElfServer.tts = LollmsXTTS(
-                    lollmsElfServer,
-                    voices_folder=voices_folder,
-                    voice_samples_path=Path(__file__).parent/"voices",
-                    xtts_base_url= lollmsElfServer.config.xtts_base_url,
-                    use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
-                    use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
-                )
-            if lollmsElfServer.tts.ready:
-                language = lollmsElfServer.config.xtts_current_language# convert_language_name()
-                lollmsElfServer.tts.set_speaker_folder(voices_folder)
-                preprocessed_text= add_period(request.text)
-                voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
-                if len(voice_file)==0:
-                    return {"status":False,"error":"Voice not found"}
-                lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
-            else:
-                lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
-                return {"status":False, "error":"Service not ready yet"}
-        except Exception as ex:
-            trace_exception(ex)
-            return {"url": None}
+        if lollmsElfServer.tts is None:
+            return {"url": None, "error":f"No TTS service is on"}
+        if lollmsElfServer.tts.ready:
+            response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
+            return response
+        else:
+            return {"url": None, "error":f"TTS service is not ready yet"}
    except Exception as ex:
        trace_exception(ex)
        lollmsElfServer.error(ex)
@@ -255,9 +217,9 @@ def install_xtts(data:Identification):
        if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
            return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
 
-        from lollms.services.xtts.lollms_xtts import install_xtts
+        from lollms.services.xtts.lollms_xtts import LollmsTTS
        lollmsElfServer.ShowBlockingMessage("Installing xTTS api server\nPlease stand by")
-        install_xtts(lollmsElfServer)
+        LollmsTTS.install(lollmsElfServer)
        lollmsElfServer.HideBlockingMessage()
        return {"status":True}
    except Exception as ex:
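A hedged example of calling the new /audio2text endpoint: the field name wave_file_path comes from LollmsAudio2TextRequest above, while the host, port and route prefix are assumptions. The path must point to a wav file readable by the server process, and the response body is the plain-text transcription.

import requests

resp = requests.post(
    "http://localhost:9600/audio2text",                  # assumed base URL
    json={"wave_file_path": "/path/to/recording.wav"},   # field name from the request model above
)
print(resp.text)  # PlainTextResponse: the transcription itself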
@@ -1,10 +1,7 @@
 # Title LollmsDalle
-# Licence: MIT
+# Licence: Apache 2.0
 # Author : Paris Neo
-# Adapted from the work of mix1009's sdwebuiapi
-# check it out : https://github.com/mix1009/sdwebuiapi/tree/main
-# Here is a copy of the LICENCE https://github.com/mix1009/sdwebuiapi/blob/main/LICENSE
-# All rights are reserved
 
 from pathlib import Path
 import sys
@@ -29,17 +26,16 @@ from typing import List, Dict, Any
 from ascii_colors import ASCIIColors, trace_exception
 from lollms.paths import LollmsPaths
 from lollms.utilities import PackageManager, find_next_available_filename
+from lollms.tti import LollmsTTI
 import subprocess
 import shutil
 from tqdm import tqdm
 import threading
 from io import BytesIO
 
-def get_Dalli(lollms_paths:LollmsPaths):
-    return LollmsDalle
-
-class LollmsDalle:
-    has_controlnet = False
+class LollmsDalle(LollmsTTI):
    def __init__(
                    self,
                    app:LollmsApplication,
@@ -47,7 +43,7 @@
                    generation_engine="dall-e-3",# other possibility "dall-e-2"
                    output_path=None
                    ):
-        self.app = app
+        super().__init__(app)
        self.key = key
        self.generation_engine = generation_engine
        self.output_path = output_path
@@ -141,3 +137,6 @@
            ASCIIColors.red("Failed to download the image")
 
        return file_name
+    @staticmethod
+    def get(app:LollmsApplication):
+        return LollmsDalle
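The class now derives from the LollmsTTI base and exposes a static get() accessor, mirroring the LollmsSD.get() call used in start_sd() above. A hedged sketch of the resulting construction pattern; building a real LollmsApplication instance is out of scope here:

from lollms.services.dalle.lollms_dalle import LollmsDalle

def build_dalle(app, api_key):
    # get() returns the class itself; the (app, key) call matches start_servers above
    service_cls = LollmsDalle.get(app)
    return service_cls(app, api_key)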
New file: lollms/services/midjourney/lollms_midjourney.py (142 lines)

# Title LollmsMidjourney
# Licence: Apache 2.0
# Author : Paris Neo


from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
from lollms.tti import LollmsTTI
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO



class LollmsMidjourney(LollmsTTI):
    def __init__(
                    self,
                    app:LollmsApplication,
                    key="",
                    generation_engine="dall-e-3",# other possibility "dall-e-2"
                    output_path=None
                    ):
        super().__init__(app)
        self.key = key
        self.generation_engine = generation_engine
        self.output_path = output_path

    def paint(
                self,
                prompt,
                width=512,
                height=512,
                images = [],
                generation_engine=None,
                output_path = None
                ):
        if output_path is None:
            output_path = self.output_path
        if generation_engine is None:
            generation_engine = self.generation_engine
        if not PackageManager.check_package_installed("openai"):
            PackageManager.install_package("openai")
        import openai
        openai.api_key = self.key
        if generation_engine=="dall-e-2":
            supported_resolutions = [
                [512, 512],
                [1024, 1024],
            ]
            # Find the closest resolution
            closest_resolution = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))

        else:
            supported_resolutions = [
                [1024, 1024],
                [1024, 1792],
                [1792, 1024]
            ]
            # Find the closest resolution
            if width>height:
                closest_resolution = [1792, 1024]
            elif width<height:
                closest_resolution = [1024, 1792]
            else:
                closest_resolution = [1024, 1024]


        # Update the width and height
        width = closest_resolution[0]
        height = closest_resolution[1]

        if len(images)>0 and generation_engine=="dall-e-2":
            # Read the image file from disk and resize it
            image = Image.open(self.personality.image_files[0])
            width, height = width, height
            image = image.resize((width, height))

            # Convert the image to a BytesIO object
            byte_stream = BytesIO()
            image.save(byte_stream, format='PNG')
            byte_array = byte_stream.getvalue()
            response = openai.images.create_variation(
                image=byte_array,
                n=1,
                model=generation_engine, # for now only dalle 2 supports variations
                size=f"{width}x{height}"
            )
        else:
            response = openai.images.generate(
                model=generation_engine,
                prompt=prompt.strip(),
                quality="standard",
                size=f"{width}x{height}",
                n=1,
                )
        # download image to outputs
        output_dir = Path(output_path)
        output_dir.mkdir(parents=True, exist_ok=True)
        image_url = response.data[0].url

        # Get the image data from the URL
        response = requests.get(image_url)

        if response.status_code == 200:
            # Generate the full path for the image file
            file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_") # You can change the filename if needed

            # Save the image to the specified folder
            with open(file_name, "wb") as file:
                file.write(response.content)
            ASCIIColors.yellow(f"Image saved to {file_name}")
        else:
            ASCIIColors.red("Failed to download the image")

        return file_name
    @staticmethod
    def get(app:LollmsApplication):
        return LollmsMidjourney
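The only non-boilerplate logic in this new file is the resolution snapping inside paint(): dall-e-2 style engines get the nearest supported square size, while dall-e-3 style engines get one of three fixed aspect ratios. A standalone restatement of that rule:

# Standalone illustration of the resolution snapping used in LollmsMidjourney.paint above.
def closest_resolution(width, height, engine="dall-e-3"):
    if engine == "dall-e-2":
        supported = [[512, 512], [1024, 1024]]
        return min(supported, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
    if width > height:
        return [1792, 1024]
    if width < height:
        return [1024, 1792]
    return [1024, 1024]

print(closest_resolution(640, 480, "dall-e-2"))   # [512, 512]
print(closest_resolution(1920, 1080))             # [1792, 1024]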
New file: lollms/services/open_ai_tts/lollms_openai_tts.py (108 lines)

# Title LollmsOpenAITTS
# Licence: MIT
# Author : Paris Neo
# Uses open AI api to perform text to speech
#

from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
from lollms.tts import LollmsTTS
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO
from openai import OpenAI

if not PackageManager.check_package_installed("sounddevice"):
    PackageManager.install_package("sounddevice")
if not PackageManager.check_package_installed("soundfile"):
    PackageManager.install_package("soundfile")

import sounddevice as sd
import soundfile as sf

def get_Whisper(lollms_paths:LollmsPaths):
    return LollmsOpenAITTS

class LollmsOpenAITTS(LollmsTTS):
    def __init__(
                    self,
                    app:LollmsApplication,
                    model ="tts-1",
                    voice="alloy",
                    api_key="",
                    output_path=None
                    ):
        super().__init__(app, model, voice, api_key, output_path)
        self.client = OpenAI(api_key=api_key)
        self.voices = [
            "alloy",
            "echo",
            "fable",
            "nova",
            "shimmer"
        ]
        self.models = [
            "tts-1"
        ]

        self.voice = voice
        self.output_path = output_path
        self.ready = True


    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        speech_file_path = file_name_or_path
        response = self.client.audio.speech.create(
            model=self.model,
            voice=self.voice,
            input=text,
            response_format="wav"

        )

        response.write_to_file(speech_file_path)

    def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        speech_file_path = file_name_or_path
        response = self.client.audio.speech.create(
            model=self.model,
            voice=self.voice,
            input=text,
            response_format="wav"

        )

        response.write_to_file(speech_file_path)
        def play_audio(file_path):
            # Read the audio file
            data, fs = sf.read(file_path, dtype='float32')
            # Play the audio file
            sd.play(data, fs)
            # Wait until the file is done playing
            sd.wait()

        # Example usage
        play_audio(speech_file_path)
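A hedged usage sketch for the new LollmsOpenAITTS wrapper. It needs an OpenAI API key and a LollmsApplication instance, both assumed here; note that tts_to_file() uses the voice chosen in the constructor and ignores its speaker argument.

from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS

def speak(app, text, out_path="hello.wav"):
    # model, voice and key follow the constructor shown above; the key is a placeholder
    tts = LollmsOpenAITTS(app, "tts-1", "alloy", api_key="sk-...")
    tts.tts_to_file(text, speaker=None, file_name_or_path=out_path)
    return out_path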
New file: lollms/services/openai_whisper/lollms_whisper.py (70 lines)

# Title LollmsOpenAIWhisper
# Licence: MIT
# Author : Paris Neo
#

from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO
from openai import OpenAI


def get_Whisper(lollms_paths:LollmsPaths):
    return LollmsOpenAIWhisper

class LollmsOpenAIWhisper:
    def __init__(
                    self,
                    app:LollmsApplication,
                    model="whisper-1",
                    api_key="",
                    output_path=None
                    ):
        self.client = OpenAI(api_key=api_key)
        self.app = app
        self.model = model
        self.output_path = output_path
        self.ready = True

    def transcribe(
                self,
                wav_path: str|Path,
                model:str="",
                output_path:str|Path=None
                ):
        if model=="" or model is None:
            model = self.model
        if output_path is None:
            output_path = self.output_path
        audio_file= open(str(wav_path), "rb")
        transcription = self.client.audio.transcriptions.create(
            model=model,
            file=audio_file,
            response_format="text"
        )
        return transcription
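A hedged usage sketch for the new OpenAI-hosted Whisper wrapper; app is an assumed LollmsApplication instance, the key is a placeholder, and transcribe() returns the transcription text produced by the OpenAI transcription API.

from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper

def transcribe_remote(app, wav_path):
    stt = LollmsOpenAIWhisper(app, model="whisper-1", api_key="sk-...")
    return stt.transcribe(wav_path)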
@@ -28,18 +28,14 @@ from typing import List, Dict, Any
 
 from ascii_colors import ASCIIColors, trace_exception
 from lollms.paths import LollmsPaths
+from lollms.tti import LollmsTTI
 from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env
 import subprocess
 import shutil
 from tqdm import tqdm
 import threading
 
-def verify_sd(lollms_paths:LollmsPaths):
-    # Clone repository
-    root_dir = lollms_paths.personal_path
-    shared_folder = root_dir/"shared"
-    sd_folder = shared_folder / "auto_sd"
-    return sd_folder.exists()
-
 def download_file(url, folder_path, local_filename):
    # Make sure 'folder_path' exists
@@ -137,20 +133,6 @@ def upgrade_sd(lollms_app:LollmsApplication):
    ASCIIColors.success("DONE")
 
 
-def get_sd(lollms_paths:LollmsPaths):
-    root_dir = lollms_paths.personal_path
-    shared_folder = root_dir/"shared"
-    sd_folder = shared_folder / "auto_sd"
-    sd_script_path = sd_folder / "lollms_sd.py"
-    git_pull(sd_folder)
-
-    if sd_script_path.exists():
-        ASCIIColors.success("lollms_sd found.")
-        ASCIIColors.success("Loading source file...",end="")
-        # use importlib to load the module from the file path
-        from lollms.services.sd.lollms_sd import LollmsSD
-        ASCIIColors.success("ok")
-        return LollmsSD
-
-
 def raw_b64_img(image: Image) -> str:
@@ -274,7 +256,7 @@ class ControlNetUnit:
            "pixel_perfect": self.pixel_perfect,
        }
 
-class LollmsSD:
+class LollmsSD(LollmsTTI):
    has_controlnet = False
    def __init__(
                    self,
@@ -290,19 +272,19 @@
                    share=False,
                    wait_for_service=True
                    ):
+        super().__init__(app)
        if auto_sd_base_url=="" or auto_sd_base_url=="http://127.0.0.1:7860":
            auto_sd_base_url = None
        self.ready = False
        # Get the current directory
        lollms_paths = app.lollms_paths
-        self.app = app
        root_dir = lollms_paths.personal_path
 
        self.wm = wm
        # Store the path to the script
        if auto_sd_base_url is None:
            self.auto_sd_base_url = "http://127.0.0.1:7860"
-            if not verify_sd(lollms_paths):
+            if not LollmsSD.verify(app):
                install_sd(app.lollms_paths)
        else:
            self.auto_sd_base_url = auto_sd_base_url
@@ -364,6 +346,30 @@
        else:
            self.check_controlnet()
 
+    @staticmethod
+    def verify(app:LollmsApplication):
+        # Clone repository
+        root_dir = app.lollms_paths.personal_path
+        shared_folder = root_dir/"shared"
+        sd_folder = shared_folder / "auto_sd"
+        return sd_folder.exists()
+
+    def get(app:LollmsApplication):
+        root_dir = app.lollms_paths.personal_path
+        shared_folder = root_dir/"shared"
+        sd_folder = shared_folder / "auto_sd"
+        sd_script_path = sd_folder / "lollms_sd.py"
+        git_pull(sd_folder)
+
+        if sd_script_path.exists():
+            ASCIIColors.success("lollms_sd found.")
+            ASCIIColors.success("Loading source file...",end="")
+            # use importlib to load the module from the file path
+            from lollms.services.sd.lollms_sd import LollmsSD
+            ASCIIColors.success("ok")
+            return LollmsSD
+
+
    def paint(
                self,
                sd_positive_prompt,
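The refactor above moves the module-level verify_sd()/get_sd() helpers into LollmsSD.verify()/LollmsSD.get(), which is what the reworked start_sd() endpoint calls. A hedged construction sketch; the second constructor argument is the watermark/author name, as in the start_sd() call above:

from lollms.services.sd.lollms_sd import LollmsSD

def build_sd(app, name="Artbot"):
    if not LollmsSD.verify(app):    # True when shared/auto_sd exists under the personal path
        raise RuntimeError("Automatic1111 SD is not installed")
    sd_cls = LollmsSD.get(app)      # pulls the repo and returns the class
    return sd_cls(app, name)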
New file: lollms/services/whisper/lollms_whisper.py (42 lines)

# Title LollmsWhisper
# Licence: MIT
# Author : Paris Neo
#

from pathlib import Path
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
from lollms.utilities import PackageManager
from lollms.stt import LollmsSTT
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
import subprocess

if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("whisper")
import whisper


class LollmsWhisper(LollmsSTT):
    def __init__(
                    self,
                    app:LollmsApplication,
                    model="small",
                    output_path=None
                    ):
        self.app = app
        self.output_path = output_path
        self.whisper = whisper.load_model(model)

    def transcribe(
                self,
                wav_path: str|Path
                ):
        result = self.whisper.transcribe(str(wav_path))
        return result
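A hedged usage sketch for the new local Whisper wrapper; app is an assumed LollmsApplication instance. The underlying whisper package downloads the chosen model on first use, and its transcribe() typically returns a dict with a "text" key.

from lollms.services.whisper.lollms_whisper import LollmsWhisper

def transcribe_locally(app, wav_path):
    stt = LollmsWhisper(app, model="base")   # "base" matches the default in the config above
    result = stt.transcribe(wav_path)
    return result["text"]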
@ -11,7 +11,7 @@ import sys
|
|||||||
from lollms.app import LollmsApplication
|
from lollms.app import LollmsApplication
|
||||||
from lollms.paths import LollmsPaths
|
from lollms.paths import LollmsPaths
|
||||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||||
from lollms.utilities import PackageManager
|
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
|
||||||
import time
|
import time
|
||||||
import io
|
import io
|
||||||
import sys
|
import sys
|
||||||
@ -32,59 +32,11 @@ import uuid
|
|||||||
from ascii_colors import ASCIIColors, trace_exception
|
from ascii_colors import ASCIIColors, trace_exception
|
||||||
from lollms.paths import LollmsPaths
|
from lollms.paths import LollmsPaths
|
||||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
||||||
|
from lollms.tts import LollmsTTS
|
||||||
import subprocess
|
import subprocess
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
def verify_xtts(lollms_paths:LollmsPaths):
|
|
||||||
# Clone repository
|
|
||||||
root_dir = lollms_paths.personal_path
|
|
||||||
shared_folder = root_dir/"shared"
|
|
||||||
xtts_path = shared_folder / "xtts"
|
|
||||||
return xtts_path.exists()
|
|
||||||
|
|
||||||
def install_xtts(lollms_app:LollmsApplication):
|
|
||||||
ASCIIColors.green("XTTS installation started")
|
|
||||||
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
|
||||||
root_dir = lollms_app.lollms_paths.personal_path
|
|
||||||
shared_folder = root_dir/"shared"
|
|
||||||
xtts_path = shared_folder / "xtts"
|
|
||||||
|
|
||||||
# Step 1: Clone or update the repository
|
|
||||||
if os.path.exists(xtts_path):
|
|
||||||
print("Repository already exists. Pulling latest changes...")
|
|
||||||
try:
|
|
||||||
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
|
||||||
except:
|
|
||||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
|
||||||
|
|
||||||
else:
|
|
||||||
print("Cloning repository...")
|
|
||||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
|
||||||
|
|
||||||
# Step 2: Create or update the Conda environment
|
|
||||||
if environment_exists("xtts"):
|
|
||||||
print("Conda environment 'xtts' already exists. Updating...")
|
|
||||||
# Here you might want to update the environment, e.g., update Python or dependencies
|
|
||||||
# This step is highly dependent on how you manage your Conda environments and might involve
|
|
||||||
# running `conda update` commands or similar.
|
|
||||||
else:
|
|
||||||
print("Creating Conda environment 'xtts'...")
|
|
||||||
create_conda_env("xtts", "3.8")
|
|
||||||
|
|
||||||
# Step 3: Install or update dependencies using your custom function
|
|
||||||
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
|
||||||
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
|
||||||
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
|
||||||
|
|
||||||
# Step 4: Launch the server
|
|
||||||
# Assuming the server can be started with a Python script in the cloned repository
|
|
||||||
print("Launching XTTS API server...")
|
|
||||||
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
|
||||||
|
|
||||||
print("XTTS API server setup and launch completed.")
|
|
||||||
ASCIIColors.cyan("Done")
|
|
||||||
ASCIIColors.cyan("Installing xtts-api-server")
|
|
||||||
ASCIIColors.green("XTTS server installed successfully")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,8 +55,7 @@ def get_xtts(lollms_paths:LollmsPaths):
|
|||||||
ASCIIColors.success("ok")
|
ASCIIColors.success("ok")
|
||||||
return LollmsXTTS
|
return LollmsXTTS
|
||||||
|
|
||||||
class LollmsXTTS:
|
class LollmsXTTS(LollmsTTS):
|
||||||
has_controlnet = False
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
app:LollmsApplication,
|
app:LollmsApplication,
|
||||||
@ -117,6 +68,7 @@ class LollmsXTTS:
|
|||||||
use_deep_speed=False,
|
use_deep_speed=False,
|
||||||
use_streaming_mode = True
|
use_streaming_mode = True
|
||||||
):
|
):
|
||||||
|
super().__init__(app)
|
||||||
self.generation_threads = []
|
self.generation_threads = []
|
||||||
self.voices_folder = voices_folder
|
self.voices_folder = voices_folder
|
||||||
self.ready = False
|
self.ready = False
|
||||||
@ -124,7 +76,6 @@ class LollmsXTTS:
|
|||||||
xtts_base_url = None
|
xtts_base_url = None
|
||||||
# Get the current directory
|
# Get the current directory
|
||||||
lollms_paths = app.lollms_paths
|
lollms_paths = app.lollms_paths
|
||||||
self.app = app
|
|
||||||
root_dir = lollms_paths.personal_path
|
root_dir = lollms_paths.personal_path
|
||||||
self.voice_samples_path = voice_samples_path
|
self.voice_samples_path = voice_samples_path
|
||||||
self.use_deep_speed = use_deep_speed
|
self.use_deep_speed = use_deep_speed
|
||||||
@ -133,8 +84,8 @@ class LollmsXTTS:
|
|||||||
# Store the path to the script
|
# Store the path to the script
|
||||||
if xtts_base_url is None:
|
if xtts_base_url is None:
|
||||||
self.xtts_base_url = "http://127.0.0.1:8020"
|
self.xtts_base_url = "http://127.0.0.1:8020"
|
||||||
if not verify_xtts(lollms_paths):
|
if not LollmsXTTS.verify(lollms_paths):
|
||||||
install_xtts(app.lollms_paths)
|
LollmsXTTS.install(app)
|
||||||
else:
|
else:
|
||||||
self.xtts_base_url = xtts_base_url
|
self.xtts_base_url = xtts_base_url
|
||||||
|
|
||||||
@ -167,6 +118,57 @@ class LollmsXTTS:
|
|||||||
else:
|
else:
|
||||||
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
||||||
|
|
||||||
|
def install(lollms_app:LollmsApplication):
|
||||||
|
ASCIIColors.green("XTTS installation started")
|
||||||
|
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
||||||
|
root_dir = lollms_app.lollms_paths.personal_path
|
||||||
|
shared_folder = root_dir/"shared"
|
||||||
|
xtts_path = shared_folder / "xtts"
|
||||||
|
|
||||||
|
# Step 1: Clone or update the repository
|
||||||
|
if os.path.exists(xtts_path):
|
||||||
|
print("Repository already exists. Pulling latest changes...")
|
||||||
|
try:
|
||||||
|
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
||||||
|
except:
|
||||||
|
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print("Cloning repository...")
|
||||||
|
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||||
|
|
||||||
|
# Step 2: Create or update the Conda environment
|
||||||
|
if environment_exists("xtts"):
|
||||||
|
print("Conda environment 'xtts' already exists. Updating...")
|
||||||
|
# Here you might want to update the environment, e.g., update Python or dependencies
|
||||||
|
# This step is highly dependent on how you manage your Conda environments and might involve
|
||||||
|
# running `conda update` commands or similar.
|
||||||
|
else:
|
||||||
|
print("Creating Conda environment 'xtts'...")
|
||||||
|
create_conda_env("xtts", "3.8")
|
||||||
|
|
||||||
|
# Step 3: Install or update dependencies using your custom function
|
||||||
|
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
||||||
|
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
||||||
|
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
||||||
|
|
||||||
|
# Step 4: Launch the server
|
||||||
|
# Assuming the server can be started with a Python script in the cloned repository
|
||||||
|
print("Launching XTTS API server...")
|
||||||
|
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
||||||
|
|
||||||
|
print("XTTS API server setup and launch completed.")
|
||||||
|
ASCIIColors.cyan("Done")
|
||||||
|
ASCIIColors.cyan("Installing xtts-api-server")
|
||||||
|
ASCIIColors.green("XTTS server installed successfully")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(lollms_paths:LollmsPaths)->bool:
|
||||||
|
# Clone repository
|
||||||
|
root_dir = lollms_paths.personal_path
|
||||||
|
shared_folder = root_dir/"shared"
|
||||||
|
xtts_path = shared_folder / "xtts"
|
||||||
|
return xtts_path.exists()
|
||||||
|
|
||||||
    def run_xtts_api_server(self):
        # Get the path to the current Python interpreter
@@ -198,7 +200,7 @@ class LollmsXTTS:
        if self.voices_folder is not None:
            print("Generating sample audio.")
            voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
            self.tts_to_audio("x t t s is ready",voice_file[0].name)
            self.tts_to_audio("x t t s is ready",voice_file[0].stem)
        print("Service is available.")
        if self.app is not None:
            self.app.success("XTTS Service is now available.")
@@ -237,13 +239,13 @@ class LollmsXTTS:
            print("Request failed with status code:", response.status_code)
            return False

    def tts_to_file(self, text, speaker_wav, file_name_or_path, language="en"):
    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        url = f"{self.xtts_base_url}/tts_to_file"

        # Define the request body
        payload = {
            "text": text,
            "speaker_wav": speaker_wav,
            "speaker_wav": speaker,
            "language": language,
            "file_name_or_path": file_name_or_path
        }
@@ -262,14 +264,43 @@ class LollmsXTTS:
        else:
            print("Request failed with status code:", response.status_code)

    def tts_to_audio(self, text, speaker_wav, file_name_or_path:Path|str=None, language="en", use_threading=False):
    def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        voice = self.app.config.xtts_current_voice if speaker is None else speaker
        index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
        output_fn = f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
        if voice is None:
            voice = "main_voice"
        self.app.info("Starting to build voice")
        try:
            from lollms.services.xtts.lollms_xtts import LollmsXTTS
            # If the personality has a voice, then use it
            personality_audio:Path = self.app.personality.personality_package_path/"audio"
            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
                voices_folder = personality_audio
            elif voice!="main_voice":
                voices_folder = self.app.lollms_paths.custom_voices_path
            else:
                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
            language = self.app.config.xtts_current_language # convert_language_name()
            self.set_speaker_folder(voices_folder)
            preprocessed_text = add_period(text)
            voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
            if len(voice_file)==0:
                return {"status":False,"error":"Voice not found"}
            self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)

        except Exception as ex:
            trace_exception(ex)
            return {"status":False,"error":f"{ex}"}

    def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        def tts2_audio_th(thread_uid=None):
            url = f"{self.xtts_base_url}/tts_to_audio"

            # Define the request body
            payload = {
                "text": text,
                "speaker_wav": speaker_wav,
                "speaker_wav": speaker,
                "language": language
            }
            headers = {
@@ -308,3 +339,10 @@ class LollmsXTTS:
            return thread
        else:
            return tts2_audio_th()

    def get_voices(self):
        ASCIIColors.yellow("Listing voices")
        voices = ["main_voice"]
        voices_dir:Path = self.app.lollms_paths.custom_voices_path
        voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
        return voices
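As a quick orientation, a hedged usage sketch of the reworked voice API: `get_voices` returns voice stems (including "main_voice"), and `tts_to_audio` now takes a voice name rather than a wav file path. The `xtts` variable below is assumed to be an already-initialized LollmsXTTS instance.

# Sketch only: `xtts` is assumed to be a running LollmsXTTS instance attached to the app.
available = xtts.get_voices()      # e.g. ["main_voice", "my_custom_voice", ...]
result = xtts.tts_to_audio(
    "Hello from lollms",
    speaker=available[0],          # a voice stem, resolved to a .wav in the voices folder
    language="en",
)
if isinstance(result, dict) and not result.get("status", True):
    print("TTS failed:", result["error"])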
93 lollms/stt.py Normal file
@@ -0,0 +1,93 @@
"""
|
||||||
|
Lollms STT Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Speech-to-Text (STT) functionalities within the LollmsApplication framework. The base class `LollmsSTT` is intended to be inherited and implemented by other classes that provide specific STT functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class LollmsSTT:
|
||||||
|
"""
|
||||||
|
LollmsSTT is a base class for implementing Speech-to-Text (STT) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str): The STT model to be used for transcription.
|
||||||
|
output_path (Path or str): Path where the output transcription files will be saved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsSTT class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The STT model to be used for transcription. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output transcription files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.output_path = output_path
|
||||||
|
self.model = model
|
||||||
|
|
||||||
|
def transcribe(
|
||||||
|
self,
|
||||||
|
wav_path: str | Path,
|
||||||
|
prompt=""
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Transcribes the given audio file to text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
wav_path (str or Path): The path to the WAV audio file to be transcribed.
|
||||||
|
prompt (str, optional): An optional prompt to guide the transcription. Defaults to an empty string.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the STT service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the STT service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsSTT':
|
||||||
|
"""
|
||||||
|
Returns the LollmsSTT class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsSTT: The LollmsSTT class.
|
||||||
|
"""
|
||||||
|
return LollmsSTT
|
115 lollms/tti.py Normal file
@@ -0,0 +1,115 @@
"""
|
||||||
|
Lollms TTI Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Text-to-Image (TTI) functionalities within the LollmsApplication framework. The base class `LollmsTTI` is intended to be inherited and implemented by other classes that provide specific TTI functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
class LollmsTTI:
|
||||||
|
"""
|
||||||
|
LollmsTTI is a base class for implementing Text-to-Image (TTI) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str): The TTI model to be used for image generation.
|
||||||
|
api_key (str): API key for accessing external TTI services (if needed).
|
||||||
|
output_path (Path or str): Path where the output image files will be saved.
|
||||||
|
voices (List[str]): List of available voices for TTI (to be filled by the child class).
|
||||||
|
models (List[str]): List of available models for TTI (to be filled by the child class).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
api_key="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsTTI class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The TTI model to be used for image generation. Defaults to an empty string.
|
||||||
|
api_key (str, optional): API key for accessing external TTI services. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output image files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.model = model
|
||||||
|
self.api_key = api_key
|
||||||
|
self.output_path = output_path
|
||||||
|
self.voices = [] # To be filled by the child class
|
||||||
|
self.models = [] # To be filled by the child class
|
||||||
|
|
||||||
|
def paint(self, positive_prompt: str, negative_prompt: str = "") -> List[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Generates images based on the given positive and negative prompts.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
positive_prompt (str): The positive prompt describing the desired image.
|
||||||
|
negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Dict[str, str]]: A list of dictionaries containing image paths, URLs, and metadata.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def paint_from_images(self, positive_prompt: str, images: List[str], negative_prompt: str = "") -> List[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Generates images based on the given positive prompt and reference images.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
positive_prompt (str): The positive prompt describing the desired image.
|
||||||
|
images (List[str]): A list of paths to reference images.
|
||||||
|
negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Dict[str, str]]: A list of dictionaries containing image paths, URLs, and metadata.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the TTI service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the TTI service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsTTI':
|
||||||
|
"""
|
||||||
|
Returns the LollmsTTI class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsTTI: The LollmsTTI class.
|
||||||
|
"""
|
||||||
|
return LollmsTTI
|
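A minimal, hypothetical subclass sketch showing how a TTI backend would fill in `paint`; the DummyTTI name and the returned metadata keys are illustrative assumptions, not part of this commit.

# Hypothetical example of a concrete TTI backend built on the base class above.
from pathlib import Path
from typing import List, Dict
from lollms.app import LollmsApplication
from lollms.tti import LollmsTTI

class DummyTTI(LollmsTTI):
    def __init__(self, app: LollmsApplication, output_path=None):
        super().__init__(app, model="dummy", output_path=output_path)
        self.models = ["dummy"]
        self.ready = True

    def paint(self, positive_prompt: str, negative_prompt: str = "") -> List[Dict[str, str]]:
        # A real backend (autosd, dall-e, comfyui, ...) would generate an image here;
        # this sketch only reports where the output would be written.
        out = Path(self.output_path or ".") / "dummy.png"
        return [{"path": str(out), "prompt": positive_prompt, "negative_prompt": negative_prompt}]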
122 lollms/tts.py Normal file
@@ -0,0 +1,122 @@
"""
|
||||||
|
Lollms TTS Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Text-to-Speech (TTS) functionalities within the LollmsApplication framework. The base class `LollmsTTS` is intended to be inherited and implemented by other classes that provide specific TTS functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class LollmsTTS:
|
||||||
|
"""
|
||||||
|
LollmsTTS is a base class for implementing Text-to-Speech (TTS) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
voice (str): The voice model to be used for TTS.
|
||||||
|
api_key (str): API key for accessing external TTS services (if needed).
|
||||||
|
output_path (Path or str): Path where the output audio files will be saved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
voice="",
|
||||||
|
api_key="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsTTS class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The speach generation model to be used for TTS. Defaults to "".
|
||||||
|
voice (str, optional): The voice model to be used for TTS. Defaults to "alloy".
|
||||||
|
api_key (str, optional): API key for accessing external TTS services. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output audio files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.model = model
|
||||||
|
self.voice = voice
|
||||||
|
self.api_key = api_key
|
||||||
|
self.output_path = output_path
|
||||||
|
self.voices = [] # To be filled by the child class
|
||||||
|
self.models = [] # To be filled by the child class
|
||||||
|
|
||||||
|
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||||
|
"""
|
||||||
|
Converts the given text to speech and saves it to a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text (str): The text to be converted to speech.
|
||||||
|
speaker (str): The speaker/voice model to be used.
|
||||||
|
file_name_or_path (Path or str): The name or path of the output file.
|
||||||
|
language (str, optional): The language of the text. Defaults to "en".
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def tts_to_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
|
||||||
|
"""
|
||||||
|
Converts the given text to speech and returns the audio data.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text (str): The text to be converted to speech.
|
||||||
|
speaker (str): The speaker/voice model to be used.
|
||||||
|
file_name_or_path (Path or str, optional): The name or path of the output file. Defaults to None.
|
||||||
|
language (str, optional): The language of the text. Defaults to "en".
|
||||||
|
use_threading (bool, optional): Whether to use threading for the operation. Defaults to False.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the TTS service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the TTS service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsTTS':
|
||||||
|
"""
|
||||||
|
Returns the LollmsTTS class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsTTS: The LollmsTTS class.
|
||||||
|
"""
|
||||||
|
return LollmsTTS
|
||||||
|
|
||||||
|
def get_voices(self):
|
||||||
|
"""
|
||||||
|
Retrieves the available voices for TTS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list: A list of available voices.
|
||||||
|
"""
|
||||||
|
return self.voices
|
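And a matching hypothetical TTS subclass; DummyTTS and its silent WAV output are illustrative assumptions only, meant to show the inheritance pattern rather than a real backend.

# Hypothetical example of a concrete TTS backend built on the base class above.
import wave
from pathlib import Path
from lollms.app import LollmsApplication
from lollms.tts import LollmsTTS

class DummyTTS(LollmsTTS):
    def __init__(self, app: LollmsApplication, output_path=None):
        super().__init__(app, model="dummy", voice="main_voice", output_path=output_path)
        self.voices = ["main_voice"]
        self.ready = True

    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        # A real backend (xtts, openai_tts, ...) would synthesize speech here;
        # this sketch writes one second of silence so the call chain can be exercised.
        with wave.open(str(file_name_or_path), "wb") as f:
            f.setnchannels(1)
            f.setsampwidth(2)
            f.setframerate(16000)
            f.writeframes(b"\x00\x00" * 16000)
        return file_name_or_path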