mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
upgraded
This commit is contained in:
parent
42ebabfe0d
commit
397d21a3be
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 96
|
||||
version: 98
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -80,10 +80,30 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active test to speach, text to image and speach to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
@ -101,6 +121,13 @@ xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -202,6 +233,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
150
lollms/app.py
150
lollms/app.py
@ -221,7 +221,21 @@ class LollmsApplication(LoLLMsCom):
|
||||
def get_uploads_path(self, client_id):
|
||||
return self.lollms_paths.personal_uploads_path
|
||||
|
||||
def start_servers( self ):
|
||||
def start_servers(self):
|
||||
self.ollama = None
|
||||
self.vllm = None
|
||||
self.whisper = None
|
||||
self.xtts = None
|
||||
self.sd = None
|
||||
self.comfyui = None
|
||||
self.motion_ctrl = None
|
||||
|
||||
self.tti = None
|
||||
self.tts = None
|
||||
self.stt = None
|
||||
|
||||
|
||||
|
||||
if self.config.enable_ollama_service:
|
||||
try:
|
||||
from lollms.services.ollama.lollms_ollama import Service
|
||||
@ -240,13 +254,11 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
if self.config.whisper_activate:
|
||||
try:
|
||||
from lollms.media import AudioRecorder
|
||||
self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
|
||||
self.rec.start_recording()
|
||||
time.sleep(1)
|
||||
self.rec.stop_recording()
|
||||
except:
|
||||
pass
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
|
||||
if self.config.xtts_enable:
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
@ -256,7 +268,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
|
||||
self.tts = LollmsXTTS(
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=self.lollms_paths.custom_voices_path,
|
||||
@ -291,6 +303,126 @@ class LollmsApplication(LoLLMsCom):
|
||||
self.warning(f"Couldn't load Motion control")
|
||||
|
||||
|
||||
if self.config.active_tti_service == "autosd":
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
self.tti = LollmsSD(self)
|
||||
elif self.config.active_tti_service == "dall-e":
|
||||
from lollms.services.dalle.lollms_dalle import LollmsDalle
|
||||
self.tti = LollmsDalle(self, self.config.dall_e_key)
|
||||
elif self.config.active_tti_service == "midjourney":
|
||||
from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
|
||||
self.tti = LollmsMidjourney(self, self.config.midjourney_key)
|
||||
|
||||
if self.config.active_tts_service == "openai_tts":
|
||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||
self.tts = self.xtts
|
||||
|
||||
if self.config.active_stt_service == "openai_whisper":
|
||||
from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
|
||||
self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
|
||||
elif self.config.active_stt_service == "whisper":
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.stt = LollmsWhisper(self, self.config.whisper_model)
|
||||
|
||||
|
||||
def verify_servers(self, reload_all=False):
|
||||
|
||||
try:
|
||||
if self.config.enable_ollama_service and self.ollama is None:
|
||||
try:
|
||||
from lollms.services.ollama.lollms_ollama import Service
|
||||
self.ollama = Service(self, base_url=self.config.ollama_base_url)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load Ollama")
|
||||
|
||||
if self.config.enable_vllm_service and self.vllm is None:
|
||||
try:
|
||||
from lollms.services.vllm.lollms_vllm import Service
|
||||
self.vllm = Service(self, base_url=self.config.vllm_url)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load vllm")
|
||||
|
||||
if self.config.whisper_activate and self.whisper is None:
|
||||
try:
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
if self.config.xtts_enable and self.xtts is None:
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
voice=self.config.xtts_current_voice
|
||||
if voice!="main_voice":
|
||||
voices_folder = self.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=self.lollms_paths.custom_voices_path,
|
||||
xtts_base_url=self.config.xtts_base_url,
|
||||
wait_for_service=False,
|
||||
use_deep_speed=self.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=self.config.xtts_use_streaming_mode
|
||||
)
|
||||
except:
|
||||
self.warning(f"Couldn't load XTTS")
|
||||
|
||||
if self.config.enable_sd_service and self.sd is None:
|
||||
try:
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
self.sd = LollmsSD(self, auto_sd_base_url=self.config.sd_base_url)
|
||||
except:
|
||||
self.warning(f"Couldn't load SD")
|
||||
|
||||
if self.config.enable_comfyui_service and self.comfyui is None:
|
||||
try:
|
||||
from lollms.services.comfyui.lollms_comfyui import LollmsComfyUI
|
||||
self.comfyui = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
|
||||
except:
|
||||
self.warning(f"Couldn't load SD")
|
||||
|
||||
if self.config.enable_motion_ctrl_service and self.motion_ctrl is None:
|
||||
try:
|
||||
from lollms.services.motion_ctrl.lollms_motion_ctrl import Service
|
||||
self.motion_ctrl = Service(self, base_url=self.config.motion_ctrl_base_url)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load Motion control")
|
||||
|
||||
|
||||
if self.config.active_tti_service == "autosd":
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
self.tti = LollmsSD(self)
|
||||
elif self.config.active_tti_service == "dall-e":
|
||||
from lollms.services.dalle.lollms_dalle import LollmsDalle
|
||||
self.tti = LollmsDalle(self, self.config.dall_e_key)
|
||||
elif self.config.active_tti_service == "midjourney":
|
||||
from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
|
||||
self.tti = LollmsMidjourney(self, self.config.midjourney_key)
|
||||
|
||||
if self.config.active_tts_service == "openai_tts":
|
||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_stt_service == "xtts" and self.xtts:
|
||||
self.tts = self.xtts
|
||||
|
||||
if self.config.active_stt_service == "openai_whisper":
|
||||
from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
|
||||
self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
|
||||
elif self.config.active_stt_service == "whisper":
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.stt = LollmsWhisper(self, self.config.whisper_model)
|
||||
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
|
||||
|
||||
def build_long_term_skills_memory(self):
|
||||
discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
|
||||
discussion_db_name.mkdir(exist_ok=True, parents=True)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 96
|
||||
version: 98
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -80,10 +80,30 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active test to speach, text to image and speach to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
@ -101,6 +121,13 @@ xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -202,6 +233,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
@ -186,6 +186,7 @@ async def apply_settings(request: Request):
|
||||
lollmsElfServer.config.config[key] = config.get(key, lollmsElfServer.config.config[key])
|
||||
ASCIIColors.success("OK")
|
||||
lollmsElfServer.rebuild_personalities()
|
||||
lollmsElfServer.verify_servers()
|
||||
if lollmsElfServer.config.auto_save:
|
||||
lollmsElfServer.config.save_config()
|
||||
return {"status":True}
|
||||
|
@ -92,8 +92,8 @@ def start_sd(data: Identification):
|
||||
return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
lollmsElfServer.ShowBlockingMessage("Starting SD api server\nPlease stand by")
|
||||
from lollms.services.sd.lollms_sd import get_sd
|
||||
lollmsElfServer.sd = get_sd(lollmsElfServer.lollms_paths)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
lollmsElfServer.sd = LollmsSD.get(lollmsElfServer)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
|
||||
ASCIIColors.success("Done")
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
return {"status":True}
|
||||
|
@ -8,6 +8,7 @@ description:
|
||||
|
||||
"""
|
||||
from fastapi import APIRouter, Request, UploadFile, File, HTTPException
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import StreamingResponse
|
||||
@ -39,10 +40,7 @@ def list_voices():
|
||||
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
ASCIIColors.yellow("Listing voices")
|
||||
voices=["main_voice"]
|
||||
voices_dir:Path=lollmsElfServer.lollms_paths.custom_voices_path
|
||||
voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
|
||||
return {"voices":voices}
|
||||
return {"voices":lollmsElfServer.tts.get_voices()}
|
||||
|
||||
@router.post("/set_voice")
|
||||
async def set_voice(request: Request):
|
||||
@ -70,6 +68,24 @@ async def set_voice(request: Request):
|
||||
return {"status":False,"error":str(ex)}
|
||||
|
||||
|
||||
class LollmsAudio2TextRequest(BaseModel):
|
||||
wave_file_path: str
|
||||
voice: str = None
|
||||
fn:str = None
|
||||
|
||||
@router.post("/audio2text")
|
||||
async def audio2text(request: LollmsAudio2TextRequest):
|
||||
if lollmsElfServer.config.headless_server_mode:
|
||||
return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}
|
||||
|
||||
if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
|
||||
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
result = lollmsElfServer.whisper.transcribe(str(request.wave_file_path))
|
||||
return PlainTextResponse(result)
|
||||
|
||||
|
||||
|
||||
class LollmsText2AudioRequest(BaseModel):
|
||||
text: str
|
||||
voice: str = None
|
||||
@ -94,67 +110,13 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
|
||||
|
||||
try:
|
||||
# Get the JSON data from the POST request.
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if lollmsElfServer.tts is None:
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
|
||||
use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
|
||||
)
|
||||
except Exception as ex:
|
||||
return {"url": None, "error":f"{ex}"}
|
||||
|
||||
voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
|
||||
index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
|
||||
if voice is None:
|
||||
voice = "main_voice"
|
||||
lollmsElfServer.info("Starting to build voice")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
# If the personality has a voice, then use it
|
||||
personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
|
||||
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
|
||||
voices_folder = personality_audio
|
||||
elif voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
if lollmsElfServer.tts is None:
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
|
||||
)
|
||||
if lollmsElfServer.tts.ready:
|
||||
language = lollmsElfServer.config.xtts_current_language# convert_language_name()
|
||||
lollmsElfServer.tts.set_speaker_folder(voices_folder)
|
||||
preprocessed_text= add_period(request.text)
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
else:
|
||||
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
|
||||
return {"status":False, "error":"Service not ready yet"}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"url": None}
|
||||
if lollmsElfServer.tts is None:
|
||||
return {"url": None, "error":f"No TTS service is on"}
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
lollmsElfServer.error(ex)
|
||||
@ -255,9 +217,9 @@ def install_xtts(data:Identification):
|
||||
if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
|
||||
return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
from lollms.services.xtts.lollms_xtts import install_xtts
|
||||
from lollms.services.xtts.lollms_xtts import LollmsTTS
|
||||
lollmsElfServer.ShowBlockingMessage("Installing xTTS api server\nPlease stand by")
|
||||
install_xtts(lollmsElfServer)
|
||||
LollmsTTS.install(lollmsElfServer)
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
return {"status":True}
|
||||
except Exception as ex:
|
||||
|
@ -1,10 +1,7 @@
|
||||
# Title LollmsDalle
|
||||
# Licence: MIT
|
||||
# Licence: Apache 2.0
|
||||
# Author : Paris Neo
|
||||
# Adapted from the work of mix1009's sdwebuiapi
|
||||
# check it out : https://github.com/mix1009/sdwebuiapi/tree/main
|
||||
# Here is a copy of the LICENCE https://github.com/mix1009/sdwebuiapi/blob/main/LICENSE
|
||||
# All rights are reserved
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
@ -29,17 +26,16 @@ from typing import List, Dict, Any
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tti import LollmsTTI
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
|
||||
def get_Dalli(lollms_paths:LollmsPaths):
|
||||
return LollmsDalle
|
||||
|
||||
class LollmsDalle:
|
||||
has_controlnet = False
|
||||
|
||||
class LollmsDalle(LollmsTTI):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
@ -47,7 +43,7 @@ class LollmsDalle:
|
||||
generation_engine="dall-e-3",# other possibility "dall-e-2"
|
||||
output_path=None
|
||||
):
|
||||
self.app = app
|
||||
super().__init__(app)
|
||||
self.key = key
|
||||
self.generation_engine = generation_engine
|
||||
self.output_path = output_path
|
||||
@ -141,3 +137,6 @@ class LollmsDalle:
|
||||
ASCIIColors.red("Failed to download the image")
|
||||
|
||||
return file_name
|
||||
@staticmethod
|
||||
def get(app:LollmsApplication):
|
||||
return LollmsDalle
|
142
lollms/services/midjourney/lollms_midjourney.py
Normal file
142
lollms/services/midjourney/lollms_midjourney.py
Normal file
@ -0,0 +1,142 @@
|
||||
# Title LollmsMidjourney
|
||||
# Licence: Apache 2.0
|
||||
# Author : Paris Neo
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tti import LollmsTTI
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
||||
class LollmsMidjourney(LollmsTTI):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
key="",
|
||||
generation_engine="dall-e-3",# other possibility "dall-e-2"
|
||||
output_path=None
|
||||
):
|
||||
super().__init__(app)
|
||||
self.key = key
|
||||
self.generation_engine = generation_engine
|
||||
self.output_path = output_path
|
||||
|
||||
def paint(
|
||||
self,
|
||||
prompt,
|
||||
width=512,
|
||||
height=512,
|
||||
images = [],
|
||||
generation_engine=None,
|
||||
output_path = None
|
||||
):
|
||||
if output_path is None:
|
||||
output_path = self.output_path
|
||||
if generation_engine is None:
|
||||
generation_engine = self.generation_engine
|
||||
if not PackageManager.check_package_installed("openai"):
|
||||
PackageManager.install_package("openai")
|
||||
import openai
|
||||
openai.api_key = self.key
|
||||
if generation_engine=="dall-e-2":
|
||||
supported_resolutions = [
|
||||
[512, 512],
|
||||
[1024, 1024],
|
||||
]
|
||||
# Find the closest resolution
|
||||
closest_resolution = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
|
||||
|
||||
else:
|
||||
supported_resolutions = [
|
||||
[1024, 1024],
|
||||
[1024, 1792],
|
||||
[1792, 1024]
|
||||
]
|
||||
# Find the closest resolution
|
||||
if width>height:
|
||||
closest_resolution = [1792, 1024]
|
||||
elif width<height:
|
||||
closest_resolution = [1024, 1792]
|
||||
else:
|
||||
closest_resolution = [1024, 1024]
|
||||
|
||||
|
||||
# Update the width and height
|
||||
width = closest_resolution[0]
|
||||
height = closest_resolution[1]
|
||||
|
||||
if len(images)>0 and generation_engine=="dall-e-2":
|
||||
# Read the image file from disk and resize it
|
||||
image = Image.open(self.personality.image_files[0])
|
||||
width, height = width, height
|
||||
image = image.resize((width, height))
|
||||
|
||||
# Convert the image to a BytesIO object
|
||||
byte_stream = BytesIO()
|
||||
image.save(byte_stream, format='PNG')
|
||||
byte_array = byte_stream.getvalue()
|
||||
response = openai.images.create_variation(
|
||||
image=byte_array,
|
||||
n=1,
|
||||
model=generation_engine, # for now only dalle 2 supports variations
|
||||
size=f"{width}x{height}"
|
||||
)
|
||||
else:
|
||||
response = openai.images.generate(
|
||||
model=generation_engine,
|
||||
prompt=prompt.strip(),
|
||||
quality="standard",
|
||||
size=f"{width}x{height}",
|
||||
n=1,
|
||||
|
||||
)
|
||||
# download image to outputs
|
||||
output_dir = Path(output_path)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
image_url = response.data[0].url
|
||||
|
||||
# Get the image data from the URL
|
||||
response = requests.get(image_url)
|
||||
|
||||
if response.status_code == 200:
|
||||
# Generate the full path for the image file
|
||||
file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_") # You can change the filename if needed
|
||||
|
||||
# Save the image to the specified folder
|
||||
with open(file_name, "wb") as file:
|
||||
file.write(response.content)
|
||||
ASCIIColors.yellow(f"Image saved to {file_name}")
|
||||
else:
|
||||
ASCIIColors.red("Failed to download the image")
|
||||
|
||||
return file_name
|
||||
@staticmethod
|
||||
def get(app:LollmsApplication):
|
||||
return LollmsMidjourney
|
108
lollms/services/open_ai_tts/lollms_openai_tts.py
Normal file
108
lollms/services/open_ai_tts/lollms_openai_tts.py
Normal file
@ -0,0 +1,108 @@
|
||||
# Title LollmsOpenAITTS
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
# Uses open AI api to perform text to speech
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
if not PackageManager.check_package_installed("sounddevice"):
|
||||
PackageManager.install_package("sounddevice")
|
||||
if not PackageManager.check_package_installed("soundfile"):
|
||||
PackageManager.install_package("soundfile")
|
||||
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
|
||||
def get_Whisper(lollms_paths:LollmsPaths):
|
||||
return LollmsOpenAITTS
|
||||
|
||||
class LollmsOpenAITTS(LollmsTTS):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
model ="tts-1",
|
||||
voice="alloy",
|
||||
api_key="",
|
||||
output_path=None
|
||||
):
|
||||
super().__init__(app, model, voice, api_key, output_path)
|
||||
self.client = OpenAI(api_key=api_key)
|
||||
self.voices = [
|
||||
"alloy",
|
||||
"echo",
|
||||
"fable",
|
||||
"nova",
|
||||
"shimmer"
|
||||
]
|
||||
self.models = [
|
||||
"tts-1"
|
||||
]
|
||||
|
||||
self.voice = voice
|
||||
self.output_path = output_path
|
||||
self.ready = True
|
||||
|
||||
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
speech_file_path = file_name_or_path
|
||||
response = self.client.audio.speech.create(
|
||||
model=self.model,
|
||||
voice=self.voice,
|
||||
input=text,
|
||||
response_format="wav"
|
||||
|
||||
)
|
||||
|
||||
response.write_to_file(speech_file_path)
|
||||
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
speech_file_path = file_name_or_path
|
||||
response = self.client.audio.speech.create(
|
||||
model=self.model,
|
||||
voice=self.voice,
|
||||
input=text,
|
||||
response_format="wav"
|
||||
|
||||
)
|
||||
|
||||
response.write_to_file(speech_file_path)
|
||||
def play_audio(file_path):
|
||||
# Read the audio file
|
||||
data, fs = sf.read(file_path, dtype='float32')
|
||||
# Play the audio file
|
||||
sd.play(data, fs)
|
||||
# Wait until the file is done playing
|
||||
sd.wait()
|
||||
|
||||
# Example usage
|
||||
play_audio(speech_file_path)
|
70
lollms/services/openai_whisper/lollms_whisper.py
Normal file
70
lollms/services/openai_whisper/lollms_whisper.py
Normal file
@ -0,0 +1,70 @@
|
||||
# Title LollmsOpenAIWhisper
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
def get_Whisper(lollms_paths:LollmsPaths):
|
||||
return LollmsOpenAIWhisper
|
||||
|
||||
class LollmsOpenAIWhisper:
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
model="whisper-1",
|
||||
api_key="",
|
||||
output_path=None
|
||||
):
|
||||
self.client = OpenAI(api_key=api_key)
|
||||
self.app = app
|
||||
self.model = model
|
||||
self.output_path = output_path
|
||||
self.ready = True
|
||||
|
||||
def transcribe(
|
||||
self,
|
||||
wav_path: str|Path,
|
||||
model:str="",
|
||||
output_path:str|Path=None
|
||||
):
|
||||
if model=="" or model is None:
|
||||
model = self.model
|
||||
if output_path is None:
|
||||
output_path = self.output_path
|
||||
audio_file= open(str(wav_path), "rb")
|
||||
transcription = self.client.audio.transcriptions.create(
|
||||
model=model,
|
||||
file=audio_file,
|
||||
response_format="text"
|
||||
)
|
||||
return transcription
|
@ -28,18 +28,14 @@ from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.tti import LollmsTTI
|
||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
|
||||
def verify_sd(lollms_paths:LollmsPaths):
    """Return True when the shared/auto_sd folder already exists (SD installed)."""
    sd_folder = lollms_paths.personal_path / "shared" / "auto_sd"
    return sd_folder.exists()
||||
|
||||
|
||||
def download_file(url, folder_path, local_filename):
|
||||
# Make sure 'folder_path' exists
|
||||
@ -137,20 +133,6 @@ def upgrade_sd(lollms_app:LollmsApplication):
|
||||
ASCIIColors.success("DONE")
|
||||
|
||||
|
||||
def get_sd(lollms_paths:LollmsPaths):
    """Update the auto_sd repository and return the LollmsSD class when present."""
    sd_folder = lollms_paths.personal_path / "shared" / "auto_sd"
    sd_script_path = sd_folder / "lollms_sd.py"
    git_pull(sd_folder)

    if sd_script_path.exists():
        ASCIIColors.success("lollms_sd found.")
        ASCIIColors.success("Loading source file...", end="")
        # deferred import: only load the module once it is known to exist
        from lollms.services.sd.lollms_sd import LollmsSD
        ASCIIColors.success("ok")
        return LollmsSD
||||
|
||||
|
||||
def raw_b64_img(image: Image) -> str:
|
||||
@ -274,7 +256,7 @@ class ControlNetUnit:
|
||||
"pixel_perfect": self.pixel_perfect,
|
||||
}
|
||||
|
||||
class LollmsSD:
|
||||
class LollmsSD(LollmsTTI):
|
||||
has_controlnet = False
|
||||
def __init__(
|
||||
self,
|
||||
@ -290,19 +272,19 @@ class LollmsSD:
|
||||
share=False,
|
||||
wait_for_service=True
|
||||
):
|
||||
super().__init__(app)
|
||||
if auto_sd_base_url=="" or auto_sd_base_url=="http://127.0.0.1:7860":
|
||||
auto_sd_base_url = None
|
||||
self.ready = False
|
||||
# Get the current directory
|
||||
lollms_paths = app.lollms_paths
|
||||
self.app = app
|
||||
root_dir = lollms_paths.personal_path
|
||||
|
||||
self.wm = wm
|
||||
# Store the path to the script
|
||||
if auto_sd_base_url is None:
|
||||
self.auto_sd_base_url = "http://127.0.0.1:7860"
|
||||
if not verify_sd(lollms_paths):
|
||||
if not LollmsSD.verify(app):
|
||||
install_sd(app.lollms_paths)
|
||||
else:
|
||||
self.auto_sd_base_url = auto_sd_base_url
|
||||
@ -364,6 +346,30 @@ class LollmsSD:
|
||||
else:
|
||||
self.check_controlnet()
|
||||
|
||||
@staticmethod
|
||||
def verify(app:LollmsApplication):
|
||||
# Clone repository
|
||||
root_dir = app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
sd_folder = shared_folder / "auto_sd"
|
||||
return sd_folder.exists()
|
||||
|
||||
def get(app:LollmsApplication):
|
||||
root_dir = app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
sd_folder = shared_folder / "auto_sd"
|
||||
sd_script_path = sd_folder / "lollms_sd.py"
|
||||
git_pull(sd_folder)
|
||||
|
||||
if sd_script_path.exists():
|
||||
ASCIIColors.success("lollms_sd found.")
|
||||
ASCIIColors.success("Loading source file...",end="")
|
||||
# use importlib to load the module from the file path
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
ASCIIColors.success("ok")
|
||||
return LollmsSD
|
||||
|
||||
|
||||
def paint(
|
||||
self,
|
||||
sd_positive_prompt,
|
||||
|
42
lollms/services/whisper/lollms_whisper.py
Normal file
42
lollms/services/whisper/lollms_whisper.py
Normal file
@ -0,0 +1,42 @@
|
||||
# Title LollmsWhisper
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.stt import LollmsSTT
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
import subprocess
|
||||
|
||||
# The local Whisper STT backend is OpenAI's open-source package, imported as
# `whisper` but published on PyPI as "openai-whisper"; the bare "whisper"
# name on PyPI is an unrelated project, so install the correct distribution.
# NOTE(review): assumes PackageManager.install_package pip-installs the given
# name verbatim — confirm against PackageManager's implementation.
if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("openai-whisper")
import whisper
||||
|
||||
|
||||
class LollmsWhisper(LollmsSTT):
    """Local speech-to-text service backed by the open-source whisper library."""

    def __init__(
        self,
        app: LollmsApplication,
        model="small",
        output_path=None
    ):
        """Load the whisper model and initialize the common STT state.

        Args:
            app: Hosting LollmsApplication instance.
            model: Whisper model size/name (e.g. "small"). Defaults to "small".
            output_path: Folder where transcription outputs may be saved.
        """
        # Initialize the base-class state (app, model, output_path, ready flag)
        # instead of duplicating only part of it here.
        super().__init__(app, model, output_path)
        self.whisper = whisper.load_model(model)
        # Model is loaded synchronously above, so the service is usable now.
        self.ready = True

    def transcribe(
        self,
        wav_path: str | Path
    ):
        """Transcribe the audio file at `wav_path`.

        Args:
            wav_path: Path of the audio file to transcribe.

        Returns:
            The result object produced by whisper's `transcribe`.
        """
        result = self.whisper.transcribe(str(wav_path))
        return result
|
@ -11,7 +11,7 @@ import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
@ -32,59 +32,11 @@ import uuid
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import platform
|
||||
|
||||
def verify_xtts(lollms_paths:LollmsPaths):
    """Return True when the shared/xtts folder exists (XTTS already installed)."""
    xtts_path = lollms_paths.personal_path / "shared" / "xtts"
    return xtts_path.exists()
||||
|
||||
def install_xtts(lollms_app:LollmsApplication):
    """Clone/update the xtts-api-server repo, prepare its conda env and launch it.

    Args:
        lollms_app: Hosting Lollms application (used for its personal paths).
    """
    ASCIIColors.green("XTTS installation started")
    repo_url = "https://github.com/ParisNeo/xtts-api-server"
    root_dir = lollms_app.lollms_paths.personal_path
    shared_folder = root_dir/"shared"
    xtts_path = shared_folder / "xtts"

    # Step 1: Clone or update the repository
    if os.path.exists(xtts_path):
        print("Repository already exists. Pulling latest changes...")
        try:
            subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
        except subprocess.CalledProcessError:
            # Pull failed (e.g. broken clone): fall back to a fresh clone.
            # Narrowed from a bare `except:` which also hid KeyboardInterrupt.
            subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
    else:
        print("Cloning repository...")
        subprocess.run(["git", "clone", repo_url, xtts_path], check=True)

    # Step 2: Create or update the Conda environment
    if environment_exists("xtts"):
        print("Conda environment 'xtts' already exists. Updating...")
        # Environment updates are left to the user's conda management.
    else:
        print("Creating Conda environment 'xtts'...")
        create_conda_env("xtts", "3.8")

    # Step 3: Install or update dependencies inside the env
    requirements_path = os.path.join(xtts_path, "requirements.txt")
    run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
    run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)

    # Step 4: Launch the server from the cloned repository
    print("Launching XTTS API server...")
    run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)

    print("XTTS API server setup and launch completed.")
    ASCIIColors.cyan("Done")
    ASCIIColors.cyan("Installing xtts-api-server")
    ASCIIColors.green("XTTS server installed successfully")
||||
@ -103,8 +55,7 @@ def get_xtts(lollms_paths:LollmsPaths):
|
||||
ASCIIColors.success("ok")
|
||||
return LollmsXTTS
|
||||
|
||||
class LollmsXTTS:
|
||||
has_controlnet = False
|
||||
class LollmsXTTS(LollmsTTS):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
@ -117,6 +68,7 @@ class LollmsXTTS:
|
||||
use_deep_speed=False,
|
||||
use_streaming_mode = True
|
||||
):
|
||||
super().__init__(app)
|
||||
self.generation_threads = []
|
||||
self.voices_folder = voices_folder
|
||||
self.ready = False
|
||||
@ -124,7 +76,6 @@ class LollmsXTTS:
|
||||
xtts_base_url = None
|
||||
# Get the current directory
|
||||
lollms_paths = app.lollms_paths
|
||||
self.app = app
|
||||
root_dir = lollms_paths.personal_path
|
||||
self.voice_samples_path = voice_samples_path
|
||||
self.use_deep_speed = use_deep_speed
|
||||
@ -133,8 +84,8 @@ class LollmsXTTS:
|
||||
# Store the path to the script
|
||||
if xtts_base_url is None:
|
||||
self.xtts_base_url = "http://127.0.0.1:8020"
|
||||
if not verify_xtts(lollms_paths):
|
||||
install_xtts(app.lollms_paths)
|
||||
if not LollmsXTTS.verify(lollms_paths):
|
||||
LollmsXTTS.install(app)
|
||||
else:
|
||||
self.xtts_base_url = xtts_base_url
|
||||
|
||||
@ -167,6 +118,57 @@ class LollmsXTTS:
|
||||
else:
|
||||
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
||||
|
||||
def install(lollms_app:LollmsApplication):
|
||||
ASCIIColors.green("XTTS installation started")
|
||||
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
||||
root_dir = lollms_app.lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
|
||||
# Step 1: Clone or update the repository
|
||||
if os.path.exists(xtts_path):
|
||||
print("Repository already exists. Pulling latest changes...")
|
||||
try:
|
||||
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
||||
except:
|
||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||
|
||||
else:
|
||||
print("Cloning repository...")
|
||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||
|
||||
# Step 2: Create or update the Conda environment
|
||||
if environment_exists("xtts"):
|
||||
print("Conda environment 'xtts' already exists. Updating...")
|
||||
# Here you might want to update the environment, e.g., update Python or dependencies
|
||||
# This step is highly dependent on how you manage your Conda environments and might involve
|
||||
# running `conda update` commands or similar.
|
||||
else:
|
||||
print("Creating Conda environment 'xtts'...")
|
||||
create_conda_env("xtts", "3.8")
|
||||
|
||||
# Step 3: Install or update dependencies using your custom function
|
||||
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
||||
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
||||
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
||||
|
||||
# Step 4: Launch the server
|
||||
# Assuming the server can be started with a Python script in the cloned repository
|
||||
print("Launching XTTS API server...")
|
||||
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
||||
|
||||
print("XTTS API server setup and launch completed.")
|
||||
ASCIIColors.cyan("Done")
|
||||
ASCIIColors.cyan("Installing xtts-api-server")
|
||||
ASCIIColors.green("XTTS server installed successfully")
|
||||
|
||||
@staticmethod
|
||||
def verify(lollms_paths:LollmsPaths)->bool:
|
||||
# Clone repository
|
||||
root_dir = lollms_paths.personal_path
|
||||
shared_folder = root_dir/"shared"
|
||||
xtts_path = shared_folder / "xtts"
|
||||
return xtts_path.exists()
|
||||
|
||||
def run_xtts_api_server(self):
|
||||
# Get the path to the current Python interpreter
|
||||
@ -198,7 +200,7 @@ class LollmsXTTS:
|
||||
if self.voices_folder is not None:
|
||||
print("Generating sample audio.")
|
||||
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
|
||||
self.tts_to_audio("x t t s is ready",voice_file[0].name)
|
||||
self.tts_to_audio("x t t s is ready",voice_file[0].stem)
|
||||
print("Service is available.")
|
||||
if self.app is not None:
|
||||
self.app.success("XTTS Service is now available.")
|
||||
@ -237,13 +239,13 @@ class LollmsXTTS:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
return False
|
||||
|
||||
def tts_to_file(self, text, speaker_wav, file_name_or_path, language="en"):
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
url = f"{self.xtts_base_url}/tts_to_file"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker_wav,
|
||||
"speaker_wav": speaker,
|
||||
"language": language,
|
||||
"file_name_or_path": file_name_or_path
|
||||
}
|
||||
@ -262,14 +264,43 @@ class LollmsXTTS:
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
|
||||
def tts_to_audio(self, text, speaker_wav, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
voice=self.app.config.xtts_current_voice if speaker is None else speaker
|
||||
index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
|
||||
if voice is None:
|
||||
voice = "main_voice"
|
||||
self.app.info("Starting to build voice")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
# If the personality has a voice, then use it
|
||||
personality_audio:Path = self.app.personality.personality_package_path/"audio"
|
||||
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
|
||||
voices_folder = personality_audio
|
||||
elif voice!="main_voice":
|
||||
voices_folder = self.app.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
language = self.app.config.xtts_current_language# convert_language_name()
|
||||
self.set_speaker_folder(voices_folder)
|
||||
preprocessed_text= add_period(text)
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"status":False,"error":f"{ex}"}
|
||||
|
||||
def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts2_audio_th(thread_uid=None):
|
||||
url = f"{self.xtts_base_url}/tts_to_audio"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker_wav,
|
||||
"speaker_wav": speaker,
|
||||
"language": language
|
||||
}
|
||||
headers = {
|
||||
@ -308,3 +339,10 @@ class LollmsXTTS:
|
||||
return thread
|
||||
else:
|
||||
return tts2_audio_th()
|
||||
|
||||
def get_voices(self):
|
||||
ASCIIColors.yellow("Listing voices")
|
||||
voices=["main_voice"]
|
||||
voices_dir:Path=self.app.lollms_paths.custom_voices_path
|
||||
voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
|
||||
return voices
|
||||
|
93
lollms/stt.py
Normal file
93
lollms/stt.py
Normal file
@ -0,0 +1,93 @@
|
||||
"""
|
||||
Lollms STT Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Speech-to-Text (STT) functionalities within the LollmsApplication framework. The base class `LollmsSTT` is intended to be inherited and implemented by other classes that provide specific STT functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
|
||||
class LollmsSTT:
    """Abstract base for Speech-to-Text services in a Lollms application.

    Concrete backends (local whisper, remote APIs, ...) subclass this and
    implement `transcribe`.

    Attributes:
        app: The hosting LollmsApplication instance.
        model: Name of the STT model used for transcription.
        output_path: Folder where transcription artifacts are written.
        ready: Set to True by subclasses once the service can be used.
    """

    def __init__(
        self,
        app: LollmsApplication,
        model="",
        output_path=None
    ):
        """Store the common STT configuration.

        Args:
            app: The hosting LollmsApplication instance.
            model: STT model identifier (backend-specific). Defaults to "".
            output_path: Destination folder for outputs. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.model = model
        self.output_path = output_path

    def transcribe(
        self,
        wav_path: str | Path,
        prompt=""
    ):
        """Transcribe the audio file at `wav_path` to text.

        Args:
            wav_path: Path of the WAV file to transcribe.
            prompt: Optional hint guiding the transcription. Defaults to "".
        """
        pass

    @staticmethod
    def verify(app: LollmsApplication) -> bool:
        """Return True when the STT service is available.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def install(app: LollmsApplication) -> bool:
        """Install what the STT service needs; return True on success.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsSTT':
        """Factory hook: return the service class itself.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return LollmsSTT
115
lollms/tti.py
Normal file
115
lollms/tti.py
Normal file
@ -0,0 +1,115 @@
|
||||
"""
|
||||
Lollms TTI Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Text-to-Image (TTI) functionalities within the LollmsApplication framework. The base class `LollmsTTI` is intended to be inherited and implemented by other classes that provide specific TTI functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
class LollmsTTI:
    """Abstract base for Text-to-Image services in a Lollms application.

    Concrete backends (automatic1111, DALL-E, ...) subclass this and
    implement `paint` / `paint_from_images`.

    Attributes:
        app: The hosting LollmsApplication instance.
        model: TTI model identifier used for image generation.
        api_key: API key for external TTI services (if needed).
        output_path: Folder where generated image files are saved.
        voices: Available voices (populated by the child class).
        models: Available models (populated by the child class).
    """

    def __init__(
        self,
        app: LollmsApplication,
        model="",
        api_key="",
        output_path=None
    ):
        """Store the common TTI configuration.

        Args:
            app: The hosting LollmsApplication instance.
            model: TTI model identifier. Defaults to "".
            api_key: API key for external TTI services. Defaults to "".
            output_path: Destination folder for generated images. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.model = model
        self.api_key = api_key
        self.output_path = output_path
        self.voices = []  # populated by the child class
        self.models = []  # populated by the child class

    def paint(self, positive_prompt: str, negative_prompt: str = "") -> List[Dict[str, str]]:
        """Generate images from a positive and an optional negative prompt.

        Args:
            positive_prompt: Description of the desired image.
            negative_prompt: Description of what to avoid. Defaults to "".

        Returns:
            A list of dictionaries with image paths, URLs, and metadata.
        """
        pass

    def paint_from_images(self, positive_prompt: str, images: List[str], negative_prompt: str = "") -> List[Dict[str, str]]:
        """Generate images guided by a prompt and reference images.

        Args:
            positive_prompt: Description of the desired image.
            images: Paths of reference images.
            negative_prompt: Description of what to avoid. Defaults to "".

        Returns:
            A list of dictionaries with image paths, URLs, and metadata.
        """
        pass

    @staticmethod
    def verify(app: LollmsApplication) -> bool:
        """Return True when the TTI service is available.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def install(app: LollmsApplication) -> bool:
        """Install what the TTI service needs; return True on success.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsTTI':
        """Factory hook: return the service class itself.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return LollmsTTI
122
lollms/tts.py
Normal file
122
lollms/tts.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Lollms TTS Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Text-to-Speech (TTS) functionalities within the LollmsApplication framework. The base class `LollmsTTS` is intended to be inherited and implemented by other classes that provide specific TTS functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
|
||||
class LollmsTTS:
    """Abstract base for Text-to-Speech services in a Lollms application.

    Concrete backends (XTTS, OpenAI TTS, ...) subclass this and implement
    `tts_to_file` / `tts_to_audio`.

    Attributes:
        app: The hosting LollmsApplication instance.
        model: Speech generation model identifier.
        voice: Voice preset used for synthesis.
        api_key: API key for external TTS services (if needed).
        output_path: Folder where generated audio files are saved.
        voices: Available voices (populated by the child class).
        models: Available models (populated by the child class).
    """

    def __init__(
        self,
        app: LollmsApplication,
        model="",
        voice="",
        api_key="",
        output_path=None
    ):
        """Store the common TTS configuration.

        Args:
            app: The hosting LollmsApplication instance.
            model: The speech generation model to be used. Defaults to "".
            voice: The voice preset to be used. Defaults to "" (backend default).
            api_key: API key for external TTS services. Defaults to "".
            output_path: Destination folder for audio files. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.model = model
        self.voice = voice
        self.api_key = api_key
        self.output_path = output_path
        self.voices = []  # populated by the child class
        self.models = []  # populated by the child class

    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        """Convert `text` to speech and save the result to a file.

        Args:
            text: The text to be converted to speech.
            speaker: The speaker/voice model to be used.
            file_name_or_path: Name or path of the output file.
            language: Language of the text. Defaults to "en".
        """
        pass

    def tts_to_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Convert `text` to speech and return/emit the audio data.

        Args:
            text: The text to be converted to speech.
            speaker: The speaker/voice model to be used.
            file_name_or_path: Optional name or path of the output file.
            language: Language of the text. Defaults to "en".
            use_threading: Whether to run the synthesis in a thread. Defaults to False.
        """
        pass

    @staticmethod
    def verify(app: LollmsApplication) -> bool:
        """Return True when the TTS service is available.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def install(app: LollmsApplication) -> bool:
        """Install what the TTS service needs; return True on success.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return True

    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsTTS':
        """Factory hook: return the service class itself.

        Args:
            app: The hosting LollmsApplication instance.
        """
        return LollmsTTS

    def get_voices(self):
        """Return the list of available voices for this TTS service."""
        return self.voices
Loading…
Reference in New Issue
Block a user