diff --git a/configs/config.yaml b/configs/config.yaml index f89589a..d214739 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 98 +version: 99 binding_name: null model_name: null model_variant: null @@ -85,7 +85,7 @@ copy_to_clipboard_add_all_details: false active_tts_service: "None" # xtts (offline), openai_tts (API key required) active_tti_service: "None" # autosd (offline), dall-e (online) active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required) - +active_ttm_service: "None" # musicgen (offline) # -------------------- Services -------------------------- # ***************** STT ***************** diff --git a/lollms/app.py b/lollms/app.py index 1ccb843..9a5732e 100644 --- a/lollms/app.py +++ b/lollms/app.py @@ -320,7 +320,7 @@ class LollmsApplication(LoLLMsCom): self.tts = self.xtts if self.config.active_stt_service == "openai_whisper": - from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper + from lollms.services.openai_whisper.lollms_openai_whisper import LollmsOpenAIWhisper self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key) elif self.config.active_stt_service == "whisper": from lollms.services.whisper.lollms_whisper import LollmsWhisper @@ -413,7 +413,7 @@ class LollmsApplication(LoLLMsCom): self.tts = self.xtts if self.config.active_stt_service == "openai_whisper": - from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper + from lollms.services.openai_whisper.lollms_openai_whisper import LollmsOpenAIWhisper self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key) elif self.config.active_stt_service == "whisper": from lollms.services.whisper.lollms_whisper import LollmsWhisper diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index f89589a..d214739 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 98 +version: 99 binding_name: null model_name: null model_variant: null @@ -85,7 +85,7 @@ copy_to_clipboard_add_all_details: false active_tts_service: "None" # xtts (offline), openai_tts (API key required) active_tti_service: "None" # autosd (offline), dall-e (online) active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required) - +active_ttm_service: "None" # musicgen (offline) # -------------------- Services -------------------------- # ***************** STT ***************** diff --git a/lollms/server/endpoints/lollms_xtts.py b/lollms/server/endpoints/lollms_tts.py similarity index 95% rename from lollms/server/endpoints/lollms_xtts.py rename to lollms/server/endpoints/lollms_tts.py index fdecf60..96691ed 100644 --- a/lollms/server/endpoints/lollms_xtts.py +++ b/lollms/server/endpoints/lollms_tts.py @@ -42,6 +42,18 @@ def list_voices(): ASCIIColors.yellow("Listing voices") return {"voices":lollmsElfServer.tts.get_voices()} +@router.get("/list_stt_models") +def list_stt_models(): + if lollmsElfServer.config.headless_server_mode: + return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"} + + if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1": + return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"} + + ASCIIColors.yellow("Listing voices") + return {"voices":lollmsElfServer.stt.get_models()} + + @router.post("/set_voice") async def set_voice(request: Request): """ @@ -70,7 +82,7 @@ async def set_voice(request: Request): class LollmsAudio2TextRequest(BaseModel): wave_file_path: str - voice: str = None + model: str = None fn:str = None @router.post("/audio2text") diff --git a/lollms/services/openai_whisper/lollms_whisper.py b/lollms/services/openai_whisper/lollms_openai_whisper.py similarity index 88% rename from lollms/services/openai_whisper/lollms_whisper.py rename to lollms/services/openai_whisper/lollms_openai_whisper.py index d2aeef2..659e3ad 100644 --- a/lollms/services/openai_whisper/lollms_whisper.py +++ b/lollms/services/openai_whisper/lollms_openai_whisper.py @@ -26,6 +26,7 @@ from typing import List, Dict, Any from ascii_colors import ASCIIColors, trace_exception from lollms.paths import LollmsPaths from lollms.utilities import PackageManager, find_next_available_filename +from lollms.stt import LollmsSTT import subprocess import shutil from tqdm import tqdm @@ -34,10 +35,7 @@ from io import BytesIO from openai import OpenAI -def get_Whisper(lollms_paths:LollmsPaths): - return LollmsOpenAIWhisper - -class LollmsOpenAIWhisper: +class LollmsOpenAIWhisper(LollmsSTT): def __init__( self, app:LollmsApplication, @@ -45,10 +43,8 @@ class LollmsOpenAIWhisper: api_key="", output_path=None ): + super().__init__(app, model, output_path) self.client = OpenAI(api_key=api_key) - self.app = app - self.model = model - self.output_path = output_path self.ready = True def transcribe( @@ -67,4 +63,4 @@ class LollmsOpenAIWhisper: file=audio_file, response_format="text" ) - return transcription + return transcription \ No newline at end of file diff --git a/lollms/services/whisper/lollms_whisper.py b/lollms/services/whisper/lollms_whisper.py index 3ec0a7c..f5c49f4 100644 --- a/lollms/services/whisper/lollms_whisper.py +++ b/lollms/services/whisper/lollms_whisper.py @@ -30,13 +30,13 @@ class LollmsWhisper(LollmsSTT): model="small", output_path=None ): - self.app = app - self.output_path = output_path + super().__init__(app, model, output_path) self.whisper = whisper.load_model(model) + self.ready = True def transcribe( self, - wav_path: str|Path + wave_path: str|Path ): - result = self.whisper.transcribe(str(wav_path)) + result = self.whisper.transcribe(str(wave_path)) return result diff --git a/lollms/services/xtts/log.txt b/lollms/services/xtts/log.txt deleted file mode 100644 index e69de29..0000000 diff --git a/lollms/stt.py b/lollms/stt.py index 56ff0b4..070460b 100644 --- a/lollms/stt.py +++ b/lollms/stt.py @@ -24,7 +24,8 @@ class LollmsSTT: self, app: LollmsApplication, model="", - output_path=None + output_path=None, + models=[] ): """ Initializes the LollmsSTT class with the given parameters. @@ -38,6 +39,7 @@ class LollmsSTT: self.app = app self.output_path = output_path self.model = model + self.models = models def transcribe( self, @@ -52,7 +54,10 @@ class LollmsSTT: prompt (str, optional): An optional prompt to guide the transcription. Defaults to an empty string. """ pass - + + def get_models(self): + return self.models + @staticmethod def verify(app: LollmsApplication) -> bool: """ @@ -90,4 +95,4 @@ class LollmsSTT: Returns: LollmsSTT: The LollmsSTT class. """ - return LollmsSTT \ No newline at end of file + return LollmsSTT