Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-18 20:27:58 +00:00)

Commit 397d21a3be ("upgraded"), parent 42ebabfe0d
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 96
+version: 98
 binding_name: null
 model_name: null
 model_variant: null
@@ -80,10 +80,30 @@ auto_show_browser: true
 # copy to clipboard
 copy_to_clipboard_add_all_details: false
 
+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
 # STT service
 asr_enable: false
 asr_base_url: http://localhost:9000
 
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+# ***************** TTS *****************
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
@@ -101,6 +121,13 @@ xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
 
+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860
@@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
 dall_e_key: ""
 dall_e_generation_engine: "dall-e-3"
 
+# Midjourney service key
+midjourney_key: ""
 
 # Image generation service comfyui
 enable_comfyui_service: false
@@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861
 
+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@@ -202,6 +233,3 @@ show_code_of_conduct: true
 activate_audio_infos: true
 
 
-# whisper configuration
-whisper_activate: false
-whisper_model: base
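The three new active_*_service keys are what the application reads at startup to decide which text to speech, text to image and speech to text back ends to wire up (see start_servers below). As a minimal illustration that is not part of this commit, the keys could be read like this, assuming a plain YAML load of the configuration file:

import yaml  # pip install pyyaml

# Hypothetical illustration: the config file path is an assumption,
# the key names come from the diff above.
with open("config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

print(cfg.get("active_tts_service", "None"))  # "xtts" or "openai_tts"
print(cfg.get("active_tti_service", "None"))  # "autosd", "dall-e" or "midjourney"
print(cfg.get("active_stt_service", "None"))  # "whisper", "asr" or "openai_whisper"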
lollms/app.py (150 changed lines)

@@ -221,7 +221,21 @@ class LollmsApplication(LoLLMsCom):
     def get_uploads_path(self, client_id):
         return self.lollms_paths.personal_uploads_path
 
-    def start_servers( self ):
+    def start_servers(self):
+        self.ollama = None
+        self.vllm = None
+        self.whisper = None
+        self.xtts = None
+        self.sd = None
+        self.comfyui = None
+        self.motion_ctrl = None
+
+        self.tti = None
+        self.tts = None
+        self.stt = None
+
+
+
         if self.config.enable_ollama_service:
             try:
                 from lollms.services.ollama.lollms_ollama import Service
@@ -240,13 +254,11 @@ class LollmsApplication(LoLLMsCom):
 
         if self.config.whisper_activate:
             try:
-                from lollms.media import AudioRecorder
-                self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
-                self.rec.start_recording()
-                time.sleep(1)
-                self.rec.stop_recording()
-            except:
-                pass
+                from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
+            except Exception as ex:
+                trace_exception(ex)
+
         if self.config.xtts_enable:
             try:
                 from lollms.services.xtts.lollms_xtts import LollmsXTTS
@@ -256,7 +268,7 @@ class LollmsApplication(LoLLMsCom):
                 else:
                     voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
 
-                self.tts = LollmsXTTS(
+                self.xtts = LollmsXTTS(
                     self,
                     voices_folder=voices_folder,
                     voice_samples_path=self.lollms_paths.custom_voices_path,
@@ -291,6 +303,126 @@ class LollmsApplication(LoLLMsCom):
                 self.warning(f"Couldn't load Motion control")
 
 
+        if self.config.active_tti_service == "autosd":
+            from lollms.services.sd.lollms_sd import LollmsSD
+            self.tti = LollmsSD(self)
+        elif self.config.active_tti_service == "dall-e":
+            from lollms.services.dalle.lollms_dalle import LollmsDalle
+            self.tti = LollmsDalle(self, self.config.dall_e_key)
+        elif self.config.active_tti_service == "midjourney":
+            from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
+            self.tti = LollmsMidjourney(self, self.config.midjourney_key)
+
+        if self.config.active_tts_service == "openai_tts":
+            from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
+            self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
+        elif self.config.active_tts_service == "xtts" and self.xtts:
+            self.tts = self.xtts
+
+        if self.config.active_stt_service == "openai_whisper":
+            from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
+            self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
+        elif self.config.active_stt_service == "whisper":
+            from lollms.services.whisper.lollms_whisper import LollmsWhisper
+            self.stt = LollmsWhisper(self, self.config.whisper_model)
+
+
+    def verify_servers(self, reload_all=False):
+
+        try:
+            if self.config.enable_ollama_service and self.ollama is None:
+                try:
+                    from lollms.services.ollama.lollms_ollama import Service
+                    self.ollama = Service(self, base_url=self.config.ollama_base_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load Ollama")
+
+            if self.config.enable_vllm_service and self.vllm is None:
+                try:
+                    from lollms.services.vllm.lollms_vllm import Service
+                    self.vllm = Service(self, base_url=self.config.vllm_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load vllm")
+
+            if self.config.whisper_activate and self.whisper is None:
+                try:
+                    from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                    self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
+                except Exception as ex:
+                    trace_exception(ex)
+            if self.config.xtts_enable and self.xtts is None:
+                try:
+                    from lollms.services.xtts.lollms_xtts import LollmsXTTS
+                    voice=self.config.xtts_current_voice
+                    if voice!="main_voice":
+                        voices_folder = self.lollms_paths.custom_voices_path
+                    else:
+                        voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+
+                    self.xtts = LollmsXTTS(
+                        self,
+                        voices_folder=voices_folder,
+                        voice_samples_path=self.lollms_paths.custom_voices_path,
+                        xtts_base_url=self.config.xtts_base_url,
+                        wait_for_service=False,
+                        use_deep_speed=self.config.xtts_use_deepspeed,
+                        use_streaming_mode=self.config.xtts_use_streaming_mode
+                    )
+                except:
+                    self.warning(f"Couldn't load XTTS")
+
+            if self.config.enable_sd_service and self.sd is None:
+                try:
+                    from lollms.services.sd.lollms_sd import LollmsSD
+                    self.sd = LollmsSD(self, auto_sd_base_url=self.config.sd_base_url)
+                except:
+                    self.warning(f"Couldn't load SD")
+
+            if self.config.enable_comfyui_service and self.comfyui is None:
+                try:
+                    from lollms.services.comfyui.lollms_comfyui import LollmsComfyUI
+                    self.comfyui = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
+                except:
+                    self.warning(f"Couldn't load SD")
+
+            if self.config.enable_motion_ctrl_service and self.motion_ctrl is None:
+                try:
+                    from lollms.services.motion_ctrl.lollms_motion_ctrl import Service
+                    self.motion_ctrl = Service(self, base_url=self.config.motion_ctrl_base_url)
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.warning(f"Couldn't load Motion control")
+
+
+            if self.config.active_tti_service == "autosd":
+                from lollms.services.sd.lollms_sd import LollmsSD
+                self.tti = LollmsSD(self)
+            elif self.config.active_tti_service == "dall-e":
+                from lollms.services.dalle.lollms_dalle import LollmsDalle
+                self.tti = LollmsDalle(self, self.config.dall_e_key)
+            elif self.config.active_tti_service == "midjourney":
+                from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
+                self.tti = LollmsMidjourney(self, self.config.midjourney_key)
+
+            if self.config.active_tts_service == "openai_tts":
+                from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
+                self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
+            elif self.config.active_stt_service == "xtts" and self.xtts:
+                self.tts = self.xtts
+
+            if self.config.active_stt_service == "openai_whisper":
+                from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
+                self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
+            elif self.config.active_stt_service == "whisper":
+                from lollms.services.whisper.lollms_whisper import LollmsWhisper
+                self.stt = LollmsWhisper(self, self.config.whisper_model)
+
+        except Exception as ex:
+            trace_exception(ex)
+
+
     def build_long_term_skills_memory(self):
         discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
         discussion_db_name.mkdir(exist_ok=True, parents=True)
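The shape of the change above is: start_servers() now initializes every service handle (ollama, vllm, whisper, xtts, sd, comfyui, motion_ctrl, plus the generic tti/tts/stt slots) to None, and the new verify_servers() builds only the services that are enabled in the configuration and not yet constructed. A self-contained sketch of that lazy pattern, with illustrative names rather than the real lollms classes:

# Illustrative sketch of the lazy start/verify pattern used above (not lollms code).
class FakeService:
    def __init__(self, base_url):
        self.base_url = base_url

class App:
    def __init__(self, config):
        self.config = config

    def start_servers(self):
        # every handle starts empty, like self.ollama / self.xtts / ... above
        self.ollama = None
        self.xtts = None

    def verify_servers(self):
        # only build what is enabled and not built yet
        if self.config.get("enable_ollama_service") and self.ollama is None:
            self.ollama = FakeService(self.config["ollama_base_url"])
        if self.config.get("xtts_enable") and self.xtts is None:
            self.xtts = FakeService(self.config["xtts_base_url"])

app = App({"enable_ollama_service": True, "ollama_base_url": "http://localhost:11434"})
app.start_servers()
app.verify_servers()                  # builds ollama, leaves xtts as None
print(app.ollama.base_url, app.xtts)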
@@ -186,6 +186,7 @@ async def apply_settings(request: Request):
             lollmsElfServer.config.config[key] = config.get(key, lollmsElfServer.config.config[key])
         ASCIIColors.success("OK")
         lollmsElfServer.rebuild_personalities()
+        lollmsElfServer.verify_servers()
         if lollmsElfServer.config.auto_save:
             lollmsElfServer.config.save_config()
         return {"status":True}
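The added call means that saving settings now also re-verifies the service back ends, so enabling a service from the UI takes effect without a restart. A hedged client-side sketch follows; the base URL, route and payload shape are assumptions, not taken from this diff:

import requests

# Assumed base URL, route and payload wrapping; the keys mirror the config handled above.
payload = {"config": {"xtts_enable": True, "active_tts_service": "xtts"}}
r = requests.post("http://localhost:9600/apply_settings", json=payload)
print(r.json())  # expected {"status": true} when the settings were applied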
@@ -92,8 +92,8 @@ def start_sd(data: Identification):
        return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
 
    lollmsElfServer.ShowBlockingMessage("Starting SD api server\nPlease stand by")
-    from lollms.services.sd.lollms_sd import get_sd
-    lollmsElfServer.sd = get_sd(lollmsElfServer.lollms_paths)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
+    from lollms.services.sd.lollms_sd import LollmsSD
+    lollmsElfServer.sd = LollmsSD.get(lollmsElfServer)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
    ASCIIColors.success("Done")
    lollmsElfServer.HideBlockingMessage()
    return {"status":True}
@@ -8,6 +8,7 @@ description:
 
 """
 from fastapi import APIRouter, Request, UploadFile, File, HTTPException
+from fastapi.responses import PlainTextResponse
 from lollms_webui import LOLLMSWebUI
 from pydantic import BaseModel
 from starlette.responses import StreamingResponse
@@ -39,10 +40,7 @@ def list_voices():
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
 
    ASCIIColors.yellow("Listing voices")
-    voices=["main_voice"]
-    voices_dir:Path=lollmsElfServer.lollms_paths.custom_voices_path
-    voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
-    return {"voices":voices}
+    return {"voices":lollmsElfServer.tts.get_voices()}
 
 @router.post("/set_voice")
 async def set_voice(request: Request):
@@ -70,6 +68,24 @@ async def set_voice(request: Request):
        return {"status":False,"error":str(ex)}
 
 
+class LollmsAudio2TextRequest(BaseModel):
+    wave_file_path: str
+    voice: str = None
+    fn:str = None
+
+@router.post("/audio2text")
+async def audio2text(request: LollmsAudio2TextRequest):
+    if lollmsElfServer.config.headless_server_mode:
+        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}
+
+    if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
+        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
+
+    result = lollmsElfServer.whisper.transcribe(str(request.wave_file_path))
+    return PlainTextResponse(result)
+
+
+
 class LollmsText2AudioRequest(BaseModel):
    text: str
    voice: str = None
@@ -94,67 +110,13 @@ async def text2Audio(request: LollmsText2AudioRequest):
    validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
 
    try:
-        # Get the JSON data from the POST request.
-        try:
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            voice=lollmsElfServer.config.xtts_current_voice
-            if lollmsElfServer.tts is None:
-                voice=lollmsElfServer.config.xtts_current_voice
-                if voice!="main_voice":
-                    voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
-                else:
-                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-                lollmsElfServer.tts = LollmsXTTS(
-                    lollmsElfServer,
-                    voices_folder=voices_folder,
-                    voice_samples_path=Path(__file__).parent/"voices",
-                    xtts_base_url= lollmsElfServer.config.xtts_base_url,
-                    use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
-                    use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
-                )
-        except Exception as ex:
-            return {"url": None, "error":f"{ex}"}
-
-        voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
-        index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
-        output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
-        if voice is None:
-            voice = "main_voice"
-        lollmsElfServer.info("Starting to build voice")
-        try:
-            from lollms.services.xtts.lollms_xtts import LollmsXTTS
-            # If the personality has a voice, then use it
-            personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
-            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
-                voices_folder = personality_audio
-            elif voice!="main_voice":
-                voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
-            else:
-                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-            if lollmsElfServer.tts is None:
-                lollmsElfServer.tts = LollmsXTTS(
-                    lollmsElfServer,
-                    voices_folder=voices_folder,
-                    voice_samples_path=Path(__file__).parent/"voices",
-                    xtts_base_url= lollmsElfServer.config.xtts_base_url,
-                    use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
-                    use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
-                )
-            if lollmsElfServer.tts.ready:
-                language = lollmsElfServer.config.xtts_current_language# convert_language_name()
-                lollmsElfServer.tts.set_speaker_folder(voices_folder)
-                preprocessed_text= add_period(request.text)
-                voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
-                if len(voice_file)==0:
-                    return {"status":False,"error":"Voice not found"}
-                lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
-            else:
-                lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
-                return {"status":False, "error":"Service not ready yet"}
-        except Exception as ex:
-            trace_exception(ex)
-            return {"url": None}
+        if lollmsElfServer.tts is None:
+            return {"url": None, "error":f"No TTS service is on"}
+        if lollmsElfServer.tts.ready:
+            response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
+            return response
+        else:
+            return {"url": None, "error":f"TTS service is not ready yet"}
    except Exception as ex:
        trace_exception(ex)
        lollmsElfServer.error(ex)
@@ -255,9 +217,9 @@ def install_xtts(data:Identification):
        if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
            return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
 
-        from lollms.services.xtts.lollms_xtts import install_xtts
+        from lollms.services.xtts.lollms_xtts import LollmsTTS
        lollmsElfServer.ShowBlockingMessage("Installing xTTS api server\nPlease stand by")
-        install_xtts(lollmsElfServer)
+        LollmsTTS.install(lollmsElfServer)
        lollmsElfServer.HideBlockingMessage()
        return {"status":True}
    except Exception as ex:
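A hedged example of calling the new /audio2text endpoint: the field name wave_file_path comes from LollmsAudio2TextRequest above, while the host, port and route prefix are assumptions. The path must point to a wav file readable by the server process, and the response body is the plain-text transcription.

import requests

resp = requests.post(
    "http://localhost:9600/audio2text",                  # assumed base URL
    json={"wave_file_path": "/path/to/recording.wav"},   # field name from the request model above
)
print(resp.text)  # PlainTextResponse: the transcription itself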
@@ -1,10 +1,7 @@
 # Title LollmsDalle
-# Licence: MIT
+# Licence: Apache 2.0
 # Author : Paris Neo
-# Adapted from the work of mix1009's sdwebuiapi
-# check it out : https://github.com/mix1009/sdwebuiapi/tree/main
-# Here is a copy of the LICENCE https://github.com/mix1009/sdwebuiapi/blob/main/LICENSE
-# All rights are reserved
 
 from pathlib import Path
 import sys
@@ -29,17 +26,16 @@ from typing import List, Dict, Any
 from ascii_colors import ASCIIColors, trace_exception
 from lollms.paths import LollmsPaths
 from lollms.utilities import PackageManager, find_next_available_filename
+from lollms.tti import LollmsTTI
 import subprocess
 import shutil
 from tqdm import tqdm
 import threading
 from io import BytesIO
 
-def get_Dalli(lollms_paths:LollmsPaths):
-    return LollmsDalle
-
-class LollmsDalle:
-    has_controlnet = False
+class LollmsDalle(LollmsTTI):
    def __init__(
                    self,
                    app:LollmsApplication,
@@ -47,7 +43,7 @@
                    generation_engine="dall-e-3",# other possibility "dall-e-2"
                    output_path=None
                    ):
-        self.app = app
+        super().__init__(app)
        self.key = key
        self.generation_engine = generation_engine
        self.output_path = output_path
@@ -141,3 +137,6 @@
            ASCIIColors.red("Failed to download the image")
 
        return file_name
+    @staticmethod
+    def get(app:LollmsApplication):
+        return LollmsDalle
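The class now derives from the LollmsTTI base and exposes a static get() accessor, mirroring the LollmsSD.get() call used in start_sd() above. A hedged sketch of the resulting construction pattern; building a real LollmsApplication instance is out of scope here:

from lollms.services.dalle.lollms_dalle import LollmsDalle

def build_dalle(app, api_key):
    # get() returns the class itself; the (app, key) call matches start_servers above
    service_cls = LollmsDalle.get(app)
    return service_cls(app, api_key)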
New file: lollms/services/midjourney/lollms_midjourney.py (142 lines)

# Title LollmsMidjourney
# Licence: Apache 2.0
# Author : Paris Neo


from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
from lollms.tti import LollmsTTI
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO



class LollmsMidjourney(LollmsTTI):
    def __init__(
                    self,
                    app:LollmsApplication,
                    key="",
                    generation_engine="dall-e-3",# other possibility "dall-e-2"
                    output_path=None
                    ):
        super().__init__(app)
        self.key = key
        self.generation_engine = generation_engine
        self.output_path = output_path

    def paint(
                self,
                prompt,
                width=512,
                height=512,
                images = [],
                generation_engine=None,
                output_path = None
                ):
        if output_path is None:
            output_path = self.output_path
        if generation_engine is None:
            generation_engine = self.generation_engine
        if not PackageManager.check_package_installed("openai"):
            PackageManager.install_package("openai")
        import openai
        openai.api_key = self.key
        if generation_engine=="dall-e-2":
            supported_resolutions = [
                [512, 512],
                [1024, 1024],
            ]
            # Find the closest resolution
            closest_resolution = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))

        else:
            supported_resolutions = [
                [1024, 1024],
                [1024, 1792],
                [1792, 1024]
            ]
            # Find the closest resolution
            if width>height:
                closest_resolution = [1792, 1024]
            elif width<height:
                closest_resolution = [1024, 1792]
            else:
                closest_resolution = [1024, 1024]


        # Update the width and height
        width = closest_resolution[0]
        height = closest_resolution[1]

        if len(images)>0 and generation_engine=="dall-e-2":
            # Read the image file from disk and resize it
            image = Image.open(self.personality.image_files[0])
            width, height = width, height
            image = image.resize((width, height))

            # Convert the image to a BytesIO object
            byte_stream = BytesIO()
            image.save(byte_stream, format='PNG')
            byte_array = byte_stream.getvalue()
            response = openai.images.create_variation(
                image=byte_array,
                n=1,
                model=generation_engine, # for now only dalle 2 supports variations
                size=f"{width}x{height}"
            )
        else:
            response = openai.images.generate(
                model=generation_engine,
                prompt=prompt.strip(),
                quality="standard",
                size=f"{width}x{height}",
                n=1,
                )
        # download image to outputs
        output_dir = Path(output_path)
        output_dir.mkdir(parents=True, exist_ok=True)
        image_url = response.data[0].url

        # Get the image data from the URL
        response = requests.get(image_url)

        if response.status_code == 200:
            # Generate the full path for the image file
            file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_") # You can change the filename if needed

            # Save the image to the specified folder
            with open(file_name, "wb") as file:
                file.write(response.content)
            ASCIIColors.yellow(f"Image saved to {file_name}")
        else:
            ASCIIColors.red("Failed to download the image")

        return file_name
    @staticmethod
    def get(app:LollmsApplication):
        return LollmsMidjourney
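The only non-boilerplate logic in this new file is the resolution snapping inside paint(): dall-e-2 style engines get the nearest supported square size, while dall-e-3 style engines get one of three fixed aspect ratios. A standalone restatement of that rule:

# Standalone illustration of the resolution snapping used in LollmsMidjourney.paint above.
def closest_resolution(width, height, engine="dall-e-3"):
    if engine == "dall-e-2":
        supported = [[512, 512], [1024, 1024]]
        return min(supported, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
    if width > height:
        return [1792, 1024]
    if width < height:
        return [1024, 1792]
    return [1024, 1024]

print(closest_resolution(640, 480, "dall-e-2"))   # [512, 512]
print(closest_resolution(1920, 1080))             # [1792, 1024]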
New file: lollms/services/open_ai_tts/lollms_openai_tts.py (108 lines)

# Title LollmsOpenAITTS
# Licence: MIT
# Author : Paris Neo
# Uses open AI api to perform text to speech
#

from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
from lollms.tts import LollmsTTS
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO
from openai import OpenAI

if not PackageManager.check_package_installed("sounddevice"):
    PackageManager.install_package("sounddevice")
if not PackageManager.check_package_installed("soundfile"):
    PackageManager.install_package("soundfile")

import sounddevice as sd
import soundfile as sf

def get_Whisper(lollms_paths:LollmsPaths):
    return LollmsOpenAITTS

class LollmsOpenAITTS(LollmsTTS):
    def __init__(
                    self,
                    app:LollmsApplication,
                    model ="tts-1",
                    voice="alloy",
                    api_key="",
                    output_path=None
                    ):
        super().__init__(app, model, voice, api_key, output_path)
        self.client = OpenAI(api_key=api_key)
        self.voices = [
            "alloy",
            "echo",
            "fable",
            "nova",
            "shimmer"
        ]
        self.models = [
            "tts-1"
        ]

        self.voice = voice
        self.output_path = output_path
        self.ready = True


    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        speech_file_path = file_name_or_path
        response = self.client.audio.speech.create(
            model=self.model,
            voice=self.voice,
            input=text,
            response_format="wav"

        )

        response.write_to_file(speech_file_path)

    def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        speech_file_path = file_name_or_path
        response = self.client.audio.speech.create(
            model=self.model,
            voice=self.voice,
            input=text,
            response_format="wav"

        )

        response.write_to_file(speech_file_path)
        def play_audio(file_path):
            # Read the audio file
            data, fs = sf.read(file_path, dtype='float32')
            # Play the audio file
            sd.play(data, fs)
            # Wait until the file is done playing
            sd.wait()

        # Example usage
        play_audio(speech_file_path)
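A hedged usage sketch for the new LollmsOpenAITTS wrapper. It needs an OpenAI API key and a LollmsApplication instance, both assumed here; note that tts_to_file() uses the voice chosen in the constructor and ignores its speaker argument.

from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS

def speak(app, text, out_path="hello.wav"):
    # model, voice and key follow the constructor shown above; the key is a placeholder
    tts = LollmsOpenAITTS(app, "tts-1", "alloy", api_key="sk-...")
    tts.tts_to_file(text, speaker=None, file_name_or_path=out_path)
    return out_path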
New file: lollms/services/openai_whisper/lollms_whisper.py (70 lines)

# Title LollmsOpenAIWhisper
# Licence: MIT
# Author : Paris Neo
#

from pathlib import Path
import sys
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
import time
import io
import sys
import requests
import os
import base64
import subprocess
import time
import json
import platform
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import PackageManager, find_next_available_filename
import subprocess
import shutil
from tqdm import tqdm
import threading
from io import BytesIO
from openai import OpenAI


def get_Whisper(lollms_paths:LollmsPaths):
    return LollmsOpenAIWhisper

class LollmsOpenAIWhisper:
    def __init__(
                    self,
                    app:LollmsApplication,
                    model="whisper-1",
                    api_key="",
                    output_path=None
                    ):
        self.client = OpenAI(api_key=api_key)
        self.app = app
        self.model = model
        self.output_path = output_path
        self.ready = True

    def transcribe(
                self,
                wav_path: str|Path,
                model:str="",
                output_path:str|Path=None
                ):
        if model=="" or model is None:
            model = self.model
        if output_path is None:
            output_path = self.output_path
        audio_file= open(str(wav_path), "rb")
        transcription = self.client.audio.transcriptions.create(
            model=model,
            file=audio_file,
            response_format="text"
        )
        return transcription
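A hedged usage sketch for the new OpenAI-hosted Whisper wrapper; app is an assumed LollmsApplication instance, the key is a placeholder, and transcribe() returns the transcription text produced by the OpenAI transcription API.

from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper

def transcribe_remote(app, wav_path):
    stt = LollmsOpenAIWhisper(app, model="whisper-1", api_key="sk-...")
    return stt.transcribe(wav_path)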
@@ -28,18 +28,14 @@ from typing import List, Dict, Any
 
 from ascii_colors import ASCIIColors, trace_exception
 from lollms.paths import LollmsPaths
+from lollms.tti import LollmsTTI
 from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env
 import subprocess
 import shutil
 from tqdm import tqdm
 import threading
 
-def verify_sd(lollms_paths:LollmsPaths):
-    # Clone repository
-    root_dir = lollms_paths.personal_path
-    shared_folder = root_dir/"shared"
-    sd_folder = shared_folder / "auto_sd"
-    return sd_folder.exists()
-
 def download_file(url, folder_path, local_filename):
    # Make sure 'folder_path' exists
@@ -137,20 +133,6 @@ def upgrade_sd(lollms_app:LollmsApplication):
    ASCIIColors.success("DONE")
 
 
-def get_sd(lollms_paths:LollmsPaths):
-    root_dir = lollms_paths.personal_path
-    shared_folder = root_dir/"shared"
-    sd_folder = shared_folder / "auto_sd"
-    sd_script_path = sd_folder / "lollms_sd.py"
-    git_pull(sd_folder)
-
-    if sd_script_path.exists():
-        ASCIIColors.success("lollms_sd found.")
-        ASCIIColors.success("Loading source file...",end="")
-        # use importlib to load the module from the file path
-        from lollms.services.sd.lollms_sd import LollmsSD
-        ASCIIColors.success("ok")
-        return LollmsSD
-
-
 def raw_b64_img(image: Image) -> str:
@@ -274,7 +256,7 @@ class ControlNetUnit:
            "pixel_perfect": self.pixel_perfect,
        }
 
-class LollmsSD:
+class LollmsSD(LollmsTTI):
    has_controlnet = False
    def __init__(
                    self,
@@ -290,19 +272,19 @@
                    share=False,
                    wait_for_service=True
                    ):
+        super().__init__(app)
        if auto_sd_base_url=="" or auto_sd_base_url=="http://127.0.0.1:7860":
            auto_sd_base_url = None
        self.ready = False
        # Get the current directory
        lollms_paths = app.lollms_paths
-        self.app = app
        root_dir = lollms_paths.personal_path
 
        self.wm = wm
        # Store the path to the script
        if auto_sd_base_url is None:
            self.auto_sd_base_url = "http://127.0.0.1:7860"
-            if not verify_sd(lollms_paths):
+            if not LollmsSD.verify(app):
                install_sd(app.lollms_paths)
        else:
            self.auto_sd_base_url = auto_sd_base_url
@@ -364,6 +346,30 @@
        else:
            self.check_controlnet()
 
+    @staticmethod
+    def verify(app:LollmsApplication):
+        # Clone repository
+        root_dir = app.lollms_paths.personal_path
+        shared_folder = root_dir/"shared"
+        sd_folder = shared_folder / "auto_sd"
+        return sd_folder.exists()
+
+    def get(app:LollmsApplication):
+        root_dir = app.lollms_paths.personal_path
+        shared_folder = root_dir/"shared"
+        sd_folder = shared_folder / "auto_sd"
+        sd_script_path = sd_folder / "lollms_sd.py"
+        git_pull(sd_folder)
+
+        if sd_script_path.exists():
+            ASCIIColors.success("lollms_sd found.")
+            ASCIIColors.success("Loading source file...",end="")
+            # use importlib to load the module from the file path
+            from lollms.services.sd.lollms_sd import LollmsSD
+            ASCIIColors.success("ok")
+            return LollmsSD
+
+
    def paint(
                self,
                sd_positive_prompt,
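The refactor above moves the module-level verify_sd()/get_sd() helpers into LollmsSD.verify()/LollmsSD.get(), which is what the reworked start_sd() endpoint calls. A hedged construction sketch; the second constructor argument is the watermark/author name, as in the start_sd() call above:

from lollms.services.sd.lollms_sd import LollmsSD

def build_sd(app, name="Artbot"):
    if not LollmsSD.verify(app):    # True when shared/auto_sd exists under the personal path
        raise RuntimeError("Automatic1111 SD is not installed")
    sd_cls = LollmsSD.get(app)      # pulls the repo and returns the class
    return sd_cls(app, name)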
New file: lollms/services/whisper/lollms_whisper.py (42 lines)

# Title LollmsWhisper
# Licence: MIT
# Author : Paris Neo
#

from pathlib import Path
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
from lollms.utilities import PackageManager
from lollms.stt import LollmsSTT
from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any

from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
import subprocess

if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("whisper")
import whisper


class LollmsWhisper(LollmsSTT):
    def __init__(
                    self,
                    app:LollmsApplication,
                    model="small",
                    output_path=None
                    ):
        self.app = app
        self.output_path = output_path
        self.whisper = whisper.load_model(model)

    def transcribe(
                self,
                wav_path: str|Path
                ):
        result = self.whisper.transcribe(str(wav_path))
        return result
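A hedged usage sketch for the new local Whisper wrapper; app is an assumed LollmsApplication instance. The underlying whisper package downloads the chosen model on first use, and its transcribe() typically returns a dict with a "text" key.

from lollms.services.whisper.lollms_whisper import LollmsWhisper

def transcribe_locally(app, wav_path):
    stt = LollmsWhisper(app, model="base")   # "base" matches the default in the config above
    result = stt.transcribe(wav_path)
    return result["text"]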
@ -11,7 +11,7 @@ import sys
|
|||||||
from lollms.app import LollmsApplication
|
from lollms.app import LollmsApplication
|
||||||
from lollms.paths import LollmsPaths
|
from lollms.paths import LollmsPaths
|
||||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||||
from lollms.utilities import PackageManager
|
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
|
||||||
import time
|
import time
|
||||||
import io
|
import io
|
||||||
import sys
|
import sys
|
||||||
@ -32,59 +32,11 @@ import uuid
|
|||||||
from ascii_colors import ASCIIColors, trace_exception
|
from ascii_colors import ASCIIColors, trace_exception
|
||||||
from lollms.paths import LollmsPaths
|
from lollms.paths import LollmsPaths
|
||||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
||||||
|
from lollms.tts import LollmsTTS
|
||||||
import subprocess
|
import subprocess
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
def verify_xtts(lollms_paths:LollmsPaths):
|
|
||||||
# Clone repository
|
|
||||||
root_dir = lollms_paths.personal_path
|
|
||||||
shared_folder = root_dir/"shared"
|
|
||||||
xtts_path = shared_folder / "xtts"
|
|
||||||
return xtts_path.exists()
|
|
||||||
|
|
||||||
def install_xtts(lollms_app:LollmsApplication):
|
|
||||||
ASCIIColors.green("XTTS installation started")
|
|
||||||
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
|
||||||
root_dir = lollms_app.lollms_paths.personal_path
|
|
||||||
shared_folder = root_dir/"shared"
|
|
||||||
xtts_path = shared_folder / "xtts"
|
|
||||||
|
|
||||||
# Step 1: Clone or update the repository
|
|
||||||
if os.path.exists(xtts_path):
|
|
||||||
print("Repository already exists. Pulling latest changes...")
|
|
||||||
try:
|
|
||||||
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
|
||||||
except:
|
|
||||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
|
||||||
|
|
||||||
else:
|
|
||||||
print("Cloning repository...")
|
|
||||||
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
|
||||||
|
|
||||||
# Step 2: Create or update the Conda environment
|
|
||||||
if environment_exists("xtts"):
|
|
||||||
print("Conda environment 'xtts' already exists. Updating...")
|
|
||||||
# Here you might want to update the environment, e.g., update Python or dependencies
|
|
||||||
# This step is highly dependent on how you manage your Conda environments and might involve
|
|
||||||
# running `conda update` commands or similar.
|
|
||||||
else:
|
|
||||||
print("Creating Conda environment 'xtts'...")
|
|
||||||
create_conda_env("xtts", "3.8")
|
|
||||||
|
|
||||||
# Step 3: Install or update dependencies using your custom function
|
|
||||||
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
|
||||||
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
|
||||||
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
|
||||||
|
|
||||||
# Step 4: Launch the server
|
|
||||||
# Assuming the server can be started with a Python script in the cloned repository
|
|
||||||
print("Launching XTTS API server...")
|
|
||||||
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
|
||||||
|
|
||||||
print("XTTS API server setup and launch completed.")
|
|
||||||
ASCIIColors.cyan("Done")
|
|
||||||
ASCIIColors.cyan("Installing xtts-api-server")
|
|
||||||
ASCIIColors.green("XTTS server installed successfully")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,8 +55,7 @@ def get_xtts(lollms_paths:LollmsPaths):
|
|||||||
ASCIIColors.success("ok")
|
ASCIIColors.success("ok")
|
||||||
return LollmsXTTS
|
return LollmsXTTS
|
||||||
|
|
||||||
class LollmsXTTS:
|
class LollmsXTTS(LollmsTTS):
|
||||||
has_controlnet = False
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
app:LollmsApplication,
|
app:LollmsApplication,
|
||||||
@ -117,6 +68,7 @@ class LollmsXTTS:
|
|||||||
use_deep_speed=False,
|
use_deep_speed=False,
|
||||||
use_streaming_mode = True
|
use_streaming_mode = True
|
||||||
):
|
):
|
||||||
|
super().__init__(app)
|
||||||
self.generation_threads = []
|
self.generation_threads = []
|
||||||
self.voices_folder = voices_folder
|
self.voices_folder = voices_folder
|
||||||
self.ready = False
|
self.ready = False
|
||||||
@ -124,7 +76,6 @@ class LollmsXTTS:
|
|||||||
xtts_base_url = None
|
xtts_base_url = None
|
||||||
# Get the current directory
|
# Get the current directory
|
||||||
lollms_paths = app.lollms_paths
|
lollms_paths = app.lollms_paths
|
||||||
self.app = app
|
|
||||||
root_dir = lollms_paths.personal_path
|
root_dir = lollms_paths.personal_path
|
||||||
self.voice_samples_path = voice_samples_path
|
self.voice_samples_path = voice_samples_path
|
||||||
self.use_deep_speed = use_deep_speed
|
self.use_deep_speed = use_deep_speed
|
||||||
@ -133,8 +84,8 @@ class LollmsXTTS:
|
|||||||
# Store the path to the script
|
# Store the path to the script
|
||||||
if xtts_base_url is None:
|
if xtts_base_url is None:
|
||||||
self.xtts_base_url = "http://127.0.0.1:8020"
|
self.xtts_base_url = "http://127.0.0.1:8020"
|
||||||
if not verify_xtts(lollms_paths):
|
if not LollmsXTTS.verify(lollms_paths):
|
||||||
install_xtts(app.lollms_paths)
|
LollmsXTTS.install(app)
|
||||||
else:
|
else:
|
||||||
self.xtts_base_url = xtts_base_url
|
self.xtts_base_url = xtts_base_url
|
||||||
|
|
||||||
@ -167,6 +118,57 @@ class LollmsXTTS:
|
|||||||
else:
|
else:
|
||||||
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
||||||
|
|
||||||
|
def install(lollms_app:LollmsApplication):
|
||||||
|
ASCIIColors.green("XTTS installation started")
|
||||||
|
repo_url = "https://github.com/ParisNeo/xtts-api-server"
|
||||||
|
root_dir = lollms_app.lollms_paths.personal_path
|
||||||
|
shared_folder = root_dir/"shared"
|
||||||
|
xtts_path = shared_folder / "xtts"
|
||||||
|
|
||||||
|
# Step 1: Clone or update the repository
|
||||||
|
if os.path.exists(xtts_path):
|
||||||
|
print("Repository already exists. Pulling latest changes...")
|
||||||
|
try:
|
||||||
|
subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
|
||||||
|
except:
|
||||||
|
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print("Cloning repository...")
|
||||||
|
subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
|
||||||
|
|
||||||
|
# Step 2: Create or update the Conda environment
|
||||||
|
if environment_exists("xtts"):
|
||||||
|
print("Conda environment 'xtts' already exists. Updating...")
|
||||||
|
# Here you might want to update the environment, e.g., update Python or dependencies
|
||||||
|
# This step is highly dependent on how you manage your Conda environments and might involve
|
||||||
|
# running `conda update` commands or similar.
|
||||||
|
else:
|
||||||
|
print("Creating Conda environment 'xtts'...")
|
||||||
|
create_conda_env("xtts", "3.8")
|
||||||
|
|
||||||
|
# Step 3: Install or update dependencies using your custom function
|
||||||
|
requirements_path = os.path.join(xtts_path, "requirements.txt")
|
||||||
|
run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
|
||||||
|
run_pip_in_env("xtts", f"install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)
|
||||||
|
|
||||||
|
# Step 4: Launch the server
|
||||||
|
# Assuming the server can be started with a Python script in the cloned repository
|
||||||
|
print("Launching XTTS API server...")
|
||||||
|
run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)
|
||||||
|
|
||||||
|
print("XTTS API server setup and launch completed.")
|
||||||
|
ASCIIColors.cyan("Done")
|
||||||
|
ASCIIColors.cyan("Installing xtts-api-server")
|
||||||
|
ASCIIColors.green("XTTS server installed successfully")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(lollms_paths:LollmsPaths)->bool:
|
||||||
|
# Clone repository
|
||||||
|
root_dir = lollms_paths.personal_path
|
||||||
|
shared_folder = root_dir/"shared"
|
||||||
|
xtts_path = shared_folder / "xtts"
|
||||||
|
return xtts_path.exists()
|
||||||
|
|
||||||
    def run_xtts_api_server(self):
        # Get the path to the current Python interpreter
@@ -198,7 +200,7 @@ class LollmsXTTS:
        if self.voices_folder is not None:
            print("Generating sample audio.")
            voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
            self.tts_to_audio("x t t s is ready",voice_file[0].name)
            self.tts_to_audio("x t t s is ready",voice_file[0].stem)
        print("Service is available.")
        if self.app is not None:
            self.app.success("XTTS Service is now available.")
@@ -237,13 +239,13 @@ class LollmsXTTS:
            print("Request failed with status code:", response.status_code)
            return False

    def tts_to_file(self, text, speaker_wav, file_name_or_path, language="en"):
    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        url = f"{self.xtts_base_url}/tts_to_file"

        # Define the request body
        payload = {
            "text": text,
            "speaker_wav": speaker_wav,
            "speaker_wav": speaker,
            "language": language,
            "file_name_or_path": file_name_or_path
        }
@@ -262,14 +264,43 @@ class LollmsXTTS:
        else:
            print("Request failed with status code:", response.status_code)

    def tts_to_audio(self, text, speaker_wav, file_name_or_path:Path|str=None, language="en", use_threading=False):
    def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        voice = self.app.config.xtts_current_voice if speaker is None else speaker
        index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
        output_fn = f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path
        if voice is None:
            voice = "main_voice"
        self.app.info("Starting to build voice")
        try:
            from lollms.services.xtts.lollms_xtts import LollmsXTTS
            # If the personality has a voice, then use it
            personality_audio:Path = self.app.personality.personality_package_path/"audio"
            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
                voices_folder = personality_audio
            elif voice!="main_voice":
                voices_folder = self.app.lollms_paths.custom_voices_path
            else:
                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
            language = self.app.config.xtts_current_language # convert_language_name()
            self.set_speaker_folder(voices_folder)
            preprocessed_text = add_period(text)
            voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
            if len(voice_file)==0:
                return {"status":False,"error":"Voice not found"}
            self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)

        except Exception as ex:
            trace_exception(ex)
            return {"status":False,"error":f"{ex}"}

    def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        def tts2_audio_th(thread_uid=None):
            url = f"{self.xtts_base_url}/tts_to_audio"

            # Define the request body
            payload = {
                "text": text,
                "speaker_wav": speaker_wav,
                "speaker_wav": speaker,
                "language": language
            }
            headers = {
@@ -308,3 +339,10 @@ class LollmsXTTS:
            return thread
        else:
            return tts2_audio_th()

    def get_voices(self):
        ASCIIColors.yellow("Listing voices")
        voices = ["main_voice"]
        voices_dir:Path = self.app.lollms_paths.custom_voices_path
        voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
        return voices
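As a quick orientation, a hedged usage sketch of the reworked voice API: `get_voices` returns voice stems (including "main_voice"), and `tts_to_audio` now takes a voice name rather than a wav file path. The `xtts` variable below is assumed to be an already-initialized LollmsXTTS instance.

# Sketch only: `xtts` is assumed to be a running LollmsXTTS instance attached to the app.
available = xtts.get_voices()      # e.g. ["main_voice", "my_custom_voice", ...]
result = xtts.tts_to_audio(
    "Hello from lollms",
    speaker=available[0],          # a voice stem, resolved to a .wav in the voices folder
    language="en",
)
if isinstance(result, dict) and not result.get("status", True):
    print("TTS failed:", result["error"])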
93 lollms/stt.py Normal file
@@ -0,0 +1,93 @@
"""
|
||||||
|
Lollms STT Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Speech-to-Text (STT) functionalities within the LollmsApplication framework. The base class `LollmsSTT` is intended to be inherited and implemented by other classes that provide specific STT functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class LollmsSTT:
|
||||||
|
"""
|
||||||
|
LollmsSTT is a base class for implementing Speech-to-Text (STT) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str): The STT model to be used for transcription.
|
||||||
|
output_path (Path or str): Path where the output transcription files will be saved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsSTT class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The STT model to be used for transcription. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output transcription files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.output_path = output_path
|
||||||
|
self.model = model
|
||||||
|
|
||||||
|
def transcribe(
|
||||||
|
self,
|
||||||
|
wav_path: str | Path,
|
||||||
|
prompt=""
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Transcribes the given audio file to text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
wav_path (str or Path): The path to the WAV audio file to be transcribed.
|
||||||
|
prompt (str, optional): An optional prompt to guide the transcription. Defaults to an empty string.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the STT service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the STT service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsSTT':
|
||||||
|
"""
|
||||||
|
Returns the LollmsSTT class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsSTT: The LollmsSTT class.
|
||||||
|
"""
|
||||||
|
return LollmsSTT
|
115 lollms/tti.py Normal file
@@ -0,0 +1,115 @@
"""
|
||||||
|
Lollms TTI Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Text-to-Image (TTI) functionalities within the LollmsApplication framework. The base class `LollmsTTI` is intended to be inherited and implemented by other classes that provide specific TTI functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
class LollmsTTI:
|
||||||
|
"""
|
||||||
|
LollmsTTI is a base class for implementing Text-to-Image (TTI) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str): The TTI model to be used for image generation.
|
||||||
|
api_key (str): API key for accessing external TTI services (if needed).
|
||||||
|
output_path (Path or str): Path where the output image files will be saved.
|
||||||
|
voices (List[str]): List of available voices for TTI (to be filled by the child class).
|
||||||
|
models (List[str]): List of available models for TTI (to be filled by the child class).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
api_key="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsTTI class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The TTI model to be used for image generation. Defaults to an empty string.
|
||||||
|
api_key (str, optional): API key for accessing external TTI services. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output image files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.model = model
|
||||||
|
self.api_key = api_key
|
||||||
|
self.output_path = output_path
|
||||||
|
self.voices = [] # To be filled by the child class
|
||||||
|
self.models = [] # To be filled by the child class
|
||||||
|
|
||||||
|
def paint(self, positive_prompt: str, negative_prompt: str = "") -> List[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Generates images based on the given positive and negative prompts.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
positive_prompt (str): The positive prompt describing the desired image.
|
||||||
|
negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Dict[str, str]]: A list of dictionaries containing image paths, URLs, and metadata.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def paint_from_images(self, positive_prompt: str, images: List[str], negative_prompt: str = "") -> List[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Generates images based on the given positive prompt and reference images.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
positive_prompt (str): The positive prompt describing the desired image.
|
||||||
|
images (List[str]): A list of paths to reference images.
|
||||||
|
negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[Dict[str, str]]: A list of dictionaries containing image paths, URLs, and metadata.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the TTI service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the TTI service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsTTI':
|
||||||
|
"""
|
||||||
|
Returns the LollmsTTI class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsTTI: The LollmsTTI class.
|
||||||
|
"""
|
||||||
|
return LollmsTTI
|
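A minimal, hypothetical subclass sketch showing how a TTI backend would fill in `paint`; the DummyTTI name and the returned metadata keys are illustrative assumptions, not part of this commit.

# Hypothetical example of a concrete TTI backend built on the base class above.
from pathlib import Path
from typing import List, Dict
from lollms.app import LollmsApplication
from lollms.tti import LollmsTTI

class DummyTTI(LollmsTTI):
    def __init__(self, app: LollmsApplication, output_path=None):
        super().__init__(app, model="dummy", output_path=output_path)
        self.models = ["dummy"]
        self.ready = True

    def paint(self, positive_prompt: str, negative_prompt: str = "") -> List[Dict[str, str]]:
        # A real backend (autosd, dall-e, comfyui, ...) would generate an image here;
        # this sketch only reports where the output would be written.
        out = Path(self.output_path or ".") / "dummy.png"
        return [{"path": str(out), "prompt": positive_prompt, "negative_prompt": negative_prompt}]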
122 lollms/tts.py Normal file
@@ -0,0 +1,122 @@
"""
|
||||||
|
Lollms TTS Module
|
||||||
|
=================
|
||||||
|
|
||||||
|
This module is part of the Lollms library, designed to provide Text-to-Speech (TTS) functionalities within the LollmsApplication framework. The base class `LollmsTTS` is intended to be inherited and implemented by other classes that provide specific TTS functionalities.
|
||||||
|
|
||||||
|
Author: ParisNeo, a computer geek passionate about AI
|
||||||
|
"""
|
||||||
|
from lollms.app import LollmsApplication
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class LollmsTTS:
|
||||||
|
"""
|
||||||
|
LollmsTTS is a base class for implementing Text-to-Speech (TTS) functionalities within the LollmsApplication.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
voice (str): The voice model to be used for TTS.
|
||||||
|
api_key (str): API key for accessing external TTS services (if needed).
|
||||||
|
output_path (Path or str): Path where the output audio files will be saved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
app: LollmsApplication,
|
||||||
|
model="",
|
||||||
|
voice="",
|
||||||
|
api_key="",
|
||||||
|
output_path=None
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Initializes the LollmsTTS class with the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
model (str, optional): The speach generation model to be used for TTS. Defaults to "".
|
||||||
|
voice (str, optional): The voice model to be used for TTS. Defaults to "alloy".
|
||||||
|
api_key (str, optional): API key for accessing external TTS services. Defaults to an empty string.
|
||||||
|
output_path (Path or str, optional): Path where the output audio files will be saved. Defaults to None.
|
||||||
|
"""
|
||||||
|
self.ready = False
|
||||||
|
self.app = app
|
||||||
|
self.model = model
|
||||||
|
self.voice = voice
|
||||||
|
self.api_key = api_key
|
||||||
|
self.output_path = output_path
|
||||||
|
self.voices = [] # To be filled by the child class
|
||||||
|
self.models = [] # To be filled by the child class
|
||||||
|
|
||||||
|
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||||
|
"""
|
||||||
|
Converts the given text to speech and saves it to a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text (str): The text to be converted to speech.
|
||||||
|
speaker (str): The speaker/voice model to be used.
|
||||||
|
file_name_or_path (Path or str): The name or path of the output file.
|
||||||
|
language (str, optional): The language of the text. Defaults to "en".
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def tts_to_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False):
|
||||||
|
"""
|
||||||
|
Converts the given text to speech and returns the audio data.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text (str): The text to be converted to speech.
|
||||||
|
speaker (str): The speaker/voice model to be used.
|
||||||
|
file_name_or_path (Path or str, optional): The name or path of the output file. Defaults to None.
|
||||||
|
language (str, optional): The language of the text. Defaults to "en".
|
||||||
|
use_threading (bool, optional): Whether to use threading for the operation. Defaults to False.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def verify(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Verifies if the TTS service is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the service is available, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def install(app: LollmsApplication) -> bool:
|
||||||
|
"""
|
||||||
|
Installs the necessary components for the TTS service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the installation was successful, False otherwise.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get(app: LollmsApplication) -> 'LollmsTTS':
|
||||||
|
"""
|
||||||
|
Returns the LollmsTTS class.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app (LollmsApplication): The instance of the main Lollms application.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
LollmsTTS: The LollmsTTS class.
|
||||||
|
"""
|
||||||
|
return LollmsTTS
|
||||||
|
|
||||||
|
def get_voices(self):
|
||||||
|
"""
|
||||||
|
Retrieves the available voices for TTS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list: A list of available voices.
|
||||||
|
"""
|
||||||
|
return self.voices
|
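And a matching hypothetical TTS subclass; DummyTTS and its silent WAV output are illustrative assumptions only, meant to show the inheritance pattern rather than a real backend.

# Hypothetical example of a concrete TTS backend built on the base class above.
import wave
from pathlib import Path
from lollms.app import LollmsApplication
from lollms.tts import LollmsTTS

class DummyTTS(LollmsTTS):
    def __init__(self, app: LollmsApplication, output_path=None):
        super().__init__(app, model="dummy", voice="main_voice", output_path=output_path)
        self.voices = ["main_voice"]
        self.ready = True

    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        # A real backend (xtts, openai_tts, ...) would synthesize speech here;
        # this sketch writes one second of silence so the call chain can be exercised.
        with wave.open(str(file_name_or_path), "wb") as f:
            f.setnchannels(1)
            f.setsampwidth(2)
            f.setframerate(16000)
            f.writeframes(b"\x00\x00" * 16000)
        return file_name_or_path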