mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-01-18 02:39:46 +00:00
upgraded
This commit is contained in:
parent
42ebabfe0d
commit
397d21a3be
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 96
|
||||
version: 98
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -80,10 +80,30 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
@ -101,6 +121,13 @@ xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -202,6 +233,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
150
lollms/app.py
150
lollms/app.py
@ -221,7 +221,21 @@ class LollmsApplication(LoLLMsCom):
|
||||
def get_uploads_path(self, client_id):
    """Return the personal uploads folder.

    Args:
        client_id: Identifier of the requesting client. It is not used here:
            the same folder is returned for every client.

    Returns:
        The value of ``self.lollms_paths.personal_uploads_path``.
    """
    uploads_path = self.lollms_paths.personal_uploads_path
    return uploads_path
|
||||
|
||||
def start_servers( self ):
|
||||
def start_servers(self):
|
||||
self.ollama = None
|
||||
self.vllm = None
|
||||
self.whisper = None
|
||||
self.xtts = None
|
||||
self.sd = None
|
||||
self.comfyui = None
|
||||
self.motion_ctrl = None
|
||||
|
||||
self.tti = None
|
||||
self.tts = None
|
||||
self.stt = None
|
||||
|
||||
|
||||
|
||||
if self.config.enable_ollama_service:
|
||||
try:
|
||||
from lollms.services.ollama.lollms_ollama import Service
|
||||
@ -240,13 +254,11 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
if self.config.whisper_activate:
|
||||
try:
|
||||
from lollms.media import AudioRecorder
|
||||
self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
|
||||
self.rec.start_recording()
|
||||
time.sleep(1)
|
||||
self.rec.stop_recording()
|
||||
except:
|
||||
pass
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
|
||||
if self.config.xtts_enable:
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
@ -256,7 +268,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
|
||||
self.tts = LollmsXTTS(
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=self.lollms_paths.custom_voices_path,
|
||||
@ -291,6 +303,126 @@ class LollmsApplication(LoLLMsCom):
|
||||
self.warning(f"Couldn't load Motion control")
|
||||
|
||||
|
||||
if self.config.active_tti_service == "autosd":
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
self.tti = LollmsSD(self)
|
||||
elif self.config.active_tti_service == "dall-e":
|
||||
from lollms.services.dalle.lollms_dalle import LollmsDalle
|
||||
self.tti = LollmsDalle(self, self.config.dall_e_key)
|
||||
elif self.config.active_tti_service == "midjourney":
|
||||
from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
|
||||
self.tti = LollmsMidjourney(self, self.config.midjourney_key)
|
||||
|
||||
if self.config.active_tts_service == "openai_tts":
|
||||
from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||
self.tts = self.xtts
|
||||
|
||||
if self.config.active_stt_service == "openai_whisper":
|
||||
from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
|
||||
self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
|
||||
elif self.config.active_stt_service == "whisper":
|
||||
from lollms.services.whisper.lollms_whisper import LollmsWhisper
|
||||
self.stt = LollmsWhisper(self, self.config.whisper_model)
|
||||
|
||||
|
||||
def verify_servers(self, reload_all=False):
    """Start the enabled services that are not loaded yet and rebind the active TTI/TTS/STT services.

    For every optional service whose enable flag is set in ``self.config``
    and whose handle on ``self`` is still ``None``, the service class is
    imported lazily and instantiated. Afterwards ``self.tti``, ``self.tts``
    and ``self.stt`` are selected from the ``active_tti_service``,
    ``active_tts_service`` and ``active_stt_service`` settings. The TTI, STT
    and OpenAI TTS objects are re-created on every call.

    Args:
        reload_all: Accepted for API compatibility; it is not read by this
            method.

    Returns:
        None. Failures are traced and/or reported through ``self.warning``;
        no ``Exception`` propagates to the caller.
    """
    try:
        # ---- text generation back-ends ----
        if self.config.enable_ollama_service and self.ollama is None:
            try:
                from lollms.services.ollama.lollms_ollama import Service
                self.ollama = Service(self, base_url=self.config.ollama_base_url)
            except Exception as ex:
                trace_exception(ex)
                self.warning("Couldn't load Ollama")

        if self.config.enable_vllm_service and self.vllm is None:
            try:
                from lollms.services.vllm.lollms_vllm import Service
                self.vllm = Service(self, base_url=self.config.vllm_url)
            except Exception as ex:
                trace_exception(ex)
                self.warning("Couldn't load vllm")

        # ---- speech to text ----
        if self.config.whisper_activate and self.whisper is None:
            try:
                from lollms.services.whisper.lollms_whisper import LollmsWhisper
                self.whisper = LollmsWhisper(self, self.config.whisper_model, self.lollms_paths.personal_outputs_path)
            except Exception as ex:
                trace_exception(ex)

        # ---- text to speech ----
        if self.config.xtts_enable and self.xtts is None:
            try:
                from lollms.services.xtts.lollms_xtts import LollmsXTTS
                voice = self.config.xtts_current_voice
                # Custom voices live in the user's folder; "main_voice" uses
                # the folder resolved relative to this source file.
                if voice != "main_voice":
                    voices_folder = self.lollms_paths.custom_voices_path
                else:
                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"

                self.xtts = LollmsXTTS(
                    self,
                    voices_folder=voices_folder,
                    voice_samples_path=self.lollms_paths.custom_voices_path,
                    xtts_base_url=self.config.xtts_base_url,
                    wait_for_service=False,
                    use_deep_speed=self.config.xtts_use_deepspeed,
                    use_streaming_mode=self.config.xtts_use_streaming_mode
                )
            except Exception as ex:
                # Was a bare ``except:`` that hid the cause of the failure.
                trace_exception(ex)
                self.warning("Couldn't load XTTS")

        # ---- image generation ----
        if self.config.enable_sd_service and self.sd is None:
            try:
                from lollms.services.sd.lollms_sd import LollmsSD
                self.sd = LollmsSD(self, auto_sd_base_url=self.config.sd_base_url)
            except Exception as ex:
                trace_exception(ex)
                self.warning("Couldn't load SD")

        if self.config.enable_comfyui_service and self.comfyui is None:
            try:
                from lollms.services.comfyui.lollms_comfyui import LollmsComfyUI
                self.comfyui = LollmsComfyUI(self, comfyui_base_url=self.config.comfyui_base_url)
            except Exception as ex:
                trace_exception(ex)
                # The message used to say "SD", which pointed at the wrong service.
                self.warning("Couldn't load ComfyUI")

        if self.config.enable_motion_ctrl_service and self.motion_ctrl is None:
            try:
                from lollms.services.motion_ctrl.lollms_motion_ctrl import Service
                self.motion_ctrl = Service(self, base_url=self.config.motion_ctrl_base_url)
            except Exception as ex:
                trace_exception(ex)
                self.warning("Couldn't load Motion control")

        # ---- active text-to-image service ----
        if self.config.active_tti_service == "autosd":
            from lollms.services.sd.lollms_sd import LollmsSD
            self.tti = LollmsSD(self)
        elif self.config.active_tti_service == "dall-e":
            from lollms.services.dalle.lollms_dalle import LollmsDalle
            self.tti = LollmsDalle(self, self.config.dall_e_key)
        elif self.config.active_tti_service == "midjourney":
            from lollms.services.midjourney.lollms_midjourney import LollmsMidjourney
            self.tti = LollmsMidjourney(self, self.config.midjourney_key)

        # ---- active text-to-speech service ----
        if self.config.active_tts_service == "openai_tts":
            from lollms.services.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
            self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
        elif self.config.active_tts_service == "xtts" and self.xtts:
            # This branch used to test ``active_stt_service``, so choosing
            # "xtts" as the TTS service never bound ``self.tts``.
            self.tts = self.xtts

        # ---- active speech-to-text service ----
        if self.config.active_stt_service == "openai_whisper":
            from lollms.services.openai_whisper.lollms_whisper import LollmsOpenAIWhisper
            self.stt = LollmsOpenAIWhisper(self, self.config.openai_whisper_model, self.config.openai_whisper_key)
        elif self.config.active_stt_service == "whisper":
            from lollms.services.whisper.lollms_whisper import LollmsWhisper
            self.stt = LollmsWhisper(self, self.config.whisper_model)

    except Exception as ex:
        trace_exception(ex)
|
||||
|
||||
|
||||
def build_long_term_skills_memory(self):
|
||||
discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
|
||||
discussion_db_name.mkdir(exist_ok=True, parents=True)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 96
|
||||
version: 98
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -80,10 +80,30 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
@ -101,6 +121,13 @@ xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
@ -109,6 +136,8 @@ sd_base_url: http://localhost:7860
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
@ -118,6 +147,8 @@ comfyui_base_url: http://127.0.0.1:8188/
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -202,6 +233,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
@ -186,6 +186,7 @@ async def apply_settings(request: Request):
|
||||
lollmsElfServer.config.config[key] = config.get(key, lollmsElfServer.config.config[key])
|
||||
ASCIIColors.success("OK")
|
||||
lollmsElfServer.rebuild_personalities()
|
||||
lollmsElfServer.verify_servers()
|
||||
if lollmsElfServer.config.auto_save:
|
||||
lollmsElfServer.config.save_config()
|
||||
return {"status":True}
|
||||
|
@ -92,8 +92,8 @@ def start_sd(data: Identification):
|
||||
return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
lollmsElfServer.ShowBlockingMessage("Starting SD api server\nPlease stand by")
|
||||
from lollms.services.sd.lollms_sd import get_sd
|
||||
lollmsElfServer.sd = get_sd(lollmsElfServer.lollms_paths)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
|
||||
from lollms.services.sd.lollms_sd import LollmsSD
|
||||
lollmsElfServer.sd = LollmsSD.get(lollmsElfServer)(lollmsElfServer, lollmsElfServer.personality.name if lollmsElfServer.personality is not None else "Artbot")
|
||||
ASCIIColors.success("Done")
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
return {"status":True}
|
||||
|
@ -8,6 +8,7 @@ description:
|
||||
|
||||
"""
|
||||
from fastapi import APIRouter, Request, UploadFile, File, HTTPException
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import StreamingResponse
|
||||
@ -39,10 +40,7 @@ def list_voices():
|
||||
return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
ASCIIColors.yellow("Listing voices")
|
||||
voices=["main_voice"]
|
||||
voices_dir:Path=lollmsElfServer.lollms_paths.custom_voices_path
|
||||
voices += [v.stem for v in voices_dir.iterdir() if v.suffix==".wav"]
|
||||
return {"voices":voices}
|
||||
return {"voices":lollmsElfServer.tts.get_voices()}
|
||||
|
||||
@router.post("/set_voice")
|
||||
async def set_voice(request: Request):
|
||||
@ -70,6 +68,24 @@ async def set_voice(request: Request):
|
||||
return {"status":False,"error":str(ex)}
|
||||
|
||||
|
||||
class LollmsAudio2TextRequest(BaseModel):
    # Request body of the /audio2text endpoint.
    # Path of the audio file handed to the transcription service.
    wave_file_path: str
    # Optional; not read by the audio2text handler.
    voice: str = None
    # Optional; not read by the audio2text handler.
    fn: str = None
|
||||
|
||||
@router.post("/audio2text")
async def audio2text(request: LollmsAudio2TextRequest):
    """Transcribe the audio file named in the request with the whisper service.

    Args:
        request: Payload whose ``wave_file_path`` is passed, as a string, to
            ``lollmsElfServer.whisper.transcribe``.

    Returns:
        A ``PlainTextResponse`` wrapping the transcription result, or a
        ``{"status": False, "error": ...}`` dict when the server is in
        headless mode, is not bound to localhost/127.0.0.1, or has no
        whisper service loaded.
    """
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Code execution is blocked when in headless mode for obvious security reasons!"}

    if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
        return {"status":False,"error":"Code execution is blocked when the server is exposed outside for very obvious reasons!"}

    # The whisper handle stays None unless the service was activated; report
    # that instead of failing with an AttributeError.
    whisper = getattr(lollmsElfServer, "whisper", None)
    if whisper is None:
        return {"status":False,"error":"Whisper service is not active"}

    result = whisper.transcribe(str(request.wave_file_path))
    return PlainTextResponse(result)
|
||||
|
||||
|
||||
|
||||
class LollmsText2AudioRequest(BaseModel):
|
||||
text: str
|
||||
voice: str = None
|
||||
@ -94,67 +110,13 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
|
||||
|
||||
try:
|
||||
# Get the JSON data from the POST request.
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if lollmsElfServer.tts is None:
|
||||
voice=lollmsElfServer.config.xtts_current_voice
|
||||
if voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
|
||||
use_streaming_mode= lollmsElfServer.config.xtts_use_streaming_mode,
|
||||
)
|
||||
except Exception as ex:
|
||||
return {"url": None, "error":f"{ex}"}
|
||||
|
||||
voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
|
||||
index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
|
||||
output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
|
||||
if voice is None:
|
||||
voice = "main_voice"
|
||||
lollmsElfServer.info("Starting to build voice")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
# If the personality has a voice, then use it
|
||||
personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
|
||||
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
|
||||
voices_folder = personality_audio
|
||||
elif voice!="main_voice":
|
||||
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
|
||||
else:
|
||||
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
|
||||
if lollmsElfServer.tts is None:
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folder=voices_folder,
|
||||
voice_samples_path=Path(__file__).parent/"voices",
|
||||
xtts_base_url= lollmsElfServer.config.xtts_base_url,
|
||||
use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
|
||||
use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode
|
||||
)
|
||||
if lollmsElfServer.tts.ready:
|
||||
language = lollmsElfServer.config.xtts_current_language# convert_language_name()
|
||||
lollmsElfServer.tts.set_speaker_folder(voices_folder)
|
||||
preprocessed_text= add_period(request.text)
|
||||
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
|
||||
if len(voice_file)==0:
|
||||
return {"status":False,"error":"Voice not found"}
|
||||
lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
|
||||
else:
|
||||
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
|
||||
return {"status":False, "error":"Service not ready yet"}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"url": None}
|
||||
if lollmsElfServer.tts is None:
|
||||
return {"url": None, "error":f"No TTS service is on"}
|
||||
if lollmsElfServer.tts.ready:
|
||||
response = lollmsElfServer.tts.tts_to_audio(request.text, request.voice, file_name_or_path=request.fn)
|
||||
return response
|
||||
else:
|
||||
return {"url": None, "error":f"TTS service is not ready yet"}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
lollmsElfServer.error(ex)
|
||||
@ -255,9 +217,9 @@ def install_xtts(data:Identification):
|
||||
if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
|
||||
return {"status":False,"error":"Service installation is blocked when the server is exposed outside for very obvious reasons!"}
|
||||
|
||||
from lollms.services.xtts.lollms_xtts import install_xtts
|
||||
from lollms.services.xtts.lollms_xtts import LollmsTTS
|
||||
lollmsElfServer.ShowBlockingMessage("Installing xTTS api server\nPlease stand by")
|
||||
install_xtts(lollmsElfServer)
|
||||
LollmsTTS.install(lollmsElfServer)
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
return {"status":True}
|
||||
except Exception as ex:
|
||||
|
@ -1,10 +1,7 @@
|
||||
# Title LollmsDalle
|
||||
# Licence: MIT
|
||||
# Licence: Apache 2.0
|
||||
# Author : Paris Neo
|
||||
# Adapted from the work of mix1009's sdwebuiapi
|
||||
# check it out : https://github.com/mix1009/sdwebuiapi/tree/main
|
||||
# Here is a copy of the LICENCE https://github.com/mix1009/sdwebuiapi/blob/main/LICENSE
|
||||
# All rights are reserved
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
@ -29,17 +26,16 @@ from typing import List, Dict, Any
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tti import LollmsTTI
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
|
||||
def get_Dalli(lollms_paths:LollmsPaths):
|
||||
return LollmsDalle
|
||||
|
||||
class LollmsDalle:
|
||||
has_controlnet = False
|
||||
|
||||
class LollmsDalle(LollmsTTI):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
@ -47,7 +43,7 @@ class LollmsDalle:
|
||||
generation_engine="dall-e-3",# other possibility "dall-e-2"
|
||||
output_path=None
|
||||
):
|
||||
self.app = app
|
||||
super().__init__(app)
|
||||
self.key = key
|
||||
self.generation_engine = generation_engine
|
||||
self.output_path = output_path
|
||||
@ -141,3 +137,6 @@ class LollmsDalle:
|
||||
ASCIIColors.red("Failed to download the image")
|
||||
|
||||
return file_name
|
||||
@staticmethod
|
||||
def get(app:LollmsApplication):
|
||||
return LollmsDalle
|
142
lollms/services/midjourney/lollms_midjourney.py
Normal file
142
lollms/services/midjourney/lollms_midjourney.py
Normal file
@ -0,0 +1,142 @@
|
||||
# Title LollmsMidjourney
|
||||
# Licence: Apache 2.0
|
||||
# Author : Paris Neo
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tti import LollmsTTI
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
||||
class LollmsMidjourney(LollmsTTI):
    """Text-to-image service.

    NOTE(review): despite its name, this class sends its requests through the
    ``openai`` images API and defaults to the "dall-e-3" engine -- confirm
    whether a dedicated Midjourney backend is intended.
    """
    def __init__(
                    self,
                    app:LollmsApplication,
                    key="",
                    generation_engine="dall-e-3",# other possibility "dall-e-2"
                    output_path=None
                    ):
        """Store the API key, the default engine and the default output folder.

        Args:
            app: Application object forwarded to the ``LollmsTTI`` constructor.
            key: API key assigned to ``openai.api_key`` before each request.
            generation_engine: Default engine name used by ``paint``.
            output_path: Default folder where generated images are written.
        """
        super().__init__(app)
        self.key = key
        self.generation_engine = generation_engine
        self.output_path = output_path

    def paint(
                self,
                prompt,
                width=512,
                height=512,
                images=None,
                generation_engine=None,
                output_path=None
            ):
        """Generate one image and save it into the output folder.

        Args:
            prompt: Text prompt (stripped before being sent).
            width: Requested width; snapped to a resolution supported by the engine.
            height: Requested height; snapped to a resolution supported by the engine.
            images: Optional list of image paths. When it is non-empty and the
                engine is "dall-e-2", a variation of the first image is
                requested instead of a prompt-based generation.
            generation_engine: Engine name; defaults to the one given at construction.
            output_path: Destination folder; defaults to the one given at
                construction. One of the two must be set.

        Returns:
            The path of the saved image, or ``None`` when the image could not
            be downloaded.
        """
        # ``None`` instead of a shared mutable ``[]`` default.
        if images is None:
            images = []
        if output_path is None:
            output_path = self.output_path
        if generation_engine is None:
            generation_engine = self.generation_engine
        if not PackageManager.check_package_installed("openai"):
            PackageManager.install_package("openai")
        import openai
        openai.api_key = self.key
        if generation_engine=="dall-e-2":
            supported_resolutions = [
                [512, 512],
                [1024, 1024],
            ]
            # Find the closest resolution
            closest_resolution = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
        else:
            # Supported sizes here are 1024x1024, 1024x1792 and 1792x1024:
            # pick the one matching the requested orientation.
            if width>height:
                closest_resolution = [1792, 1024]
            elif width<height:
                closest_resolution = [1024, 1792]
            else:
                closest_resolution = [1024, 1024]

        # Update the width and height
        width, height = closest_resolution

        if len(images)>0 and generation_engine=="dall-e-2":
            # Read the source image from the ``images`` argument (the previous
            # code read ``self.personality.image_files``, which this class
            # never sets) and resize it to the selected resolution.
            image = Image.open(images[0])
            image = image.resize((width, height))

            # Convert the image to PNG bytes
            byte_stream = BytesIO()
            image.save(byte_stream, format='PNG')
            byte_array = byte_stream.getvalue()
            response = openai.images.create_variation(
                image=byte_array,
                n=1,
                model=generation_engine, # for now only dalle 2 supports variations
                size=f"{width}x{height}"
            )
        else:
            response = openai.images.generate(
                model=generation_engine,
                prompt=prompt.strip(),
                quality="standard",
                size=f"{width}x{height}",
                n=1,
            )
        # download image to outputs
        output_dir = Path(output_path)
        output_dir.mkdir(parents=True, exist_ok=True)
        image_url = response.data[0].url

        # Get the image data from the URL
        response = requests.get(image_url)

        # Stays None when the download fails, so the final ``return`` can no
        # longer raise UnboundLocalError.
        file_name = None
        if response.status_code == 200:
            # Generate the full path for the image file
            file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_") # You can change the filename if needed

            # Save the image to the specified folder
            with open(file_name, "wb") as file:
                file.write(response.content)
            ASCIIColors.yellow(f"Image saved to {file_name}")
        else:
            ASCIIColors.red("Failed to download the image")

        return file_name

    @staticmethod
    def get(app:LollmsApplication):
        """Return this service class (``app`` is not used)."""
        return LollmsMidjourney
|
108
lollms/services/open_ai_tts/lollms_openai_tts.py
Normal file
108
lollms/services/open_ai_tts/lollms_openai_tts.py
Normal file
@ -0,0 +1,108 @@
|
||||
# Title LollmsOpenAITTS
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
# Uses open AI api to perform text to speech
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
if not PackageManager.check_package_installed("sounddevice"):
|
||||
PackageManager.install_package("sounddevice")
|
||||
if not PackageManager.check_package_installed("soundfile"):
|
||||
PackageManager.install_package("soundfile")
|
||||
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
|
||||
def get_Whisper(lollms_paths:LollmsPaths):
    """Return the ``LollmsOpenAITTS`` class.

    ``lollms_paths`` is accepted but not used. The function name does not
    match what it returns; it is kept as-is for existing callers.
    """
    service_class = LollmsOpenAITTS
    return service_class
|
||||
|
||||
class LollmsOpenAITTS(LollmsTTS):
    """Text-to-speech service that uses the OpenAI speech API."""
    def __init__(
                    self,
                    app:LollmsApplication,
                    model ="tts-1",
                    voice="alloy",
                    api_key="",
                    output_path=None
                    ):
        """Create the OpenAI client and record the default voice.

        Args:
            app: Application object forwarded to the ``LollmsTTS`` constructor.
            model: Speech model name forwarded to the ``LollmsTTS`` constructor.
            voice: Default voice, used when a call gives no known speaker.
            api_key: OpenAI API key.
            output_path: Output folder stored on the instance.
        """
        super().__init__(app, model, voice, api_key, output_path)
        self.client = OpenAI(api_key=api_key)
        self.voices = [
            "alloy",
            "echo",
            "fable",
            "onyx",
            "nova",
            "shimmer"
        ]
        self.models = [
            "tts-1",
            "tts-1-hd"
        ]

        self.voice = voice
        self.output_path = output_path
        self.ready = True

    def _select_voice(self, speaker):
        """Return ``speaker`` when it is a known voice, else the default voice."""
        return speaker if speaker in self.voices else self.voice

    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        """Synthesize ``text`` and write the WAV audio to ``file_name_or_path``.

        Args:
            text: Text to speak.
            speaker: Voice name. It is used when it is one of ``self.voices``;
                otherwise (including ``None``) the default voice is used.
            file_name_or_path: Destination file.
            language: Accepted for interface compatibility; not sent to the API.
        """
        response = self.client.audio.speech.create(
            model=self.model,
            voice=self._select_voice(speaker),
            input=text,
            response_format="wav"
        )
        response.write_to_file(file_name_or_path)

    def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
        """Synthesize ``text`` and play it on the default audio device.

        Args:
            text: Text to speak.
            speaker: Voice name; see ``tts_to_file``.
            file_name_or_path: Where to keep the generated WAV file. When it is
                ``None`` a temporary file is used and removed after playback.
            language: Accepted for interface compatibility; not sent to the API.
            use_threading: When true, playback runs in a background thread and
                this method returns right after synthesis; otherwise the call
                blocks until playback ends.
        """
        import tempfile  # local import: only this method needs it

        use_temp_file = file_name_or_path is None
        if use_temp_file:
            # The declared default used to fail inside write_to_file(None).
            handle, speech_file_path = tempfile.mkstemp(suffix=".wav")
            os.close(handle)
        else:
            speech_file_path = file_name_or_path

        self.tts_to_file(text, speaker, speech_file_path, language=language)

        def play_audio(file_path):
            try:
                # Read the audio file
                data, fs = sf.read(file_path, dtype='float32')
                # Play the audio file
                sd.play(data, fs)
                # Wait until the file is done playing
                sd.wait()
            finally:
                if use_temp_file:
                    Path(file_path).unlink(missing_ok=True)

        if use_threading:
            threading.Thread(target=play_audio, args=(speech_file_path,), daemon=True).start()
        else:
            play_audio(speech_file_path)
|
70
lollms/services/openai_whisper/lollms_whisper.py
Normal file
70
lollms/services/openai_whisper/lollms_whisper.py
Normal file
@ -0,0 +1,70 @@
|
||||
# Title LollmsOpenAIWhisper
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
import requests
|
||||
import os
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import json
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
|
||||
|
||||
def get_Whisper(lollms_paths:LollmsPaths):
    """Return the ``LollmsOpenAIWhisper`` class (``lollms_paths`` is not used)."""
    service_class = LollmsOpenAIWhisper
    return service_class
|
||||
|
||||
class LollmsOpenAIWhisper:
    """Speech-to-text service that uses the OpenAI transcription API."""
    def __init__(
                    self,
                    app:LollmsApplication,
                    model="whisper-1",
                    api_key="",
                    output_path=None
                    ):
        """Create the OpenAI client and store the defaults.

        Args:
            app: Owning application object.
            model: Default transcription model name.
            api_key: OpenAI API key.
            output_path: Stored on the instance; ``transcribe`` does not write to it.
        """
        self.client = OpenAI(api_key=api_key)
        self.app = app
        self.model = model
        self.output_path = output_path
        self.ready = True

    def transcribe(
                self,
                wav_path: str|Path,
                model:str="",
                output_path:str|Path=None
            ):
        """Send an audio file to the transcription endpoint and return the text.

        Args:
            wav_path: Path of the audio file to transcribe.
            model: Model name; an empty string or ``None`` selects the
                instance default.
            output_path: Falls back to the instance default when ``None``; the
                value is not used further by this method.

        Returns:
            Whatever the client returns for ``response_format="text"``.
        """
        if model=="" or model is None:
            model = self.model
        if output_path is None:
            output_path = self.output_path
        # A context manager closes the handle even if the request raises;
        # the file used to be left open.
        with open(str(wav_path), "rb") as audio_file:
            transcription = self.client.audio.transcriptions.create(
                model=model,
                file=audio_file,
                response_format="text"
            )
        return transcription
|
@ -28,18 +28,14 @@ from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.tti import LollmsTTI
|
||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env
|
||||
import subprocess
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
import threading
|
||||
|
||||
def verify_sd(lollms_paths:LollmsPaths):
    """Tell whether the ``shared/auto_sd`` folder exists under the personal path.

    Args:
        lollms_paths: Object exposing ``personal_path``.

    Returns:
        bool: True when ``<personal_path>/shared/auto_sd`` exists.
    """
    auto_sd_dir = lollms_paths.personal_path / "shared" / "auto_sd"
    return auto_sd_dir.exists()
|
||||
|
||||
|
||||
def download_file(url, folder_path, local_filename):
|
||||
# Make sure 'folder_path' exists
|
||||
@ -137,20 +133,6 @@ def upgrade_sd(lollms_app:LollmsApplication):
|
||||
ASCIIColors.success("DONE")
|
||||
|
||||
|
||||
def get_sd(lollms_paths:LollmsPaths):
    """Update the ``auto_sd`` checkout and return the ``LollmsSD`` class when its script is present.

    Args:
        lollms_paths: Object exposing ``personal_path``.

    Returns:
        The ``LollmsSD`` class, or None when ``lollms_sd.py`` is missing from the
        ``shared/auto_sd`` folder.
    """
    auto_sd_dir = lollms_paths.personal_path / "shared" / "auto_sd"
    git_pull(auto_sd_dir)

    # The script is looked up after the pull, so a file brought in by the pull is taken into account.
    if not (auto_sd_dir / "lollms_sd.py").exists():
        return None

    ASCIIColors.success("lollms_sd found.")
    ASCIIColors.success("Loading source file...",end="")
    from lollms.services.sd.lollms_sd import LollmsSD
    ASCIIColors.success("ok")
    return LollmsSD
|
||||
|
||||
|
||||
def raw_b64_img(image: Image) -> str:
|
||||
@ -274,7 +256,7 @@ class ControlNetUnit:
|
||||
"pixel_perfect": self.pixel_perfect,
|
||||
}
|
||||
|
||||
class LollmsSD:
|
||||
class LollmsSD(LollmsTTI):
|
||||
has_controlnet = False
|
||||
def __init__(
|
||||
self,
|
||||
@ -290,19 +272,19 @@ class LollmsSD:
|
||||
share=False,
|
||||
wait_for_service=True
|
||||
):
|
||||
super().__init__(app)
|
||||
if auto_sd_base_url=="" or auto_sd_base_url=="http://127.0.0.1:7860":
|
||||
auto_sd_base_url = None
|
||||
self.ready = False
|
||||
# Get the current directory
|
||||
lollms_paths = app.lollms_paths
|
||||
self.app = app
|
||||
root_dir = lollms_paths.personal_path
|
||||
|
||||
self.wm = wm
|
||||
# Store the path to the script
|
||||
if auto_sd_base_url is None:
|
||||
self.auto_sd_base_url = "http://127.0.0.1:7860"
|
||||
if not verify_sd(lollms_paths):
|
||||
if not LollmsSD.verify(app):
|
||||
install_sd(app.lollms_paths)
|
||||
else:
|
||||
self.auto_sd_base_url = auto_sd_base_url
|
||||
@ -364,6 +346,30 @@ class LollmsSD:
|
||||
else:
|
||||
self.check_controlnet()
|
||||
|
||||
@staticmethod
def verify(app:LollmsApplication):
    """Tell whether the ``shared/auto_sd`` folder exists under the application's personal path.

    Args:
        app: Application whose ``lollms_paths.personal_path`` is inspected.

    Returns:
        bool: True when ``<personal_path>/shared/auto_sd`` exists.
    """
    auto_sd_dir = app.lollms_paths.personal_path / "shared" / "auto_sd"
    return auto_sd_dir.exists()
|
||||
|
||||
@staticmethod
def get(app:LollmsApplication):
    """Update the ``auto_sd`` checkout and return the ``LollmsSD`` class when its script is present.

    Declared static because it takes no instance: without the decorator, calling it
    through an instance would pass that instance as ``app`` plus one extra argument.

    Args:
        app: Application whose ``lollms_paths.personal_path`` locates the shared folder.

    Returns:
        The ``LollmsSD`` class, or None when ``lollms_sd.py`` is missing from the
        ``shared/auto_sd`` folder.
    """
    root_dir = app.lollms_paths.personal_path
    shared_folder = root_dir/"shared"
    sd_folder = shared_folder / "auto_sd"
    sd_script_path = sd_folder / "lollms_sd.py"
    git_pull(sd_folder)

    if sd_script_path.exists():
        ASCIIColors.success("lollms_sd found.")
        ASCIIColors.success("Loading source file...",end="")
        # Imported here rather than at module level, as in the original code.
        from lollms.services.sd.lollms_sd import LollmsSD
        ASCIIColors.success("ok")
        return LollmsSD
    return None
|
||||
|
||||
|
||||
def paint(
|
||||
self,
|
||||
sd_positive_prompt,
|
||||
|
42
lollms/services/whisper/lollms_whisper.py
Normal file
42
lollms/services/whisper/lollms_whisper.py
Normal file
@ -0,0 +1,42 @@
|
||||
# Title LollmsWhisper
|
||||
# Licence: MIT
|
||||
# Author : Paris Neo
|
||||
#
|
||||
|
||||
from pathlib import Path
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.stt import LollmsSTT
|
||||
from dataclasses import dataclass
|
||||
from PIL import Image, PngImagePlugin
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
import subprocess
|
||||
|
||||
# The importable module is named ``whisper`` but the distribution that ships it is
# ``openai-whisper``; the PyPI project literally called ``whisper`` is unrelated and
# has no ``load_model``.
# NOTE(review): assumes PackageManager.install_package takes a pip distribution name - confirm.
if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("openai-whisper")
import whisper
|
||||
|
||||
|
||||
class LollmsWhisper(LollmsSTT):
    """Speech-to-text service running a locally loaded Whisper model."""

    def __init__(
        self,
        app:LollmsApplication,
        model="small",
        output_path=None
    ):
        """Initialise the base service state and load the Whisper model.

        Args:
            app: The running Lollms application.
            model: Name handed to ``whisper.load_model``.
            output_path: Stored on the instance; not read by this class.
        """
        # The base constructor sets ready/app/model/output_path; the original skipped it,
        # leaving ``ready`` and ``model`` undefined on the instance.
        super().__init__(app, model=model, output_path=output_path)
        self.whisper = whisper.load_model(model)
        # Only flagged ready once the model has actually been loaded.
        self.ready = True

    def transcribe(
        self,
        wav_path: str|Path,
        prompt=""
    ):
        """Transcribe an audio file with the loaded model.

        Args:
            wav_path: Path of the audio file.
            prompt: Optional text forwarded as Whisper's ``initial_prompt``; ignored
                when empty. Accepted so the signature matches ``LollmsSTT.transcribe``.

        Returns:
            The object returned by the model's ``transcribe`` call, unchanged.
            NOTE(review): with openai-whisper this is a dict whose ``"text"`` entry
            holds the transcription, not a plain string - confirm what callers expect.
        """
        options = {"initial_prompt": prompt} if prompt else {}
        result = self.whisper.transcribe(str(wav_path), **options)
        return result
|
@ -11,7 +11,7 @@ import sys
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.utilities import PackageManager, find_first_available_file_index, add_period
|
||||
import time
|
||||
import io
|
||||
import sys
|
||||
@ -32,59 +32,11 @@ import uuid
|
||||
from ascii_colors import ASCIIColors, trace_exception
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.utilities import git_pull, show_yes_no_dialog, run_python_script_in_env, create_conda_env, run_pip_in_env, environment_exists
|
||||
from lollms.tts import LollmsTTS
|
||||
import subprocess
|
||||
import platform
|
||||
|
||||
def verify_xtts(lollms_paths:LollmsPaths):
    """Tell whether the ``shared/xtts`` folder exists under the personal path.

    Args:
        lollms_paths: Object exposing ``personal_path``.

    Returns:
        bool: True when ``<personal_path>/shared/xtts`` exists.
    """
    xtts_dir = lollms_paths.personal_path / "shared" / "xtts"
    return xtts_dir.exists()
|
||||
|
||||
def install_xtts(lollms_app:LollmsApplication):
    """Fetch xtts-api-server, prepare the ``xtts`` conda environment and launch the server.

    Args:
        lollms_app: Application whose ``lollms_paths.personal_path`` hosts the
            ``shared/xtts`` checkout.

    Raises:
        subprocess.CalledProcessError: When the git clone fails.
    """
    ASCIIColors.green("XTTS installation started")
    repo_url = "https://github.com/ParisNeo/xtts-api-server"
    root_dir = lollms_app.lollms_paths.personal_path
    shared_folder = root_dir/"shared"
    xtts_path = shared_folder / "xtts"

    # Step 1: Clone or update the repository
    if os.path.exists(xtts_path):
        print("Repository already exists. Pulling latest changes...")
        try:
            subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
        except subprocess.CalledProcessError:
            # Only a failed git command triggers the clone fallback; a bare except
            # would also trap KeyboardInterrupt/SystemExit and then start a clone.
            subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
    else:
        print("Cloning repository...")
        subprocess.run(["git", "clone", repo_url, xtts_path], check=True)

    # Step 2: Create or update the Conda environment
    if environment_exists("xtts"):
        print("Conda environment 'xtts' already exists. Updating...")
        # Nothing is updated here yet: an existing environment is reused as is.
    else:
        print("Creating Conda environment 'xtts'...")
        create_conda_env("xtts", "3.8")

    # Step 3: Install or update dependencies inside the environment
    requirements_path = os.path.join(xtts_path, "requirements.txt")
    run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
    run_pip_in_env("xtts", "install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)

    # Step 4: Launch the server from the cloned repository
    print("Launching XTTS API server...")
    run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)

    print("XTTS API server setup and launch completed.")
    ASCIIColors.cyan("Done")
    ASCIIColors.cyan("Installing xtts-api-server")
    ASCIIColors.green("XTTS server installed successfully")
|
||||
|
||||
|
||||
|
||||
@ -103,8 +55,7 @@ def get_xtts(lollms_paths:LollmsPaths):
|
||||
ASCIIColors.success("ok")
|
||||
return LollmsXTTS
|
||||
|
||||
class LollmsXTTS:
|
||||
has_controlnet = False
|
||||
class LollmsXTTS(LollmsTTS):
|
||||
def __init__(
|
||||
self,
|
||||
app:LollmsApplication,
|
||||
@ -117,6 +68,7 @@ class LollmsXTTS:
|
||||
use_deep_speed=False,
|
||||
use_streaming_mode = True
|
||||
):
|
||||
super().__init__(app)
|
||||
self.generation_threads = []
|
||||
self.voices_folder = voices_folder
|
||||
self.ready = False
|
||||
@ -124,7 +76,6 @@ class LollmsXTTS:
|
||||
xtts_base_url = None
|
||||
# Get the current directory
|
||||
lollms_paths = app.lollms_paths
|
||||
self.app = app
|
||||
root_dir = lollms_paths.personal_path
|
||||
self.voice_samples_path = voice_samples_path
|
||||
self.use_deep_speed = use_deep_speed
|
||||
@ -133,8 +84,8 @@ class LollmsXTTS:
|
||||
# Store the path to the script
|
||||
if xtts_base_url is None:
|
||||
self.xtts_base_url = "http://127.0.0.1:8020"
|
||||
if not verify_xtts(lollms_paths):
|
||||
install_xtts(app.lollms_paths)
|
||||
if not LollmsXTTS.verify(lollms_paths):
|
||||
LollmsXTTS.install(app)
|
||||
else:
|
||||
self.xtts_base_url = xtts_base_url
|
||||
|
||||
@ -167,6 +118,57 @@ class LollmsXTTS:
|
||||
else:
|
||||
self.wait_for_service_in_another_thread(max_retries=max_retries)
|
||||
|
||||
@staticmethod
def install(lollms_app:LollmsApplication):
    """Fetch xtts-api-server, prepare the ``xtts`` conda environment and launch the server.

    Declared static because it takes the application rather than an instance and is
    invoked as ``LollmsXTTS.install(app)``.

    Args:
        lollms_app: Application whose ``lollms_paths.personal_path`` hosts the
            ``shared/xtts`` checkout.

    Raises:
        subprocess.CalledProcessError: When the git clone fails.
    """
    ASCIIColors.green("XTTS installation started")
    repo_url = "https://github.com/ParisNeo/xtts-api-server"
    root_dir = lollms_app.lollms_paths.personal_path
    shared_folder = root_dir/"shared"
    xtts_path = shared_folder / "xtts"

    # Step 1: Clone or update the repository
    if os.path.exists(xtts_path):
        print("Repository already exists. Pulling latest changes...")
        try:
            subprocess.run(["git", "-C", xtts_path, "pull"], check=True)
        except subprocess.CalledProcessError:
            # Only a failed git command triggers the clone fallback; a bare except
            # would also trap KeyboardInterrupt/SystemExit and then start a clone.
            subprocess.run(["git", "clone", repo_url, xtts_path], check=True)
    else:
        print("Cloning repository...")
        subprocess.run(["git", "clone", repo_url, xtts_path], check=True)

    # Step 2: Create or update the Conda environment
    if environment_exists("xtts"):
        print("Conda environment 'xtts' already exists. Updating...")
        # Nothing is updated here yet: an existing environment is reused as is.
    else:
        print("Creating Conda environment 'xtts'...")
        create_conda_env("xtts", "3.8")

    # Step 3: Install or update dependencies inside the environment
    requirements_path = os.path.join(xtts_path, "requirements.txt")
    run_pip_in_env("xtts", f"install -r {requirements_path}", cwd=xtts_path)
    run_pip_in_env("xtts", "install torch==2.1.1+cu118 torchaudio==2.1.1+cu118 --index-url https://download.pytorch.org/whl/cu118", cwd=xtts_path)

    # Step 4: Launch the server from the cloned repository
    print("Launching XTTS API server...")
    run_python_script_in_env("xtts", "xtts_api_server", cwd=xtts_path)

    print("XTTS API server setup and launch completed.")
    ASCIIColors.cyan("Done")
    ASCIIColors.cyan("Installing xtts-api-server")
    ASCIIColors.green("XTTS server installed successfully")
|
||||
|
||||
@staticmethod
def verify(lollms_paths:LollmsPaths)->bool:
    """Tell whether the ``shared/xtts`` folder exists under the personal path.

    Args:
        lollms_paths: Object exposing ``personal_path``.

    Returns:
        bool: True when ``<personal_path>/shared/xtts`` exists.
    """
    xtts_dir = lollms_paths.personal_path / "shared" / "xtts"
    return xtts_dir.exists()
|
||||
|
||||
def run_xtts_api_server(self):
|
||||
# Get the path to the current Python interpreter
|
||||
@ -198,7 +200,7 @@ class LollmsXTTS:
|
||||
if self.voices_folder is not None:
|
||||
print("Generating sample audio.")
|
||||
voice_file = [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
|
||||
self.tts_to_audio("x t t s is ready",voice_file[0].name)
|
||||
self.tts_to_audio("x t t s is ready",voice_file[0].stem)
|
||||
print("Service is available.")
|
||||
if self.app is not None:
|
||||
self.app.success("XTTS Service is now available.")
|
||||
@ -237,13 +239,13 @@ class LollmsXTTS:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
return False
|
||||
|
||||
def tts_to_file(self, text, speaker_wav, file_name_or_path, language="en"):
|
||||
def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
|
||||
url = f"{self.xtts_base_url}/tts_to_file"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker_wav,
|
||||
"speaker_wav": speaker,
|
||||
"language": language,
|
||||
"file_name_or_path": file_name_or_path
|
||||
}
|
||||
@ -262,14 +264,43 @@ class LollmsXTTS:
|
||||
else:
|
||||
print("Request failed with status code:", response.status_code)
|
||||
|
||||
def tts_to_audio(self, text, speaker_wav, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
    """Pick the voice folder and voice file, then ask the XTTS server to speak ``text``.

    Args:
        text: Text to speak; it goes through ``add_period`` before being sent.
        speaker: Voice name (stem of a ``.wav`` file). None falls back to
            ``config.xtts_current_voice``, then to ``"main_voice"``.
        file_name_or_path: Output name. When None, ``voice_sample_<index>.wav`` is
            used, with the index taken from ``self.output_folder``.
        language: Overridden by ``config.xtts_current_language`` before the request.
        use_threading: Not forwarded to ``xtts_to_audio``.
            NOTE(review): confirm whether threading should be honoured here.

    Returns:
        None once the request has been issued, or a dict
        ``{"status": False, "error": ...}`` when the voice is missing or an
        exception is raised.
    """
    voice = self.app.config.xtts_current_voice if speaker is None else speaker
    if file_name_or_path is None:
        # The index lookup is only needed when the name has to be generated.
        index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav")
        output_fn = f"voice_sample_{index}.wav"
    else:
        output_fn = file_name_or_path
    if voice is None:
        voice = "main_voice"
    self.app.info("Starting to build voice")
    try:
        # If the personality has a voice, then use it
        personality_audio:Path = self.app.personality.personality_package_path/"audio"
        if personality_audio.exists() and any(personality_audio.iterdir()):
            voices_folder = personality_audio
        elif voice!="main_voice":
            voices_folder = self.app.lollms_paths.custom_voices_path
        else:
            voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
        language = self.app.config.xtts_current_language
        self.set_speaker_folder(voices_folder)
        preprocessed_text = add_period(text)
        voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
        if len(voice_file)==0:
            return {"status":False,"error":"Voice not found"}
        self.xtts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
    except Exception as ex:
        trace_exception(ex)
        return {"status":False,"error":f"{ex}"}
|
||||
|
||||
def xtts_to_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False):
|
||||
def tts2_audio_th(thread_uid=None):
|
||||
url = f"{self.xtts_base_url}/tts_to_audio"
|
||||
|
||||
# Define the request body
|
||||
payload = {
|
||||
"text": text,
|
||||
"speaker_wav": speaker_wav,
|
||||
"speaker_wav": speaker,
|
||||
"language": language
|
||||
}
|
||||
headers = {
|
||||
@ -308,3 +339,10 @@ class LollmsXTTS:
|
||||
return thread
|
||||
else:
|
||||
return tts2_audio_th()
|
||||
|
||||
def get_voices(self):
    """List the selectable voice names.

    Returns:
        list: ``"main_voice"`` followed by the stem of every ``.wav`` file found in
        the application's custom voices folder.
    """
    ASCIIColors.yellow("Listing voices")
    custom_dir:Path = self.app.lollms_paths.custom_voices_path
    names = ["main_voice"]
    names.extend(wav.stem for wav in custom_dir.iterdir() if wav.suffix==".wav")
    return names
|
||||
|
93
lollms/stt.py
Normal file
93
lollms/stt.py
Normal file
@ -0,0 +1,93 @@
|
||||
"""
|
||||
Lollms STT Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Speech-to-Text (STT) functionalities within the LollmsApplication framework. The base class `LollmsSTT` is intended to be inherited and implemented by other classes that provide specific STT functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
|
||||
class LollmsSTT:
    """
    Base class for Speech-to-Text (STT) services used within the LollmsApplication.

    Attributes:
        ready (bool): Set to False by this constructor.
        app (LollmsApplication): The instance of the main Lollms application.
        model (str): The STT model to be used for transcription.
        output_path (Path or str): Path where the output transcription files will be saved.
    """

    def __init__(
        self,
        app: "LollmsApplication",
        model="",
        output_path=None
    ):
        """
        Stores the given parameters and marks the service as not ready.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.
            model (str, optional): The STT model to be used for transcription. Defaults to an empty string.
            output_path (Path or str, optional): Path where the output transcription files will be saved. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.output_path = output_path
        self.model = model

    def transcribe(
        self,
        wav_path: "str | Path",
        prompt=""
    ):
        """
        Transcribes the given audio file to text.

        The base implementation does nothing and returns None.

        Args:
            wav_path (str or Path): The path to the WAV audio file to be transcribed.
            prompt (str, optional): An optional prompt to guide the transcription. Defaults to an empty string.
        """
        pass

    @staticmethod
    def verify(app: "LollmsApplication") -> bool:
        """
        Verifies if the STT service is available.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class.
        """
        return True

    @staticmethod
    def install(app: "LollmsApplication") -> bool:
        """
        Installs the necessary components for the STT service.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class, which has nothing to install.
        """
        return True

    @staticmethod
    def get(app: "LollmsApplication") -> "type[LollmsSTT]":
        """
        Returns the LollmsSTT class.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            type[LollmsSTT]: The class itself, not an instance.
        """
        return LollmsSTT
|
115
lollms/tti.py
Normal file
115
lollms/tti.py
Normal file
@ -0,0 +1,115 @@
|
||||
"""
|
||||
Lollms TTI Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Text-to-Image (TTI) functionalities within the LollmsApplication framework. The base class `LollmsTTI` is intended to be inherited and implemented by other classes that provide specific TTI functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
class LollmsTTI:
    """
    Base class for Text-to-Image (TTI) services used within the LollmsApplication.

    Attributes:
        ready (bool): Set to False by this constructor.
        app (LollmsApplication): The instance of the main Lollms application.
        model (str): The TTI model to be used for image generation.
        api_key (str): API key for accessing external TTI services (if needed).
        output_path (Path or str): Path where the output image files will be saved.
        voices (List[str]): Initialised empty; nothing in this class reads it.
        models (List[str]): List of available models for TTI (to be filled by the child class).
    """

    def __init__(
        self,
        app: "LollmsApplication",
        model="",
        api_key="",
        output_path=None
    ):
        """
        Stores the given parameters and marks the service as not ready.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.
            model (str, optional): The TTI model to be used for image generation. Defaults to an empty string.
            api_key (str, optional): API key for accessing external TTI services. Defaults to an empty string.
            output_path (Path or str, optional): Path where the output image files will be saved. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.model = model
        self.api_key = api_key
        self.output_path = output_path
        self.voices = []  # Kept so existing readers of the attribute keep working
        self.models = []  # To be filled by the child class

    def paint(self, positive_prompt: str, negative_prompt: str = "") -> "List[Dict[str, str]] | None":
        """
        Generates images based on the given positive and negative prompts.

        The base implementation does nothing and returns None.

        Args:
            positive_prompt (str): The positive prompt describing the desired image.
            negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.

        Returns:
            List[Dict[str, str]]: In child classes, a list of dictionaries containing image paths, URLs, and metadata.
        """
        pass

    def paint_from_images(self, positive_prompt: str, images: "List[str]", negative_prompt: str = "") -> "List[Dict[str, str]] | None":
        """
        Generates images based on the given positive prompt and reference images.

        The base implementation does nothing and returns None.

        Args:
            positive_prompt (str): The positive prompt describing the desired image.
            images (List[str]): A list of paths to reference images.
            negative_prompt (str, optional): The negative prompt describing what should be avoided in the image. Defaults to an empty string.

        Returns:
            List[Dict[str, str]]: In child classes, a list of dictionaries containing image paths, URLs, and metadata.
        """
        pass

    @staticmethod
    def verify(app: "LollmsApplication") -> bool:
        """
        Verifies if the TTI service is available.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class.
        """
        return True

    @staticmethod
    def install(app: "LollmsApplication") -> bool:
        """
        Installs the necessary components for the TTI service.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class, which has nothing to install.
        """
        return True

    @staticmethod
    def get(app: "LollmsApplication") -> "type[LollmsTTI]":
        """
        Returns the LollmsTTI class.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            type[LollmsTTI]: The class itself, not an instance.
        """
        return LollmsTTI
|
122
lollms/tts.py
Normal file
122
lollms/tts.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Lollms TTS Module
|
||||
=================
|
||||
|
||||
This module is part of the Lollms library, designed to provide Text-to-Speech (TTS) functionalities within the LollmsApplication framework. The base class `LollmsTTS` is intended to be inherited and implemented by other classes that provide specific TTS functionalities.
|
||||
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
from lollms.app import LollmsApplication
|
||||
from pathlib import Path
|
||||
|
||||
class LollmsTTS:
    """
    Base class for Text-to-Speech (TTS) services used within the LollmsApplication.

    Attributes:
        ready (bool): Set to False by this constructor.
        app (LollmsApplication): The instance of the main Lollms application.
        model (str): The speech generation model to be used for TTS.
        voice (str): The voice model to be used for TTS.
        api_key (str): API key for accessing external TTS services (if needed).
        output_path (Path or str): Path where the output audio files will be saved.
        voices (list): List of available voices (to be filled by the child class).
        models (list): List of available models (to be filled by the child class).
    """

    def __init__(
        self,
        app: "LollmsApplication",
        model="",
        voice="",
        api_key="",
        output_path=None
    ):
        """
        Stores the given parameters and marks the service as not ready.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.
            model (str, optional): The speech generation model to be used for TTS. Defaults to an empty string.
            voice (str, optional): The voice model to be used for TTS. Defaults to an empty string.
            api_key (str, optional): API key for accessing external TTS services. Defaults to an empty string.
            output_path (Path or str, optional): Path where the output audio files will be saved. Defaults to None.
        """
        self.ready = False
        self.app = app
        self.model = model
        self.voice = voice
        self.api_key = api_key
        self.output_path = output_path
        self.voices = []  # To be filled by the child class
        self.models = []  # To be filled by the child class

    def tts_to_file(self, text, speaker, file_name_or_path, language="en"):
        """
        Converts the given text to speech and saves it to a file.

        The base implementation does nothing and returns None.

        Args:
            text (str): The text to be converted to speech.
            speaker (str): The speaker/voice model to be used.
            file_name_or_path (Path or str): The name or path of the output file.
            language (str, optional): The language of the text. Defaults to "en".
        """
        pass

    def tts_to_audio(self, text, speaker, file_name_or_path: "Path | str | None" = None, language="en", use_threading=False):
        """
        Converts the given text to speech and plays or returns the audio, depending on the child class.

        The base implementation does nothing and returns None.

        Args:
            text (str): The text to be converted to speech.
            speaker (str): The speaker/voice model to be used.
            file_name_or_path (Path or str, optional): The name or path of the output file. Defaults to None.
            language (str, optional): The language of the text. Defaults to "en".
            use_threading (bool, optional): Whether to use threading for the operation. Defaults to False.
        """
        pass

    @staticmethod
    def verify(app: "LollmsApplication") -> bool:
        """
        Verifies if the TTS service is available.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class.
        """
        return True

    @staticmethod
    def install(app: "LollmsApplication") -> bool:
        """
        Installs the necessary components for the TTS service.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class, which has nothing to install.
        """
        return True

    @staticmethod
    def get(app: "LollmsApplication") -> "type[LollmsTTS]":
        """
        Returns the LollmsTTS class.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            type[LollmsTTS]: The class itself, not an instance.
        """
        return LollmsTTS

    def get_voices(self):
        """
        Retrieves the available voices for TTS.

        Returns:
            list: The ``voices`` list itself (not a copy).
        """
        return self.voices
|
Loading…
Reference in New Issue
Block a user