enhanced core

This commit is contained in:
Saifeddine ALOUI 2025-03-01 22:54:42 +01:00
parent 27538a5f3e
commit 0739f1c279
8 changed files with 312 additions and 13 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 156
version: 157
# video viewing and news recovering
last_viewed_video: null
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,

View File

@ -624,12 +624,18 @@ class LollmsApplication(LoLLMsCom):
ASCIIColors.execute_with_animation("Loading loacal TTI services", start_tti, ASCIIColors.color_blue)
def start_ttv(*args, **kwargs):
if self.config.active_ttv_service == "lumalabs" and (self.ttv is None or self.tti.name!="lumalabs"):
if self.config.active_ttv_service == "lumalabs" and (self.ttv is None or self.ttv.name!="lumalabs"):
try:
from lollms.services.ttv.lumalabs.lollms_lumalabs import LollmsLumaLabs
self.sd = LollmsLumaLabs(self.config.lumalabs_key)
self.ttv = LollmsLumaLabs(self.config.lumalabs_key)
except:
self.warning(f"Couldn't load SD")
self.warning(f"Couldn't create lumalabs binding")
if self.config.active_ttv_service == "novita_ai" and (self.ttv is None or self.ttv.name!="novita_ai"):
try:
from lollms.services.ttv.novita_ai.lollms_novita_ai import LollmsNovitaAITextToVideo
self.ttv = LollmsNovitaAITextToVideo(self.config.novita_ai_key)
except:
self.warning(f"Couldn't create novita ai bvinding")
ASCIIColors.execute_with_animation("Loading loacal TTV services", start_ttv, ASCIIColors.color_blue)

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 156
version: 157
# video viewing and news recovering
last_viewed_video: null
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,

View File

@ -1,15 +1,16 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 149
version: 157
# video viewing and news recovering
last_viewed_video: null
last_viewed_changelog_version: null
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: true
show_news_panel: false
# Security measures
turn_on_setting_update_validation: true
@ -110,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -220,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -336,10 +340,39 @@ positive_boost: null
negative_boost: null
current_language: english
fun_mode: false
think_first_mode: false
thinking_prompt: "Use a think first process to answer the user:
<think>
Ask yourself about the user's request and answer it with logical details.
If the user is requesting general information that does not require internet search and you are confident about it, then prepare to answer directly.
If the user is requesting general information that does require internet search and you have in the context enough information to answer, then use that data to answer.
If the user is requesting general information that does require internet search but you do not have any information, then ask him to activate internet search.
If the user is posing a riddle or asking a math question, make sure you use rigorous math hypotheses, testing and analysis.
If the user is requesting to perform a task, then plan it through steps and prepare to answer
If the user is just discussing casually, do not perform the think first process
Make sure you continue thinking until you find a satisfactory answer
Assess any potential errors you may make
</think>
After thinking you can answer the user."
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,
# help: the help
# }
# webui configurations
show_code_of_conduct: true
activate_audio_infos: true
keep_thoughts: false

View File

@ -0,0 +1,40 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional
from base64 import b64encode
import io
from PIL import Image
from fastapi import APIRouter
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel
from typing import List
from ascii_colors import trace_exception
from lollms.security import check_access
# Module-level FastAPI router and a handle on the singleton web UI application.
router = APIRouter()
lollmsElfServer = LOLLMSWebUI.get_instance()
# Define a Pydantic model for the request body
class TTMServiceRequest(BaseModel):
    """Request body for the text-to-music (TTM) service-listing endpoint."""
    client_id: str  # caller's client identifier; validated via check_access before use
@router.post("/list_ttm_services", response_model=List[str])
async def list_ttm_services(request: TTMServiceRequest):
    """
    Return the static list of supported text-to-music (TTM) services.

    Args:
        request (TTMServiceRequest): The request body containing the client_id.

    Returns:
        List[str]: A list of TTM service names.

    Raises:
        HTTPException: Propagated by check_access when the client_id is invalid.
    """
    # Authorization gate: rejects callers without a valid client_id.
    check_access(lollmsElfServer, request.client_id)

    # Hard-coded catalogue for now; extend when new TTM bindings are added.
    ttm_services = ["suno"]
    return ttm_services

View File

@ -0,0 +1,40 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional
from base64 import b64encode
import io
from PIL import Image
from fastapi import APIRouter
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel
from typing import List
from ascii_colors import trace_exception
from lollms.security import check_access
# Module-level FastAPI router and a handle on the singleton web UI application.
router = APIRouter()
lollmsElfServer = LOLLMSWebUI.get_instance()
# Define a Pydantic model for the request body
class TTVServiceRequest(BaseModel):
    """Request body for the text-to-video (TTV) service-listing endpoint."""
    client_id: str  # caller's client identifier; validated via check_access before use
@router.post("/list_ttv_services", response_model=List[str])
async def list_ttv_services(request: TTVServiceRequest):
    """
    List the text-to-video (TTV) services known to this server.

    Args:
        request (TTVServiceRequest): Request body carrying the caller's client_id.

    Returns:
        List[str]: Names of the available TTV services.
    """
    # Authorization gate: rejects callers without a valid client_id.
    check_access(lollmsElfServer, request.client_id)
    # Hard-coded catalogue of currently supported TTV bindings.
    return ["novita_ai", "cog_video_x", "diffusers", "lumalab"]

View File

@ -0,0 +1,127 @@
from pathlib import Path
from typing import List, Optional, Dict, Any
from lollms.ttv import LollmsTTV
import requests
import json
import os
class LollmsNovitaAITextToVideo(LollmsTTV):
    """
    A binding for the Novita.ai Text-to-Video API.

    Workflow: submit a job with generate_video (returns a task_id), poll it
    with get_task_result, then fetch the file with download_video.

    NOTE(review): the LollmsTTV base class also declares an abstract
    generate_video_by_frames which this binding does not implement yet —
    confirm whether ABC enforcement blocks instantiation.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.novita.ai/v3/async"):
        """
        Initializes the NovitaAITextToVideo binding.

        Args:
            api_key (str): The API key for authentication. If empty or None,
                the NOVITA_AI_KEY environment variable is used instead.
            base_url (str): The base URL for the Novita.ai API. Defaults to
                "https://api.novita.ai/v3/async".

        Raises:
            ValueError: If no API key is provided and NOVITA_AI_KEY is not set.
        """
        super().__init__("novita_ai")
        if not api_key:
            # Fall back to the environment. Note: the original code used
            # os.getenv("NOVITA_AI_KEY", "") followed by `if api_key is None`,
            # which made the error branch unreachable and silently accepted
            # an empty key. Checking emptiness fixes that.
            api_key = os.getenv("NOVITA_AI_KEY")
        if not api_key:
            raise ValueError("No API key provided and NOVITA_AI_KEY environment variable is not set.")
        self.api_key = api_key
        self.base_url = base_url

    def generate_video(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        model_name: str = "darkSushiMixMix_225D_64380.safetensors",
        height: int = 512,
        width: int = 512,
        steps: int = 20,
        seed: int = -1,
        guidance_scale: Optional[float] = None,
        loras: Optional[List[Dict[str, Any]]] = None,
        embeddings: Optional[List[Dict[str, Any]]] = None,
        closed_loop: Optional[bool] = None,
        clip_skip: Optional[int] = None,
    ) -> str:
        """
        Submits a text-to-video generation task to the Novita.ai API.

        Args:
            prompt (str): Text description of the desired video.
            negative_prompt (Optional[str]): Elements to avoid in the video.
            model_name (str): Name of the model checkpoint.
            height (int): Height of the video, range [256, 1024].
            width (int): Width of the video, range [256, 1024].
            steps (int): Number of denoising steps, range [1, 50].
            seed (int): Random seed for reproducibility; -1 lets the service pick.
            guidance_scale (Optional[float]): Controls adherence to the prompt.
            loras (Optional[List[Dict[str, Any]]]): LoRA parameters.
            embeddings (Optional[List[Dict[str, Any]]]): Embedding parameters.
            closed_loop (Optional[bool]): Controls animation loop behavior.
            clip_skip (Optional[int]): Number of layers to skip during optimization.

        Returns:
            str: The task_id for retrieving the generated video (None if the
                response carries no "task_id" field).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.base_url}/txt2video"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }
        payload = {
            "model_name": model_name,
            "height": height,
            "width": width,
            "steps": steps,
            # NOTE(review): the API description mentions prompts as dicts with
            # frame info; a bare string list is sent here — verify against the
            # Novita.ai txt2video schema.
            "prompts": [prompt],
            "negative_prompt": negative_prompt,
            "seed": seed,
            "guidance_scale": guidance_scale,
            "loras": loras,
            "embeddings": embeddings,
            "closed_loop": closed_loop,
            "clip_skip": clip_skip,
        }
        # Drop None entries so optional fields are omitted rather than sent as null.
        payload = {k: v for k, v in payload.items() if v is not None}

        # json= serializes the payload and sets the Content-Type header in one
        # step (equivalent to data=json.dumps(payload) plus a manual header).
        # A timeout prevents the request from hanging forever on a dead server.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json().get("task_id")

    def get_task_result(self, task_id: str) -> Dict[str, Any]:
        """
        Retrieves the result of a video generation task using the task_id.

        Args:
            task_id (str): The task_id returned by the generate_video method.

        Returns:
            Dict[str, Any]: The task result containing the video URL and other details.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.base_url}/task-result"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }
        params = {
            "task_id": task_id,
        }
        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()

    def download_video(self, video_url: str, save_path: Path) -> None:
        """
        Downloads the generated video from the provided URL and saves it to
        the specified path.

        Streams the body in chunks so large videos are not buffered entirely
        in memory (the original implementation loaded the whole file at once).

        Args:
            video_url (str): The URL of the video to download.
            save_path (Path): The path where the video will be saved.

        Raises:
            requests.HTTPError: If the download request fails.
        """
        with requests.get(video_url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors
            with open(save_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    file.write(chunk)

View File

@ -2,8 +2,51 @@ from abc import ABC, abstractmethod
from typing import List, Optional
class LollmsTTV(ABC):
"""
Abstract base class for text-to-video generation services.
Subclasses must implement the methods to generate videos from text prompts.
"""
def __init__(self, service_name):
self.name = service_name
@abstractmethod
def generate_video(self, prompt: str, num_frames: int = 49, fps: int = 8,
def generate_video(self, prompt: str, negative_prompt: str, num_frames: int = 49, fps: int = 8,
num_inference_steps: int = 50, guidance_scale: float = 6.0,
seed: Optional[int] = None) -> str:
pass
"""
Generates a video from a single text prompt.
Args:
prompt (str): The text prompt describing the video.
negative_prompt (str): Text describing elements to avoid in the video.
num_frames (int): Number of frames in the video. Default is 49.
fps (int): Frames per second. Default is 8.
num_inference_steps (int): Number of steps for the model to infer. Default is 50.
guidance_scale (float): Controls how closely the model adheres to the prompt. Default is 6.0.
seed (Optional[int]): Random seed for reproducibility. Default is None.
Returns:
str: The path to the generated video.
"""
pass
@abstractmethod
def generate_video_by_frames(self, prompts: List[str], frames: List[int], negative_prompt: str, fps: int = 8,
num_inference_steps: int = 50, guidance_scale: float = 6.0,
seed: Optional[int] = None) -> str:
"""
Generates a video from a list of prompts and corresponding frames.
Args:
prompts (List[str]): List of text prompts for each frame.
frames (List[int]): List of frame indices corresponding to each prompt.
negative_prompt (str): Text describing elements to avoid in the video.
fps (int): Frames per second. Default is 8.
num_inference_steps (int): Number of steps for the model to infer. Default is 50.
guidance_scale (float): Controls how closely the model adheres to the prompt. Default is 6.0.
seed (Optional[int]): Random seed for reproducibility. Default is None.
Returns:
str: The path to the generated video.
"""
pass