mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-03-13 15:56:37 +00:00
enhanced core
This commit is contained in:
parent
27538a5f3e
commit
0739f1c279
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 156
|
||||
version: 157
|
||||
|
||||
# video viewing and news recovering
|
||||
last_viewed_video: null
|
||||
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
|
||||
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
|
||||
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
|
||||
# ***************** TTV *****************
|
||||
# Novita_ai configuration
|
||||
novita_ai_key: ""
|
||||
|
||||
cog_video_x_model: "THUDM/CogVideoX-5b"
|
||||
|
||||
# lumalabs configuration
|
||||
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
|
||||
|
||||
mounted_function_calls: []
|
||||
# { name: the function name,
|
||||
# author: the author of the function
|
||||
# category: the category of the function
|
||||
# value: the function name without spaces,
|
||||
# selected: selected or not,
|
||||
# icon: the icon in form feather:icon name or img:url or b64:base64,
|
||||
|
@ -624,12 +624,18 @@ class LollmsApplication(LoLLMsCom):
|
||||
ASCIIColors.execute_with_animation("Loading loacal TTI services", start_tti, ASCIIColors.color_blue)
|
||||
|
||||
def start_ttv(*args, **kwargs):
    # Lazily instantiate the text-to-video (TTV) binding matching the
    # configured service. Only rebuilds when no binding is loaded yet or
    # the loaded binding's name differs from the configured one.
    if self.config.active_ttv_service == "lumalabs" and (self.ttv is None or self.ttv.name != "lumalabs"):
        try:
            from lollms.services.ttv.lumalabs.lollms_lumalabs import LollmsLumaLabs
            self.ttv = LollmsLumaLabs(self.config.lumalabs_key)
        except Exception:
            # Best-effort: a missing/broken binding must not abort startup.
            self.warning("Couldn't create lumalabs binding")
    if self.config.active_ttv_service == "novita_ai" and (self.ttv is None or self.ttv.name != "novita_ai"):
        try:
            from lollms.services.ttv.novita_ai.lollms_novita_ai import LollmsNovitaAITextToVideo
            self.ttv = LollmsNovitaAITextToVideo(self.config.novita_ai_key)
        except Exception:
            self.warning("Couldn't create novita ai binding")

ASCIIColors.execute_with_animation("Loading local TTV services", start_ttv, ASCIIColors.color_blue)
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 156
|
||||
version: 157
|
||||
|
||||
# video viewing and news recovering
|
||||
last_viewed_video: null
|
||||
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
|
||||
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
|
||||
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
|
||||
# ***************** TTV *****************
|
||||
# Novita_ai configuration
|
||||
novita_ai_key: ""
|
||||
|
||||
cog_video_x_model: "THUDM/CogVideoX-5b"
|
||||
|
||||
# lumalabs configuration
|
||||
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
|
||||
|
||||
mounted_function_calls: []
|
||||
# { name: the function name,
|
||||
# author: the author of the function
|
||||
# category: the category of the function
|
||||
# value: the function name without spaces,
|
||||
# selected: selected or not,
|
||||
# icon: the icon in form feather:icon name or img:url or b64:base64,
|
||||
|
@ -1,15 +1,16 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 149
|
||||
version: 157
|
||||
|
||||
# video viewing and news recovering
|
||||
last_viewed_video: null
|
||||
last_viewed_changelog_version: null
|
||||
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
model_type: null
|
||||
|
||||
show_news_panel: true
|
||||
show_news_panel: false
|
||||
|
||||
# Security measures
|
||||
turn_on_setting_update_validation: true
|
||||
@ -110,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
|
||||
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
|
||||
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
@ -220,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
|
||||
# ***************** TTV *****************
|
||||
# Novita_ai configuration
|
||||
novita_ai_key: ""
|
||||
|
||||
cog_video_x_model: "THUDM/CogVideoX-5b"
|
||||
|
||||
# lumalabs configuration
|
||||
@ -336,10 +340,39 @@ positive_boost: null
|
||||
negative_boost: null
|
||||
current_language: english
|
||||
fun_mode: false
|
||||
think_first_mode: false
|
||||
thinking_prompt: "Use a think first process to answer the user:
|
||||
<think>
|
||||
Ask yourself about the user's request and answer it with logical details.
|
||||
If the user is requesting general information that does not require internet search and you are confident about it, then prepare to answer directly.
|
||||
If the user is requesting general information that does require internet search and you have in the context enough information to answer, then use that data to answer.
|
||||
If the user is requesting general information that does require internet search but you do not have any information, then ask him to activate internet search.
|
||||
|
||||
if the user is posing a riddle or asking a math question, make sure you use rigorous math hypothesis, testing and analysis.
|
||||
If the user is requesting to perform a task, then plan it through steps and prepare to answer
|
||||
If the user is just discussing casually, do not perform the think first process
|
||||
|
||||
Make sure you continue thinking until you find a satisfactory answer
|
||||
Assess any potential errors you may make
|
||||
</think>
|
||||
|
||||
After thinking you can answer the user."
|
||||
|
||||
|
||||
|
||||
|
||||
mounted_function_calls: []
|
||||
# { name: the function name,
|
||||
# author: the author of the function
|
||||
# category: the category of the function
|
||||
# value: the function name without spaces,
|
||||
# selected: selected or not,
|
||||
# icon: the icon in form feather:icon name or img:url or b64:base64,
|
||||
# help: the help
|
||||
# }
|
||||
|
||||
# webui configurations
|
||||
show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
keep_thoughts: false
|
||||
|
40
lollms/server/endpoints/lollms_ttm.py
Normal file
40
lollms/server/endpoints/lollms_ttm.py
Normal file
@ -0,0 +1,40 @@
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from base64 import b64encode
|
||||
import io
|
||||
from PIL import Image
|
||||
from fastapi import APIRouter
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from ascii_colors import trace_exception
|
||||
from lollms.security import check_access
|
||||
|
||||
router = APIRouter()
|
||||
lollmsElfServer = LOLLMSWebUI.get_instance()
|
||||
|
||||
|
||||
# Pydantic request-body model shared by the TTM endpoints.
class TTMServiceRequest(BaseModel):
    """Request payload for the TTM service endpoints."""

    client_id: str  # caller identifier, validated via check_access
|
||||
|
||||
@router.post("/list_ttm_services", response_model=List[str])
async def list_ttm_services(request: TTMServiceRequest):
    """
    Returns the static list of available TTM (text-to-music) services.

    Args:
        request (TTMServiceRequest): The request body containing the client_id.

    Returns:
        List[str]: A list of TTM service names.
    """
    # Reject callers that do not present a valid client_id.
    check_access(lollmsElfServer, request.client_id)

    # Static list of TTM services
    ttm_services = ["suno"]

    return ttm_services
|
40
lollms/server/endpoints/lollms_ttv.py
Normal file
40
lollms/server/endpoints/lollms_ttv.py
Normal file
@ -0,0 +1,40 @@
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from base64 import b64encode
|
||||
import io
|
||||
from PIL import Image
|
||||
from fastapi import APIRouter
|
||||
from lollms_webui import LOLLMSWebUI
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from ascii_colors import trace_exception
|
||||
from lollms.security import check_access
|
||||
|
||||
router = APIRouter()
|
||||
lollmsElfServer = LOLLMSWebUI.get_instance()
|
||||
|
||||
|
||||
# Pydantic request-body model shared by the TTV endpoints.
class TTVServiceRequest(BaseModel):
    """Request payload for the TTV service endpoints."""

    client_id: str  # caller identifier, validated via check_access
|
||||
|
||||
@router.post("/list_ttv_services", response_model=List[str])
async def list_ttv_services(request: TTVServiceRequest):
    """
    Returns the static list of available TTV (text-to-video) services.

    Args:
        request (TTVServiceRequest): The request body containing the client_id.

    Returns:
        List[str]: A list of TTV service names.
    """
    # Reject callers that do not present a valid client_id.
    check_access(lollmsElfServer, request.client_id)

    # Static list of TTV services
    ttv_services = ["novita_ai", "cog_video_x", "diffusers", "lumalab"]

    return ttv_services
|
127
lollms/services/ttv/novita_ai/lollms_novita_ai.py
Normal file
127
lollms/services/ttv/novita_ai/lollms_novita_ai.py
Normal file
@ -0,0 +1,127 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from lollms.ttv import LollmsTTV
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
|
||||
class LollmsNovitaAITextToVideo(LollmsTTV):
    """
    A binding for the Novita.ai Text-to-Video API.

    Allows generating videos from text prompts using the asynchronous
    Novita.ai service: submit a task, poll its result, then download the video.
    """
    def __init__(self, api_key: str, base_url: str = "https://api.novita.ai/v3/async"):
        """
        Initializes the NovitaAITextToVideo binding.

        Args:
            api_key (str): The API key for authentication. If None, the
                NOVITA_AI_KEY environment variable is used instead.
            base_url (str): The base URL for the Novita.ai API. Defaults to
                "https://api.novita.ai/v3/async".

        Raises:
            ValueError: If no API key is provided and the NOVITA_AI_KEY
                environment variable is unset or empty.
        """
        super().__init__("novita_ai")
        if api_key is None:
            # Check for the NOVITA_AI_KEY environment variable if no API key is provided.
            api_key = os.getenv("NOVITA_AI_KEY")
        if not api_key:
            # The original code defaulted getenv to "" which made this guard
            # unreachable; an empty key is treated as missing as well.
            raise ValueError("No API key provided and NOVITA_AI_KEY environment variable is not set.")
        self.api_key = api_key
        self.base_url = base_url

    def generate_video(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        model_name: str = "darkSushiMixMix_225D_64380.safetensors",
        height: int = 512,
        width: int = 512,
        steps: int = 20,
        seed: int = -1,
        guidance_scale: Optional[float] = None,
        loras: Optional[List[Dict[str, Any]]] = None,
        embeddings: Optional[List[Dict[str, Any]]] = None,
        closed_loop: Optional[bool] = None,
        clip_skip: Optional[int] = None,
    ) -> str:
        """
        Submits a text-to-video generation task to the Novita.ai API.

        Args:
            prompt (str): Text description of the desired video.
            negative_prompt (Optional[str]): Text input to avoid in the video. Defaults to None.
            model_name (str): Name of the model checkpoint.
            height (int): Height of the video, range [256, 1024].
            width (int): Width of the video, range [256, 1024].
            steps (int): Number of denoising steps, range [1, 50].
            seed (int): Random seed for reproducibility. Defaults to -1 (random).
            guidance_scale (Optional[float]): Controls adherence to the prompt. Defaults to None.
            loras (Optional[List[Dict[str, Any]]]): List of LoRA parameters. Defaults to None.
            embeddings (Optional[List[Dict[str, Any]]]): List of embeddings. Defaults to None.
            closed_loop (Optional[bool]): Controls animation loop behavior. Defaults to None.
            clip_skip (Optional[int]): Number of layers to skip during optimization. Defaults to None.

        Returns:
            str: The task_id for retrieving the generated video.
        """
        url = f"{self.base_url}/txt2video"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model_name": model_name,
            "height": height,
            "width": width,
            "steps": steps,
            # NOTE(review): the API docstring mentions prompts as dicts with
            # frames and text — confirm a bare string list is accepted.
            "prompts": [prompt],
            "negative_prompt": negative_prompt,
            "seed": seed,
            "guidance_scale": guidance_scale,
            "loras": loras,
            "embeddings": embeddings,
            "closed_loop": closed_loop,
            "clip_skip": clip_skip,
        }
        # Remove None values from the payload to avoid sending null fields.
        payload = {k: v for k, v in payload.items() if v is not None}

        # json= serializes the payload and is equivalent to data=json.dumps(...).
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()  # Raise an exception for HTTP errors

        return response.json().get("task_id")

    def get_task_result(self, task_id: str) -> Dict[str, Any]:
        """
        Retrieves the result of a video generation task using the task_id.

        Args:
            task_id (str): The task_id returned by the generate_video method.

        Returns:
            Dict[str, Any]: The task result containing the video URL and other details.
        """
        url = f"{self.base_url}/task-result"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }
        params = {
            "task_id": task_id,
        }

        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors

        return response.json()

    def download_video(self, video_url: str, save_path: Path) -> None:
        """
        Downloads the generated video from the provided URL and saves it to the specified path.

        Args:
            video_url (str): The URL of the video to download.
            save_path (Path): The path where the video will be saved.
        """
        response = requests.get(video_url)
        response.raise_for_status()  # Raise an exception for HTTP errors

        with open(save_path, "wb") as file:
            file.write(response.content)
|
@ -2,8 +2,51 @@ from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
class LollmsTTV(ABC):
    """
    Abstract base class for text-to-video generation services.

    Concrete bindings implement the two generation methods below; the base
    class only records the service name used for binding identification.
    """
    def __init__(self, service_name):
        # Name used elsewhere to identify which binding is currently loaded.
        self.name = service_name

    @abstractmethod
    def generate_video(self, prompt: str, negative_prompt: str, num_frames: int = 49, fps: int = 8,
                       num_inference_steps: int = 50, guidance_scale: float = 6.0,
                       seed: Optional[int] = None) -> str:
        """
        Generate a video from one text prompt.

        Args:
            prompt (str): Text describing the desired video.
            negative_prompt (str): Text describing elements to avoid.
            num_frames (int): Frame count of the output. Default 49.
            fps (int): Frames per second. Default 8.
            num_inference_steps (int): Model inference steps. Default 50.
            guidance_scale (float): Prompt-adherence strength. Default 6.0.
            seed (Optional[int]): Random seed for reproducibility. Default None.

        Returns:
            str: The path to the generated video.
        """
        pass

    @abstractmethod
    def generate_video_by_frames(self, prompts: List[str], frames: List[int], negative_prompt: str, fps: int = 8,
                                 num_inference_steps: int = 50, guidance_scale: float = 6.0,
                                 seed: Optional[int] = None) -> str:
        """
        Generate a video from per-frame prompts.

        Args:
            prompts (List[str]): One text prompt per keyframe.
            frames (List[int]): Frame indices matching each prompt.
            negative_prompt (str): Text describing elements to avoid.
            fps (int): Frames per second. Default 8.
            num_inference_steps (int): Model inference steps. Default 50.
            guidance_scale (float): Prompt-adherence strength. Default 6.0.
            seed (Optional[int]): Random seed for reproducibility. Default None.

        Returns:
            str: The path to the generated video.
        """
        pass
|
||||
|
Loading…
x
Reference in New Issue
Block a user