enhanced core

This commit is contained in:
Saifeddine ALOUI 2025-03-01 22:54:42 +01:00
parent 27538a5f3e
commit 0739f1c279
8 changed files with 312 additions and 13 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 156
version: 157
# video viewing and news recovering
last_viewed_video: null
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,

View File

@ -624,12 +624,18 @@ class LollmsApplication(LoLLMsCom):
ASCIIColors.execute_with_animation("Loading loacal TTI services", start_tti, ASCIIColors.color_blue)
def start_ttv(*args, **kwargs):
if self.config.active_ttv_service == "lumalabs" and (self.ttv is None or self.tti.name!="lumalabs"):
if self.config.active_ttv_service == "lumalabs" and (self.ttv is None or self.ttv.name!="lumalabs"):
try:
from lollms.services.ttv.lumalabs.lollms_lumalabs import LollmsLumaLabs
self.sd = LollmsLumaLabs(self.config.lumalabs_key)
self.ttv = LollmsLumaLabs(self.config.lumalabs_key)
except:
self.warning(f"Couldn't load SD")
self.warning(f"Couldn't create lumalabs binding")
if self.config.active_ttv_service == "novita_ai" and (self.ttv is None or self.ttv.name!="novita_ai"):
try:
from lollms.services.ttv.novita_ai.lollms_novita_ai import LollmsNovitaAITextToVideo
self.ttv = LollmsNovitaAITextToVideo(self.config.novita_ai_key)
except:
self.warning(f"Couldn't create novita ai bvinding")
ASCIIColors.execute_with_animation("Loading loacal TTV services", start_ttv, ASCIIColors.color_blue)

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 156
version: 157
# video viewing and news recovering
last_viewed_video: null
@ -111,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -221,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -360,6 +363,8 @@ thinking_prompt: "Use a think first process to answer the user:
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,

View File

@ -1,15 +1,16 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 149
version: 157
# video viewing and news recovering
last_viewed_video: null
last_viewed_changelog_version: null
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: true
show_news_panel: false
# Security measures
turn_on_setting_update_validation: true
@ -110,7 +111,7 @@ active_tts_service: "None" # xtts (offline), openai_tts (API key required), elev
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
active_ttv_service: "None" # cog_video_x, diffusers, lumalab (offline)
active_ttv_service: "None" # novita_ai, cog_video_x, diffusers, lumalab (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
@ -220,6 +221,9 @@ motion_ctrl_base_url: http://localhost:7861
# ***************** TTV *****************
# Novita_ai configuration
novita_ai_key: ""
cog_video_x_model: "THUDM/CogVideoX-5b"
# lumalabs configuration
@ -336,10 +340,39 @@ positive_boost: null
negative_boost: null
current_language: english
fun_mode: false
think_first_mode: false
thinking_prompt: "Use a think first process to answer the user:
<think>
Ask yourself about the user's request and answer it with logical details.
If the user is requesting general information that does not require internet search and you are confident about it, then prepare to answer directly.
If the user is requesting general information that does require internet search and you have in the context enough information to answer, then use that data to answer.
If the user is requesting general information that does require internet search but you do not have any information, then ask him to activate internet search.
If the user is posing a riddle or asking a math question, make sure you use rigorous math hypotheses, testing and analysis.
If the user is requesting to perform a task, then plan it through steps and prepare to answer
If the user is just discussing casually, do not perform the think first process
Make sure you continue thinking until you find a satisfactory answer
Assess any potential errors you may make
</think>
After thinking you can answer the user."
mounted_function_calls: []
# { name: the function name,
# author: the author of the function
# category: the category of the function
# value: the function name without spaces,
# selected: selected or not,
# icon: the icon in form feather:icon name or img:url or b64:base64,
# help: the help
# }
# webui configurations
show_code_of_conduct: true
activate_audio_infos: true
keep_thoughts: false

View File

@ -0,0 +1,40 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional
from base64 import b64encode
import io
from PIL import Image
from fastapi import APIRouter
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel
from typing import List
from ascii_colors import trace_exception
from lollms.security import check_access
# Module-level FastAPI router and a handle on the singleton web UI application.
router = APIRouter()
lollmsElfServer = LOLLMSWebUI.get_instance()
# Define a Pydantic model for the request body
class TTMServiceRequest(BaseModel):
    """Request body for the text-to-music (TTM) service-listing endpoint."""
    client_id: str  # caller's client identifier; validated via check_access before use
@router.post("/list_ttm_services", response_model=List[str])
async def list_ttm_services(request: TTMServiceRequest):
    """
    Return the static list of supported text-to-music (TTM) services.

    Args:
        request (TTMServiceRequest): The request body containing the client_id.

    Returns:
        List[str]: A list of TTM service names.

    Raises:
        HTTPException: Propagated by check_access when the client_id is invalid.
    """
    # Authorization gate: rejects callers without a valid client_id.
    check_access(lollmsElfServer, request.client_id)

    # Hard-coded catalogue for now; extend when new TTM bindings are added.
    ttm_services = ["suno"]
    return ttm_services

View File

@ -0,0 +1,40 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional
from base64 import b64encode
import io
from PIL import Image
from fastapi import APIRouter
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel
from typing import List
from ascii_colors import trace_exception
from lollms.security import check_access
# Module-level FastAPI router and a handle on the singleton web UI application.
router = APIRouter()
lollmsElfServer = LOLLMSWebUI.get_instance()
# Define a Pydantic model for the request body
class TTVServiceRequest(BaseModel):
    """Request body for the text-to-video (TTV) service-listing endpoint."""
    client_id: str  # caller's client identifier; validated via check_access before use
@router.post("/list_ttv_services", response_model=List[str])
async def list_ttv_services(request: TTVServiceRequest):
    """
    List the text-to-video (TTV) services known to this server.

    Args:
        request (TTVServiceRequest): Request body carrying the caller's client_id.

    Returns:
        List[str]: Names of the available TTV services.
    """
    # Authorization gate: rejects callers without a valid client_id.
    check_access(lollmsElfServer, request.client_id)
    # Hard-coded catalogue of currently supported TTV bindings.
    return ["novita_ai", "cog_video_x", "diffusers", "lumalab"]

View File

@ -0,0 +1,127 @@
from pathlib import Path
from typing import List, Optional, Dict, Any
from lollms.ttv import LollmsTTV
import requests
import json
import os
class LollmsNovitaAITextToVideo(LollmsTTV):
    """
    A binding for the Novita.ai Text-to-Video API.

    Workflow: submit a job with generate_video (returns a task_id), poll it
    with get_task_result, then fetch the file with download_video.

    NOTE(review): the LollmsTTV base class also declares an abstract
    generate_video_by_frames which this binding does not implement yet —
    confirm whether ABC enforcement blocks instantiation.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.novita.ai/v3/async"):
        """
        Initializes the NovitaAITextToVideo binding.

        Args:
            api_key (str): The API key for authentication. If empty or None,
                the NOVITA_AI_KEY environment variable is used instead.
            base_url (str): The base URL for the Novita.ai API. Defaults to
                "https://api.novita.ai/v3/async".

        Raises:
            ValueError: If no API key is provided and NOVITA_AI_KEY is not set.
        """
        super().__init__("novita_ai")
        if not api_key:
            # Fall back to the environment. Note: the original code used
            # os.getenv("NOVITA_AI_KEY", "") followed by `if api_key is None`,
            # which made the error branch unreachable and silently accepted
            # an empty key. Checking emptiness fixes that.
            api_key = os.getenv("NOVITA_AI_KEY")
        if not api_key:
            raise ValueError("No API key provided and NOVITA_AI_KEY environment variable is not set.")
        self.api_key = api_key
        self.base_url = base_url

    def generate_video(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        model_name: str = "darkSushiMixMix_225D_64380.safetensors",
        height: int = 512,
        width: int = 512,
        steps: int = 20,
        seed: int = -1,
        guidance_scale: Optional[float] = None,
        loras: Optional[List[Dict[str, Any]]] = None,
        embeddings: Optional[List[Dict[str, Any]]] = None,
        closed_loop: Optional[bool] = None,
        clip_skip: Optional[int] = None,
    ) -> str:
        """
        Submits a text-to-video generation task to the Novita.ai API.

        Args:
            prompt (str): Text description of the desired video.
            negative_prompt (Optional[str]): Elements to avoid in the video.
            model_name (str): Name of the model checkpoint.
            height (int): Height of the video, range [256, 1024].
            width (int): Width of the video, range [256, 1024].
            steps (int): Number of denoising steps, range [1, 50].
            seed (int): Random seed for reproducibility; -1 lets the service pick.
            guidance_scale (Optional[float]): Controls adherence to the prompt.
            loras (Optional[List[Dict[str, Any]]]): LoRA parameters.
            embeddings (Optional[List[Dict[str, Any]]]): Embedding parameters.
            closed_loop (Optional[bool]): Controls animation loop behavior.
            clip_skip (Optional[int]): Number of layers to skip during optimization.

        Returns:
            str: The task_id for retrieving the generated video (None if the
                response carries no "task_id" field).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.base_url}/txt2video"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }
        payload = {
            "model_name": model_name,
            "height": height,
            "width": width,
            "steps": steps,
            # NOTE(review): the API description mentions prompts as dicts with
            # frame info; a bare string list is sent here — verify against the
            # Novita.ai txt2video schema.
            "prompts": [prompt],
            "negative_prompt": negative_prompt,
            "seed": seed,
            "guidance_scale": guidance_scale,
            "loras": loras,
            "embeddings": embeddings,
            "closed_loop": closed_loop,
            "clip_skip": clip_skip,
        }
        # Drop None entries so optional fields are omitted rather than sent as null.
        payload = {k: v for k, v in payload.items() if v is not None}

        # json= serializes the payload and sets the Content-Type header in one
        # step (equivalent to data=json.dumps(payload) plus a manual header).
        # A timeout prevents the request from hanging forever on a dead server.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json().get("task_id")

    def get_task_result(self, task_id: str) -> Dict[str, Any]:
        """
        Retrieves the result of a video generation task using the task_id.

        Args:
            task_id (str): The task_id returned by the generate_video method.

        Returns:
            Dict[str, Any]: The task result containing the video URL and other details.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        url = f"{self.base_url}/task-result"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
        }
        params = {
            "task_id": task_id,
        }
        response = requests.get(url, headers=headers, params=params, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()

    def download_video(self, video_url: str, save_path: Path) -> None:
        """
        Downloads the generated video from the provided URL and saves it to
        the specified path.

        Streams the body in chunks so large videos are not buffered entirely
        in memory (the original implementation loaded the whole file at once).

        Args:
            video_url (str): The URL of the video to download.
            save_path (Path): The path where the video will be saved.

        Raises:
            requests.HTTPError: If the download request fails.
        """
        with requests.get(video_url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors
            with open(save_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    file.write(chunk)

View File

@ -2,8 +2,51 @@ from abc import ABC, abstractmethod
from typing import List, Optional
class LollmsTTV(ABC):
"""
Abstract base class for text-to-video generation services.
Subclasses must implement the methods to generate videos from text prompts.
"""
def __init__(self, service_name):
self.name = service_name
@abstractmethod
def generate_video(self, prompt: str, num_frames: int = 49, fps: int = 8,
def generate_video(self, prompt: str, negative_prompt: str, num_frames: int = 49, fps: int = 8,
num_inference_steps: int = 50, guidance_scale: float = 6.0,
seed: Optional[int] = None) -> str:
pass
"""
Generates a video from a single text prompt.
Args:
prompt (str): The text prompt describing the video.
negative_prompt (str): Text describing elements to avoid in the video.
num_frames (int): Number of frames in the video. Default is 49.
fps (int): Frames per second. Default is 8.
num_inference_steps (int): Number of steps for the model to infer. Default is 50.
guidance_scale (float): Controls how closely the model adheres to the prompt. Default is 6.0.
seed (Optional[int]): Random seed for reproducibility. Default is None.
Returns:
str: The path to the generated video.
"""
pass
@abstractmethod
def generate_video_by_frames(self, prompts: List[str], frames: List[int], negative_prompt: str, fps: int = 8,
num_inference_steps: int = 50, guidance_scale: float = 6.0,
seed: Optional[int] = None) -> str:
"""
Generates a video from a list of prompts and corresponding frames.
Args:
prompts (List[str]): List of text prompts for each frame.
frames (List[int]): List of frame indices corresponding to each prompt.
negative_prompt (str): Text describing elements to avoid in the video.
fps (int): Frames per second. Default is 8.
num_inference_steps (int): Number of steps for the model to infer. Default is 50.
guidance_scale (float): Controls how closely the model adheres to the prompt. Default is 6.0.
seed (Optional[int]): Random seed for reproducibility. Default is None.
Returns:
str: The path to the generated video.
"""
pass