Added dalle

2025-04-24 13:05:49 +00:00 · 2024-05-13 22:56:50 +02:00 · 2024-05-13 22:56:50 +02:00 · bdf68a19d1
commit bdf68a19d1
parent 76456e7962
7 changed files with 168 additions and 9 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 95
+version: 96
 binding_name: null
 model_name: null
 model_variant: null
@ -105,6 +105,11 @@ xtts_enable_text_splitting: true
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# DALL-E service: API key and generation engine
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 95
+version: 96
 binding_name: null
 model_name: null
 model_variant: null
@ -105,6 +105,11 @@ xtts_enable_text_splitting: true
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# DALL-E service: API key and generation engine
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
--- a/lollms/personality.py
+++ b/lollms/personality.py
@ -3341,7 +3341,10 @@ The AI should respond in this format using data from actions_list:
            if function:
                try:
                    # Assuming parameters is a dictionary that maps directly to the function's arguments.
-                    result = function(*parameters)
+                    if type(parameters)==list:
+                        result = function(*parameters)
+                    elif type(parameters)==dict:
+                        result = function(**parameters)
                    results.append(result)
                except TypeError as e:
                    # Handle cases where the function call fails due to incorrect parameters, etc.
--- a/lollms/server/endpoints/lollms_xtts.py
+++ b/lollms/server/endpoints/lollms_xtts.py
@ -14,7 +14,7 @@ from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
 from lollms.main_config import BaseConfig
 from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
-from lollms.security import sanitize_path, validate_path
+from lollms.security import sanitize_path, validate_path, check_access
 from pathlib import Path
 from ascii_colors import ASCIIColors
 import os
@ -25,6 +25,8 @@ import platform
 router = APIRouter()
 lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()

+class Identification(BaseModel):  # request body that identifies the calling client
+    client_id: str  # install_xtts hands this to check_access() before doing anything else

 # ----------------------- voice ------------------------------

@ -243,8 +245,9 @@ async def text2Wav(request: LollmsText2AudioRequest):
        return {"status":False,"error":str(ex)}
    

-@router.get("/install_xtts")
-def install_xtts():
+@router.post("/install_xtts")
+def install_xtts(data:Identification):
+    check_access(lollmsElfServer, data.client_id)
    try:
        if lollmsElfServer.config.headless_server_mode:
            return {"status":False,"error":"Service installation is blocked when in headless mode for obvious security reasons!"}
--- a/lollms/services/dalle/lollms_dalle.py
+++ b/lollms/services/dalle/lollms_dalle.py
@ -0,0 +1,143 @@
+# Title LollmsDalle
+# Licence: MIT
+# Author : Paris Neo
+# Adapted from the work of mix1009's sdwebuiapi
+# check it out : https://github.com/mix1009/sdwebuiapi/tree/main
+# Here is a copy of the LICENCE https://github.com/mix1009/sdwebuiapi/blob/main/LICENSE
+# All rights are reserved
+
+from pathlib import Path
+import sys
+from lollms.app import LollmsApplication
+from lollms.paths import LollmsPaths
+from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
+import time
+import io
+import sys
+import requests
+import os
+import base64
+import subprocess
+import time
+import json
+import platform
+from dataclasses import dataclass
+from PIL import Image, PngImagePlugin
+from enum import Enum
+from typing import List, Dict, Any
+
+from ascii_colors import ASCIIColors, trace_exception
+from lollms.paths import LollmsPaths
+from lollms.utilities import PackageManager, find_next_available_filename
+import subprocess
+import shutil
+from tqdm import tqdm
+import threading
+from io import BytesIO
+
+def get_Dalli(lollms_paths:LollmsPaths):  # accessor for the DALL-E service class; lollms_paths is accepted but not used
+    return LollmsDalle  # the class itself is returned, not an instance
+
+class LollmsDalle:
+    """Image generation service backed by the OpenAI images API (DALL-E 2 / DALL-E 3)."""
+    has_controlnet = False
+    def __init__(
+                    self, 
+                    app:LollmsApplication, 
+                    key="",
+                    generation_engine="dall-e-3",# other possibility "dall-e-2"
+                    output_path=None
+                    ):
+        """Store the service settings.
+
+        Args:
+            app: owning application object; stored but not read inside this class.
+            key: API key, assigned to ``openai.api_key`` on every ``paint`` call.
+            generation_engine: default model name, "dall-e-3" or "dall-e-2".
+            output_path: default folder where generated images are saved.
+        """
+        self.app = app
+        self.key = key
+        self.generation_engine = generation_engine
+        self.output_path = output_path
+
+    def paint(
+                self,
+                prompt,
+                width=512,
+                height=512,
+                images=None,
+                generation_engine=None,
+                output_path=None
+                ):
+        """Generate one image and save it in the output folder.
+
+        With "dall-e-2" and a non-empty ``images`` list, a variation of ``images[0]``
+        is requested; in every other case the image is generated from ``prompt``.
+
+        Args:
+            prompt: text description of the image (stripped before being sent).
+            width, height: requested size, snapped to a size the engine branch allows.
+            images: optional image files (whatever ``Image.open`` accepts); only the first is used.
+            generation_engine: model name; falls back to ``self.generation_engine``.
+            output_path: destination folder; falls back to ``self.output_path``.
+
+        Returns:
+            Path of the saved file, or None when the image could not be downloaded.
+        """
+        if output_path is None:
+            output_path = self.output_path
+        if generation_engine is None:
+            generation_engine = self.generation_engine
+        # Prepare the folder first so a bad output_path fails before any API call is made.
+        output_dir = Path(output_path)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        if not PackageManager.check_package_installed("openai"):
+            PackageManager.install_package("openai")
+        import openai
+        openai.api_key = self.key
+
+        if generation_engine=="dall-e-2":
+            supported_resolutions = [[512, 512], [1024, 1024]]
+            # Pick the supported size with the smallest |dw| + |dh| to the request.
+            width, height = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
+        elif width>height:
+            # Other engines: only the orientation of the request is kept.
+            width, height = 1792, 1024
+        elif width<height:
+            width, height = 1024, 1792
+        else:
+            width, height = 1024, 1024
+
+        if images and generation_engine=="dall-e-2":
+            # Variation mode works on the first image supplied by the caller.
+            image = Image.open(images[0]).resize((width, height))
+            # The image is sent as PNG-encoded bytes.
+            byte_stream = BytesIO()
+            image.save(byte_stream, format='PNG')
+            response = openai.images.create_variation(
+                image=byte_stream.getvalue(),
+                n=1,
+                model=generation_engine, # for now only dalle 2 supports variations
+                size=f"{width}x{height}"
+            )
+        else:
+            response = openai.images.generate(
+                model=generation_engine,
+                prompt=prompt.strip(),
+                quality="standard",
+                size=f"{width}x{height}",
+                n=1,
+                )
+
+        # The API answers with a URL; fetch the image bytes from it.
+        download = requests.get(response.data[0].url)
+        if download.status_code != 200:
+            # Nothing was written, so there is no path to hand back.
+            ASCIIColors.red("Failed to download the image")
+            return None
+        file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_")
+        with open(file_name, "wb") as file:
+            file.write(download.content)
+        ASCIIColors.yellow(f"Image saved to {file_name}")
+        return file_name
--- a/lollms/services/sd/lollms_sd.py
+++ b/lollms/services/sd/lollms_sd.py
@ -390,8 +390,8 @@ class LollmsSD:
                            seed=seed,
                            cfg_scale=scale,
                            steps=steps,
-                            width=width,
-                            height=height,
+                            width=int(width),
+                            height=int(height),
                            denoising_strength=img2img_denoising_strength,
                            tiling=False,
                            restore_faces=restore_faces,
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -198,7 +198,7 @@ class LollmsXTTS:
                    if self.voices_folder is not None:
                        print("Generating sample audio.")
                        voice_file =  [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
-                        self.tts_to_audio("xtts is ready",voice_file[0].name)
+                        self.tts_to_audio("x t t s is ready",voice_file[0].name)
                    print("Service is available.")
                    if self.app is not None:
                        self.app.success("XTTS Service is now available.")