added text to music

This commit is contained in:
Saifeddine ALOUI 2024-06-07 01:34:14 +02:00
parent eec5a2a471
commit 4b6afce818
10 changed files with 655 additions and 46 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 110
version: 111
binding_name: null
model_name: null
model_variant: null
@ -189,6 +189,7 @@ midjourney_key: ""
# Image generation service comfyui
enable_comfyui_service: false
comfyui_base_url: http://127.0.0.1:8188/
comfyui_model: v1-5-pruned-emaonly.ckpt
# Motion control service
enable_motion_ctrl_service: false

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 110
version: 111
binding_name: null
model_name: null
model_variant: null
@ -189,6 +189,7 @@ midjourney_key: ""
# Image generation service comfyui
enable_comfyui_service: false
comfyui_base_url: http://127.0.0.1:8188/
comfyui_model: v1-5-pruned-emaonly.ckpt
# Motion control service
enable_motion_ctrl_service: false

View File

@ -37,7 +37,6 @@ def build_image(prompt, negative_prompt, width, height, processor:APScript, clie
file, infos = processor.personality.app.tti.paint(
prompt,
negative_prompt,
processor.personality.image_files,
width = width,
height = height,
output_path=client.discussion.discussion_folder
@ -52,7 +51,6 @@ def build_image(prompt, negative_prompt, width, height, processor:APScript, clie
file, infos = processor.personality.app.tti.paint(
prompt,
negative_prompt,
processor.personality.image_files,
width = width,
height = height,
output_path=client.discussion.discussion_folder

View File

@ -25,14 +25,21 @@ from dataclasses import dataclass
from PIL import Image, PngImagePlugin
from enum import Enum
from typing import List, Dict, Any
import uuid
from ascii_colors import ASCIIColors, trace_exception
from lollms.paths import LollmsPaths
from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env, run_python_script_in_env
from lollms.utilities import git_pull, show_yes_no_dialog, run_script_in_env, create_conda_env, run_python_script_in_env, PackageManager
from lollms.tti import LollmsTTI
import subprocess
import shutil
from tqdm import tqdm
if not PackageManager.check_package_installed("websocket"):
PackageManager.install_or_update("websocket-client")
import websocket
if not PackageManager.check_package_installed("urllib"):
PackageManager.install_or_update("urllib")
from urllib import request, parse
def verify_comfyui(lollms_paths:LollmsPaths):
# Clone repository
@ -144,18 +151,13 @@ def get_comfyui(lollms_paths:LollmsPaths):
ASCIIColors.success("ok")
return LollmsComfyUI
class LollmsComfyUI:
class LollmsComfyUI(LollmsTTI):
has_controlnet = False
def __init__(
self,
app:LollmsApplication,
wm = "Artbot",
max_retries=50,
sampler="Euler a",
steps=20,
use_https=False,
username=None,
password=None,
comfyui_base_url=None,
share=False,
wait_for_service=True
@ -250,3 +252,183 @@ class LollmsComfyUI:
if self.app is not None:
self.app.error("Comfyui Service did not become available within the given time.")
return False
def paint(
    self,
    positive_prompt,
    negative_prompt,
    sampler_name="",
    seed=-1,
    scale=7.5,
    steps=20,
    img2img_denoising_strength=0.9,
    width=512,
    height=512,
    restore_faces=True,
    output_path=None
):
    """
    Generate an image with a ComfyUI server using a basic text-to-image workflow.

    The workflow (checkpoint loader -> CLIP encoders -> KSampler -> VAE decode ->
    SaveImage) is queued through the server's ``/prompt`` endpoint, completion is
    awaited on the ``/ws`` websocket, and the produced image is fetched through
    ``/history`` and ``/view`` and written to ``output_path``.

    Args:
        positive_prompt (str): Text describing the desired image.
        negative_prompt (str): Text describing what should be avoided.
        sampler_name (str): ComfyUI sampler identifier. An empty string selects "euler".
        seed (int): Sampler seed. -1 draws a random seed.
        scale (float): Guidance scale, sent as the KSampler ``cfg`` input.
        steps (int): Number of sampling steps.
        img2img_denoising_strength (float): Unused here; kept for signature parity with the other TTI services.
        width (int): Width of the latent image.
        height (int): Height of the latent image.
        restore_faces (bool): Unused here; kept for signature parity with the other TTI services.
        output_path (Path or str, optional): Folder where the image is saved.

    Returns:
        tuple: ``(file_path, infos)``. ``file_path`` is None when the server reported no image.

    Raises:
        ValueError: If no output folder can be determined.
    """
    # Function-scope imports: these stdlib names are not guaranteed to be imported at file level.
    import random
    from pathlib import Path

    client_id = str(uuid.uuid4())

    # Accept both "host:port" and a full URL such as "http://127.0.0.1:8188/"
    # (the shipped configuration default already contains the scheme).
    base = str(self.comfyui_base_url).strip().rstrip("/")
    if "://" in base:
        scheme, host = base.split("://", 1)
    else:
        scheme, host = "http", base
    http_base = f"{scheme}://{host}"
    ws_base = f"{'wss' if scheme == 'https' else 'ws'}://{host}"

    def queue_prompt(prompt):
        # Passing data to Request makes urllib issue a POST.
        payload = json.dumps({"prompt": prompt, "client_id": client_id}).encode('utf-8')
        req = request.Request(f"{http_base}/prompt", data=payload)
        with request.urlopen(req) as response:
            return json.loads(response.read())

    def get_image(filename, subfolder, folder_type):
        url_values = parse.urlencode({"filename": filename, "subfolder": subfolder, "type": folder_type})
        with request.urlopen(f"{http_base}/view?{url_values}") as response:
            return response.read()

    def get_history(prompt_id):
        with request.urlopen(f"{http_base}/history/{prompt_id}") as response:
            return json.loads(response.read())

    def get_images(ws, prompt):
        prompt_id = queue_prompt(prompt)['prompt_id']
        while True:
            out = ws.recv()
            if not isinstance(out, str):
                continue  # previews are binary data
            message = json.loads(out)
            if message['type'] == 'executing':
                data = message['data']
                if data['node'] is None and data['prompt_id'] == prompt_id:
                    break  # Execution is done
        history = get_history(prompt_id)[prompt_id]
        output_images = {}
        # One pass over the output nodes; each image is downloaded exactly once.
        for node_id, node_output in history['outputs'].items():
            if 'images' in node_output:
                output_images[node_id] = [
                    get_image(image['filename'], image['subfolder'], image['type'])
                    for image in node_output['images']
                ]
        return output_images

    if seed == -1:
        seed = random.randint(0, 2**32 - 1)

    # The workflow is built as a dict so that quotes or newlines in the prompts
    # cannot corrupt the JSON sent to the server.
    workflow = {
        "3": {
            "class_type": "KSampler",
            "inputs": {
                "cfg": scale,
                "denoise": 1,
                "latent_image": ["5", 0],
                "model": ["4", 0],
                "negative": ["7", 0],
                "positive": ["6", 0],
                "sampler_name": sampler_name if sampler_name else "euler",
                "scheduler": "normal",
                "seed": int(seed),
                "steps": int(steps)
            }
        },
        "4": {
            "class_type": "CheckpointLoaderSimple",
            "inputs": {
                "ckpt_name": self.app.config.comfyui_model
            }
        },
        "5": {
            "class_type": "EmptyLatentImage",
            "inputs": {
                "batch_size": 1,
                "height": int(height),
                "width": int(width)
            }
        },
        "6": {
            "class_type": "CLIPTextEncode",
            "inputs": {
                "clip": ["4", 1],
                "text": positive_prompt
            }
        },
        "7": {
            "class_type": "CLIPTextEncode",
            "inputs": {
                "clip": ["4", 1],
                "text": negative_prompt
            }
        },
        "8": {
            "class_type": "VAEDecode",
            "inputs": {
                "samples": ["3", 0],
                "vae": ["4", 2]
            }
        },
        "9": {
            "class_type": "SaveImage",
            "inputs": {
                "filename_prefix": "ComfyUI",
                "images": ["8", 0]
            }
        }
    }

    ws = websocket.WebSocket()
    try:
        ws.connect(f"{ws_base}/ws?clientId={client_id}")
        images = get_images(ws, workflow)
    finally:
        # Always release the socket, even when queuing or polling fails.
        ws.close()

    infos = {
        "prompt": positive_prompt,
        "negative_prompt": negative_prompt,
        "seed": int(seed),
        "steps": int(steps),
        "scale": scale,
        "width": int(width),
        "height": int(height)
    }

    first_image = next((blobs[0] for blobs in images.values() if blobs), None)
    if first_image is None:
        return None, infos

    if output_path is None:
        # NOTE(review): sibling TTI services keep their default folder in
        # self.output_dir; confirm LollmsComfyUI sets it in __init__.
        output_path = getattr(self, "output_dir", None)
    if output_path is None:
        raise ValueError("No output_path was given and the service has no default output folder")

    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    # The per-call client id makes the file name unique.
    file_name = output_dir / f"comfyui_{client_id}.png"
    file_name.write_bytes(first_image)
    return file_name, infos
def paint_from_images(
    self,
    positive_prompt: str,
    images: List[str],
    negative_prompt: str = "",
    sampler_name="",
    seed=-1,
    scale=7.5,
    steps=20,
    img2img_denoising_strength=0.9,
    width=512,
    height=512,
    restore_faces=True,
    output_path=None
) -> List[Dict[str, str]]:
    """
    Placeholder for image-to-image generation through ComfyUI.

    Not implemented yet: every argument is ignored and None is returned.
    The signature mirrors the `paint_from_images` methods of the other TTI services.
    """
    result = None
    return result

View File

@ -50,7 +50,7 @@ class LollmsDalle(LollmsTTI):
def paint(
self,
prompt,
positive_prompt,
negative_prompt,
width=512,
height=512,
@ -93,31 +93,15 @@ class LollmsDalle(LollmsTTI):
width = closest_resolution[0]
height = closest_resolution[1]
if len(images)>0 and generation_engine=="dall-e-2":
# Read the image file from disk and resize it
image = Image.open(self.personality.image_files[0])
width, height = width, height
image = image.resize((width, height))
# Convert the image to a BytesIO object
byte_stream = BytesIO()
image.save(byte_stream, format='PNG')
byte_array = byte_stream.getvalue()
response = openai.images.create_variation(
image=byte_array,
n=1,
model=generation_engine, # for now only dalle 2 supports variations
size=f"{width}x{height}"
response = openai.images.generate(
model=generation_engine,
prompt=positive_prompt.strip(),
quality="standard",
size=f"{width}x{height}",
n=1,
)
else:
response = openai.images.generate(
model=generation_engine,
prompt=prompt.strip(),
quality="standard",
size=f"{width}x{height}",
n=1,
)
# download image to outputs
output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)
@ -138,6 +122,80 @@ class LollmsDalle(LollmsTTI):
ASCIIColors.red("Failed to download the image")
return file_name
def paint_from_images(
    self,
    positive_prompt: str,
    images: List[str],
    negative_prompt: str = "",
    width=512,
    height=512,
    generation_engine=None,
    output_path=None
):
    """
    Create a variation of a reference image with the OpenAI images API.

    The first entry of ``images`` is resized to the closest supported resolution,
    sent to ``openai.images.create_variation`` and the resulting image is
    downloaded into ``output_path``.

    Args:
        positive_prompt (str): Kept for interface parity; the variation call takes no prompt.
        images (List[str]): Paths of reference images. Only the first one is used.
        negative_prompt (str, optional): Kept for interface parity; unused.
        width (int, optional): Requested width, snapped to a supported resolution.
        height (int, optional): Requested height, snapped to a supported resolution.
        generation_engine (str, optional): Model name. Defaults to ``self.generation_engine``.
        output_path (Path or str, optional): Destination folder. Defaults to ``self.output_path``.

    Returns:
        The path of the saved image, or None when the download failed.

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("paint_from_images needs at least one reference image")
    if output_path is None:
        output_path = self.output_path
    if generation_engine is None:
        generation_engine = self.generation_engine
    if not PackageManager.check_package_installed("openai"):
        PackageManager.install_package("openai")
    import openai
    openai.api_key = self.key

    if generation_engine=="dall-e-2":
        supported_resolutions = [
            [512, 512],
            [1024, 1024],
        ]
        # Find the closest resolution
        closest_resolution = min(supported_resolutions, key=lambda res: abs(res[0] - width) + abs(res[1] - height))
    else:
        # Pick the landscape, portrait or square format from the aspect ratio
        if width>height:
            closest_resolution = [1792, 1024]
        elif width<height:
            closest_resolution = [1024, 1792]
        else:
            closest_resolution = [1024, 1024]

    # Update the width and height
    width = closest_resolution[0]
    height = closest_resolution[1]

    # Read the image file from disk, resize it and serialize it as PNG bytes.
    # The context manager closes the source file once the copy is made.
    with Image.open(images[0]) as image:
        resized = image.resize((width, height))
    byte_stream = BytesIO()
    resized.save(byte_stream, format='PNG')
    byte_array = byte_stream.getvalue()

    response = openai.images.create_variation(
        image=byte_array,
        n=1,
        model=generation_engine, # for now only dalle 2 supports variations
        size=f"{width}x{height}"
    )

    # download image to outputs
    output_dir = Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    image_url = response.data[0].url

    # Get the image data from the URL
    response = requests.get(image_url)
    if response.status_code != 200:
        ASCIIColors.red("Failed to download the image")
        # Nothing was written, so there is no file name to hand back.
        return None

    # Generate the full path for the image file
    file_name = output_dir/find_next_available_filename(output_dir, "img_dalle_") # You can change the filename if needed
    # Save the image to the specified folder
    with open(file_name, "wb") as file:
        file.write(response.content)
    ASCIIColors.yellow(f"Image saved to {file_name}")
    return file_name
@staticmethod
def get(app:LollmsApplication):
    """Return the LollmsDalle class itself (not an instance); `app` is not used here."""
    return LollmsDalle

View File

@ -121,11 +121,15 @@ class LollmsDiffusers(LollmsTTI):
ASCIIColors.red(" |______| ")
import torch
from diffusers import PixArtSigmaPipeline
self.model = PixArtSigmaPipeline.from_pretrained(
from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image#PixArtSigmaPipeline
self.model = AutoPipelineForText2Image.from_pretrained(
app.config.diffusers_model, torch_dtype=torch.float16, cache_dir=self.models_dir,
use_safetensors=True,
)
# self.model = StableDiffusionPipeline.from_pretrained(
# "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16, cache_dir=self.models_dir,
# use_safetensors=True,
# ) # app.config.diffusers_model
# Enable memory optimizations.
if app.config.diffusers_offloading_mode=="sequential_cpu_offload":
self.model.enable_sequential_cpu_offload()
@ -155,13 +159,31 @@ class LollmsDiffusers(LollmsTTI):
ASCIIColors.success("ok")
return LollmsDiffusers
def get_scheduler_by_name(self, scheduler_name="LMS"):
    """
    Build a diffusers scheduler from its short name.

    Args:
        scheduler_name (str): One of "LMS", "Euler", "DDPMS" or "DDIMS".

    Returns:
        A new scheduler instance, or None when the name is not recognized
        (callers keep the pipeline's current scheduler in that case).
    """
    # Imports stay inside each branch so diffusers is only loaded when a
    # known scheduler is actually requested.
    if scheduler_name == "LMS":
        from diffusers import LMSDiscreteScheduler
        return LMSDiscreteScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear"
        )
    if scheduler_name == "Euler":
        from diffusers import EulerDiscreteScheduler
        # Return the Euler scheduler that was imported; the LMS class is not bound in this branch.
        return EulerDiscreteScheduler()
    if scheduler_name == "DDPMS":
        from diffusers import DDPMScheduler
        return DDPMScheduler()
    if scheduler_name == "DDIMS":
        from diffusers import DDIMScheduler
        return DDIMScheduler()
    return None
def paint(
self,
positive_prompt,
negative_prompt,
files=[],
sampler_name="Euler",
sampler_name="",
seed=-1,
scale=7.5,
steps=20,
@ -171,10 +193,51 @@ class LollmsDiffusers(LollmsTTI):
restore_faces=True,
output_path=None
):
import torch
if sampler_name!="":
sc = self.get_scheduler_by_name(sampler_name)
if sc:
self.model.scheduler = sc
if output_path is None:
output_path = self.output_dir
from diffusers.utils.pil_utils import pt_to_pil
image = self.model(positive_prompt, negative_prompt=negative_prompt, guidance_scale=scale, num_inference_steps=steps,).images[0]
if seed!=-1:
generator = torch.Generator("cuda").manual_seed(seed)
image = self.model(positive_prompt, negative_prompt=negative_prompt, height=height, width=width, guidance_scale=scale, num_inference_steps=steps, generator=generator).images[0]
else:
image = self.model(positive_prompt, negative_prompt=negative_prompt, height=height, width=width, guidance_scale=scale, num_inference_steps=steps).images[0]
output_path = Path(output_path)
fn = find_next_available_filename(output_path,"diff_img_")
# Save the image
image.save(fn)
return fn, {"prompt":positive_prompt, "negative_prompt":negative_prompt}
def paint_from_images(self, positive_prompt: str,
images: List[str],
negative_prompt: str = "",
sampler_name="",
seed=-1,
scale=7.5,
steps=20,
img2img_denoising_strength=0.9,
width=512,
height=512,
restore_faces=True,
output_path=None
) -> List[Dict[str, str]]:
import torch
if sampler_name!="":
sc = self.get_scheduler_by_name(sampler_name)
if sc:
self.model.scheduler = sc
if output_path is None:
output_path = self.output_dir
if seed!=-1:
generator = torch.Generator("cuda").manual_seed(seed)
image = self.model(positive_prompt, negative_prompt=negative_prompt, height=height, width=width, guidance_scale=scale, num_inference_steps=steps, generator=generator).images[0]
else:
image = self.model(positive_prompt, negative_prompt=negative_prompt, height=height, width=width, guidance_scale=scale, num_inference_steps=steps).images[0]
output_path = Path(output_path)
fn = find_next_available_filename(output_path,"diff_img_")
# Save the image

View File

@ -0,0 +1,140 @@
"""
Lollms TTM Module
=================
This module is part of the Lollms library, designed to provide Text-to-Music (TTM) functionalities within the LollmsApplication framework. The base class `LollmsTTM` is intended to be inherited and implemented by other classes that provide specific TTM functionalities.
Author: ParisNeo, a computer geek passionate about AI
"""
from lollms.app import LollmsApplication
from pathlib import Path
from typing import List, Dict
from lollms.ttm import LollmsTTM
from lollms.utilities import PackageManager, File_Path_Generator, check_and_install_torch
class LollmsMusicGen(LollmsTTM):
    """
    Text-to-Music (TTM) service backed by the MusicGen models of the audiocraft library.

    Attributes:
        name (str): Name given to this service instance.
        app (LollmsApplication): The instance of the main Lollms application.
        model (str): Name of the MusicGen checkpoint that was loaded.
        api_key (str): API key slot inherited from the base class (not used by this class).
        output_path (Path or str): Default folder where generated audio files are saved.
        music_model: The loaded MusicGen model.
        models (List[str]): List of available models (left empty here).
        ready (bool): True once the model has been loaded.
    """

    def __init__(
                    self,
                    name:str,
                    app: LollmsApplication,
                    model="facebook/musicgen-melody",#"facebook/musicgen-small","facebook/musicgen-medium","facebook/musicgen-melody","facebook/musicgen-large"
                    device="cuda",
                    api_key="",
                    output_path=None
                    ):
        """
        Installs audiocraft if needed and loads the requested MusicGen checkpoint.

        Args:
            name (str): Name given to this service instance.
            app (LollmsApplication): The instance of the main Lollms application.
            model (str, optional): MusicGen checkpoint to load. Defaults to "facebook/musicgen-melody".
            device (str, optional): Device the model is loaded on. Defaults to "cuda".
            api_key (str, optional): Stored on the instance; not used by this class. Defaults to an empty string.
            output_path (Path or str, optional): Default folder for generated audio files. Defaults to None.
        """
        # Let the base class store the shared attributes (it also resets ready to False).
        super().__init__(name, app, model, api_key, output_path)

        # The module imported below is `audiocraft`, so that is the package to check and install.
        if not PackageManager.check_package_installed("audiocraft"):
            check_and_install_torch(device=="cuda")
            PackageManager.install_or_update("audiocraft")
        from audiocraft.models import musicgen
        self.music_model = musicgen.MusicGen.get_pretrained(model, device=device)

        self.models = [] # To be filled by the child class
        self.ready = True

    def generate(self,
                positive_prompt: str,
                negative_prompt: str = "",
                duration=30,
                generation_engine=None,
                output_path = None):
        """
        Generates a music clip from a text prompt and saves it as a wav file.

        Args:
            positive_prompt (str): The prompt describing the desired music.
            negative_prompt (str, optional): Accepted for interface parity; not used by this class.
            duration (int, optional): Requested clip duration, passed to the model's generation parameters. Defaults to 30.
            generation_engine (optional): Accepted for interface parity; not used by this class.
            output_path (Path or str, optional): Destination folder. Defaults to the instance's output_path.

        Returns:
            tuple: (output_file, infos) where infos holds the prompt and the duration.

        Raises:
            ValueError: If no output folder is given and the instance has no default one.
        """
        if output_path is None:
            output_path = self.output_path
        if output_path is None:
            raise ValueError("No output_path was given and the service has no default output folder")
        # Accept plain strings as well as Path objects.
        output_path = Path(output_path)

        import torchaudio
        self.music_model.set_generation_params(duration=duration)
        res = self.music_model.generate([positive_prompt], progress=True)
        output_path.mkdir(parents=True, exist_ok=True)
        output_file = File_Path_Generator.generate_unique_file_path(output_path, "generation","wav")
        # Prefer the rate reported by the model; 32000 is the value this code used previously.
        sample_rate = getattr(self.music_model, "sample_rate", 32000)
        torchaudio.save(output_file, res.reshape(1, -1).cpu(), sample_rate)

        return output_file, {"prompt":positive_prompt,"duration":duration}

    def generate_from_samples(self, positive_prompt: str, samples: List[str], negative_prompt: str = "") -> List[Dict[str, str]]:
        """
        Placeholder for generation conditioned on reference audio samples.

        Not implemented yet: the arguments are ignored and None is returned.

        Args:
            positive_prompt (str): The prompt describing the desired music.
            samples (List[str]): A list of paths to reference samples.
            negative_prompt (str, optional): The prompt describing what should be avoided. Defaults to an empty string.
        """
        return None

    @staticmethod
    def verify(app: LollmsApplication) -> bool:
        """
        Verifies if the TTM service is available.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in this implementation.
        """
        return True

    @staticmethod
    def install(app: LollmsApplication) -> bool:
        """
        Installs the necessary components for the TTM service.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in this implementation (the actual installation happens in __init__).
        """
        return True

    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsTTM':
        """
        Returns the LollmsMusicGen class.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            The LollmsMusicGen class (a LollmsTTM subclass), not an instance.
        """
        return LollmsMusicGen

View File

@ -358,7 +358,6 @@ class LollmsSD(LollmsTTI):
self,
positive_prompt,
negative_prompt,
files=[],
sampler_name="Euler",
seed=-1,
scale=7.5,
@ -433,6 +432,54 @@ class LollmsSD(LollmsTTI):
return img_paths[0] if len(img_paths)>0 else None, infos
def paint_from_images(
    self,
    positive_prompt: str,
    images: List[str],
    negative_prompt: str = "",
    sampler_name="",
    seed=-1,
    scale=7.5,
    steps=20,
    img2img_denoising_strength=0.9,
    width=512,
    height=512,
    restore_faces=True,
    output_path=None
) -> List[Dict[str, str]]:
    """
    Run an img2img generation that starts from the last image of ``images``.

    Every generated image is saved into ``output_path`` (``self.output_dir`` when
    None). Any exception is logged and swallowed.

    Returns:
        tuple: (path of the first saved image or None, infos reported by the
        generation, or an empty dict when it failed).
    """
    target_dir = self.output_dir if output_path is None else output_path
    saved_paths = []
    infos = {}
    try:
        source_image = self.loadImage(images[-1])
        # The result object exposes `images` (list) and `info` (dict), both used below.
        result = self.img2img(
            positive_prompt,
            negative_prompt,
            [source_image],
            sampler_name=sampler_name,
            seed=seed,
            cfg_scale=scale,
            steps=steps,
            width=int(width),
            height=int(height),
            denoising_strength=img2img_denoising_strength,
            tiling=False,
            restore_faces=restore_faces,
            styles=None,
            script_name="",
        )
        for picture in result.images:
            saved_paths.append(self.saveImage(picture, target_dir))
        infos = result.info
    except Exception as ex:
        ASCIIColors.error("Couldn't generate the image")
        trace_exception(ex)

    first_path = saved_paths[0] if saved_paths else None
    return first_path, infos
def check_controlnet(self):
try:
scripts = self.get_scripts()

View File

@ -47,7 +47,6 @@ class LollmsTTI:
self.model = model
self.api_key = api_key
self.output_path = output_path
self.voices = [] # To be filled by the child class
self.models = [] # To be filled by the child class
def paint(self,
@ -55,7 +54,6 @@ class LollmsTTI:
negative_prompt: str = "",
width=512,
height=512,
images = [],
generation_engine=None,
output_path = None) -> List[Dict[str, str]]:
"""

121
lollms/ttm.py Normal file
View File

@ -0,0 +1,121 @@
"""
Lollms TTM Module
=================
This module is part of the Lollms library, designed to provide Text-to-Music (TTM) functionalities within the LollmsApplication framework. The base class `LollmsTTM` is intended to be inherited and implemented by other classes that provide specific TTM functionalities.
Author: ParisNeo, a computer geek passionate about AI
"""
from lollms.app import LollmsApplication
from pathlib import Path
from typing import List, Dict
class LollmsTTM:
    """
    Base class for Text-to-Music (TTM) services used by the LollmsApplication.

    Concrete services inherit from this class and override `generate` and
    `generate_from_samples`.

    Attributes:
        ready (bool): False after base initialization.
        name (str): Name given to the service instance.
        app (LollmsApplication): The instance of the main Lollms application.
        model (str): The TTM model to be used for music generation.
        api_key (str): API key for accessing external TTM services (if needed).
        output_path (Path or str): Path where the output audio files will be saved.
        models (List[str]): List of available models for TTM (to be filled by the child class).
    """

    def __init__(
                    self,
                    name:str,
                    app: LollmsApplication,
                    model="",
                    api_key="",
                    output_path=None
                    ):
        """
        Stores the service parameters on the instance.

        Args:
            name (str): Name given to the service instance.
            app (LollmsApplication): The instance of the main Lollms application.
            model (str, optional): The TTM model to be used. Defaults to an empty string.
            api_key (str, optional): API key for accessing external TTM services. Defaults to an empty string.
            output_path (Path or str, optional): Path where the output files will be saved. Defaults to None.
        """
        self.name = name
        self.app = app
        self.model = model
        self.api_key = api_key
        self.output_path = output_path
        self.models = [] # To be filled by the child class
        self.ready = False

    def generate(self,
                positive_prompt: str,
                negative_prompt: str = "",
                duration=30,
                generation_engine=None,
                output_path = None) -> List[Dict[str, str]]:
        """
        Generates music from the given prompts.

        The base implementation does nothing and returns None; child classes override it.

        Args:
            positive_prompt (str): The prompt describing the desired music.
            negative_prompt (str, optional): The prompt describing what should be avoided. Defaults to an empty string.
            duration (int, optional): Requested duration. Defaults to 30.
            generation_engine (optional): Engine selector for child classes. Defaults to None.
            output_path (Path or str, optional): Destination folder. Defaults to None.
        """

    def generate_from_samples(self, positive_prompt: str, samples: List[str], negative_prompt: str = "") -> List[Dict[str, str]]:
        """
        Generates music from a prompt and reference samples.

        The base implementation does nothing and returns None; child classes override it.

        Args:
            positive_prompt (str): The prompt describing the desired music.
            samples (List[str]): A list of paths to reference samples.
            negative_prompt (str, optional): The prompt describing what should be avoided. Defaults to an empty string.
        """

    @staticmethod
    def verify(app: LollmsApplication) -> bool:
        """
        Verifies if the TTM service is available.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class.
        """
        available = True
        return available

    @staticmethod
    def install(app: LollmsApplication) -> bool:
        """
        Installs the necessary components for the TTM service.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            bool: Always True in the base class.
        """
        installed = True
        return installed

    @staticmethod
    def get(app: LollmsApplication) -> 'LollmsTTM':
        """
        Returns the LollmsTTM class.

        Args:
            app (LollmsApplication): The instance of the main Lollms application.

        Returns:
            LollmsTTM: The LollmsTTM class itself, not an instance.
        """
        service_class = LollmsTTM
        return service_class