From 03368ef8e16be0aad8c2efa5a1c7fe907898d9fa Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI
Date: Fri, 3 May 2024 00:58:21 +0200
Subject: [PATCH] upgraded to the new version

---
 configs/config.yaml                           |   2 +-
 .../personal/configs/lollms_elf_config.yaml   |   2 +-
 .../personal/configs/lollms_elf_config.yaml   |   2 +-
 .../configs/lollms_elf_local_config.yaml      |   2 +-
 lollms/app.py                                 |  22 +-
 lollms/binding.py                             |  14 +-
 lollms/com.py                                 |  25 +++
 lollms/databases/discussions_database.py      | 189 +++++++++++++++++-
 lollms/server/configs/config.yaml             |   2 +-
 lollms/server/endpoints/lollms_discussion.py  |  49 ++++-
 .../events/lollms_personality_events.py       |   6 +-
 lollms/types.py                               |  14 ++
 .../configs/lollms_discord_local_config.yaml  |   2 +-
 13 files changed, 285 insertions(+), 46 deletions(-)

diff --git a/configs/config.yaml b/configs/config.yaml
index a91f8d7..bd12e3b 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
index 6039a3e..7f56eff 100644
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
index 6039a3e..7f56eff 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
index 6039a3e..7f56eff 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/lollms/app.py b/lollms/app.py
index 1247c2c..2aa0f25 100644
--- a/lollms/app.py
+++ b/lollms/app.py
@@ -640,7 +640,9 @@ class LollmsApplication(LoLLMsCom):
         if self.personality.callback is None:
             self.personality.callback = partial(self.process_chunk, client_id=client_id)
         # Get the list of messages
-        messages = self.session.get_client(client_id).discussion.get_messages()
+        client = self.session.get_client(client_id)
+        discussion = client.discussion
+        messages = discussion.get_messages()

         # Find the index of the message with the specified message_id
         message_index = -1
@@ -769,12 +771,14 @@ class LollmsApplication(LoLLMsCom):
                 trace_exception(ex)
                 self.warning("Couldn't add documentation to the context. Please verify the vector database")

-        if len(self.personality.text_files) > 0 and self.personality.vectorizer:
+        if (len(client.discussion.text_files) > 0) and client.discussion.vectorizer is not None:
+            if discussion is None:
+                discussion = self.recover_discussion(client_id)
+
             if documentation=="":
                 documentation="\n!@>important information: Use the documentation data to answer the user questions. If the data is not present in the documentation, please tell the user that the information he is asking for does not exist in the documentation section. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n!@>Documentation:\n"

             if self.config.data_vectorization_build_keys_words:
-                discussion = self.recover_discussion(client_id)
                 self.personality.step_start("Building vector store query")
                 query = self.personality.fast_gen(f"\n!@>instruction: Read the discussion and rewrite the last prompt for someone who didn't read the entire discussion.\nDo not answer the prompt. Do not add explanations.\n!@>discussion:\n{discussion[-2048:]}\n!@>enhanced query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
                 self.personality.step_end("Building vector store query")
@@ -783,20 +787,20 @@ class LollmsApplication(LoLLMsCom):
                 query = current_message.content
             try:
-                if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0:
-                    doc_index = list(self.personality.vectorizer.chunks.keys())[0]
+                if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0:
+                    doc_index = list(client.discussion.vectorizer.chunks.keys())[0]

-                    doc_id = self.personality.vectorizer.chunks[doc_index]['document_id']
-                    content = self.personality.vectorizer.chunks[doc_index]['chunk_text']
+                    doc_id = client.discussion.vectorizer.chunks[doc_index]['document_id']
+                    content = client.discussion.vectorizer.chunks[doc_index]['chunk_text']
                     if self.config.data_vectorization_put_chunk_informations_into_context:
                         documentation += f"!@>document chunk:\nchunk_infos:{doc_id}\ncontent:{content}\n"
                     else:
                         documentation += f"!@>chunk:\n{content}\n"

-                docs, sorted_similarities, document_ids = self.personality.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
+                docs, sorted_similarities, document_ids = client.discussion.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
                 for doc, infos in zip(docs, sorted_similarities):
-                    if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0 and infos[0]==doc_id:
+                    if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0 and infos[0]==doc_id:
                         continue
                     if self.config.data_vectorization_put_chunk_informations_into_context:
                         documentation += f"!@>document chunk:\nchunk path: {infos[0]}\nchunk content:\n{doc}\n"
diff --git a/lollms/binding.py b/lollms/binding.py
index 39f7ede..a78adc6 100644
--- a/lollms/binding.py
+++ b/lollms/binding.py
@@ -26,6 +26,7 @@ from lollms.main_config import LOLLMSConfig
 from lollms.com import NotificationType, NotificationDisplayType, LoLLMsCom
 from lollms.security import sanitize_path
 from lollms.utilities import show_message_dialog
+from lollms.types import BindingType
 import urllib
 import inspect

@@ -41,20 +42,7 @@ __author__ = "parisneo"
 __github__ = "https://github.com/ParisNeo/lollms_bindings_zoo"
 __copyright__ = "Copyright 2023, "
 __license__ = "Apache 2.0"
-class BindingType(Enum):
-    """Binding types."""
-
-    TEXT_ONLY = 0
-    """This binding only supports text."""
-
-    TEXT_IMAGE = 1
-    """This binding supports text and image."""
-    TEXT_IMAGE_VIDEO = 2
-    """This binding supports text, image and video."""
-
-    TEXT_AUDIO = 3
-    """This binding supports text and audio."""


 class LLMBinding:
diff --git a/lollms/com.py b/lollms/com.py
index af469c5..79a9c70 100644
--- a/lollms/com.py
+++ b/lollms/com.py
@@ -1,4 +1,6 @@
 from ascii_colors import ASCIIColors
+from lollms.types import MSG_TYPE, SENDER_TYPES
+from typing import Callable
 import socketio
 from enum import Enum
 class NotificationType(Enum):
@@ -96,6 +98,9 @@ class LoLLMsCom:
         self.sio.sleep(1)
         return infos["result"]

+    def close_message(self, client_id):
+        pass
+
     def info(self, content, duration:int=4, client_id=None, verbose:bool=None):
         self.notify(
                 content,
@@ -136,6 +141,26 @@ class LoLLMsCom:
             verbose = verbose
         )

+    def new_message(self,
+                    client_id,
+                    sender=None,
+                    content="",
+                    parameters=None,
+                    metadata=None,
+                    ui=None,
+                    message_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_FULL,
+                    sender_type:SENDER_TYPES=SENDER_TYPES.SENDER_TYPES_AI,
+                    open=False
+                ):
+        pass
+    def full(self, full_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None, client_id=0):
+        """This sends full text to front end
+
+        Args:
+            full_text (str): The full text to send to the front end
+            callback (callable, optional): A callable with this signature (str, MSG_TYPE) to send the text to. Defaults to None.
+        """
+        pass
     def notify(
                 self,
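The close_message, new_message and full methods added to LoLLMsCom above are intentional no-op hooks: Discussion.add_file (further down in this patch) calls them through self.lollms, and a concrete front end is expected to override them. A minimal sketch of such an override, using only the signatures introduced in this patch; the ConsoleCom class and its print-based behaviour are illustrative assumptions, not part of lollms:

from lollms.com import LoLLMsCom
from lollms.types import MSG_TYPE, SENDER_TYPES

class ConsoleCom(LoLLMsCom):
    # Illustrative override: report events on the console instead of emitting socketio events.
    def new_message(self, client_id, sender=None, content="", parameters=None,
                    metadata=None, ui=None,
                    message_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_FULL,
                    sender_type:SENDER_TYPES=SENDER_TYPES.SENDER_TYPES_AI,
                    open=False):
        print(f"[{client_id}] new {sender_type.name} message")

    def full(self, full_text, callback=None, client_id=0):
        # Receives the complete text of the current message.
        print(full_text)

    def close_message(self, client_id):
        print(f"[{client_id}] message closed")

Note that add_file below also calls ShowBlockingMessage, HideBlockingMessage and InfoMessage on the same object.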
diff --git a/lollms/databases/discussions_database.py b/lollms/databases/discussions_database.py
index 661442e..195071a 100644
--- a/lollms/databases/discussions_database.py
+++ b/lollms/databases/discussions_database.py
@@ -2,10 +2,16 @@
 import sqlite3
 from pathlib import Path
 from datetime import datetime
-from lollms.helpers import ASCIIColors
+from ascii_colors import ASCIIColors, trace_exception
+from lollms.types import MSG_TYPE
+from lollms.types import BindingType
+from lollms.utilities import PackageManager, discussion_path_to_url
 from lollms.paths import LollmsPaths
 from lollms.databases.skills_database import SkillsLibrary
+from lollms.com import LoLLMsCom
+from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
 import json
+import shutil

 __author__ = "parisneo"
 __github__ = "https://github.com/ParisNeo/lollms-webui"
@@ -16,7 +22,8 @@ __license__ = "Apache 2.0"

 # =================================== Database ==================================================================
 class DiscussionsDB:
-    def __init__(self, lollms_paths:LollmsPaths, discussion_db_name="default"):
+    def __init__(self, lollms:LoLLMsCom, lollms_paths:LollmsPaths, discussion_db_name="default"):
+        self.lollms = lollms
         self.lollms_paths = lollms_paths
         self.discussion_db_name = discussion_db_name
@@ -25,7 +32,6 @@ class DiscussionsDB:
         self.discussion_db_path.mkdir(exist_ok=True, parents= True)
         self.discussion_db_file_path = self.discussion_db_path/"database.db"

-
     def create_tables(self):
         db_version = 12
         with sqlite3.connect(self.discussion_db_file_path) as conn:
@@ -199,7 +205,7 @@ class DiscussionsDB:
         else:
             last_discussion_id = last_discussion_id[0]
         self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False)
-        return Discussion(last_discussion_id, self)
+        return Discussion(self.lollms, last_discussion_id, self)

     def create_discussion(self, title="untitled"):
         """Creates a new discussion
         Args:
             title (str, optional): The title of the discussion. Defaults to "untitled".
         Returns:
             Discussion: A Discussion instance
         """
         discussion_id = self.insert(f"INSERT INTO discussion (title) VALUES (?)",(title,))
-        return Discussion(discussion_id, self)
+        return Discussion(self.lollms, discussion_id, self)

     def build_discussion(self, discussion_id=0):
-        return Discussion(discussion_id, self)
+        return Discussion(self.lollms, discussion_id, self)

     def get_discussions(self):
         rows = self.select("SELECT * FROM discussion")
@@ -618,7 +624,8 @@ class Message:
         return msgJson

 class Discussion:
-    def __init__(self, discussion_id, discussions_db:DiscussionsDB):
+    def __init__(self, lollms:LoLLMsCom, discussion_id, discussions_db:DiscussionsDB):
+        self.lollms = lollms
         self.discussion_id = discussion_id
         self.discussions_db = discussions_db
         self.discussion_folder = self.discussions_db.discussion_db_path/f"{discussion_id}"
@@ -627,19 +634,181 @@ class Discussion:
         self.discussion_text_folder = self.discussion_folder / "text_data"
         self.discussion_skills_folder = self.discussion_folder / "skills"
         self.discussion_rag_folder = self.discussion_folder / "rag"
+        self.discussion_view_images_folder = self.discussion_folder / "view_images"
+
         self.discussion_folder.mkdir(exist_ok=True)
         self.discussion_images_folder.mkdir(exist_ok=True)
         self.discussion_text_folder.mkdir(exist_ok=True)
         self.discussion_skills_folder.mkdir(exist_ok=True)
         self.discussion_rag_folder.mkdir(exist_ok=True)
+        self.discussion_view_images_folder.mkdir(exist_ok=True)

         self.messages = self.get_messages()
         if len(self.messages)>0:
             self.current_message = self.messages[-1]

-    def add_file(self, file_name):
-        # TODO : add file
-        pass
+        # Initialize the file lists
+        self.update_file_lists()
+
+        self.whisper = None  # whisper model is loaded on demand in add_file
+
+        self.vectorizer = TextVectorizer(
+                    self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
+                    model=self.lollms.model, #needed in case of using model_embedding
+                    database_path=self.discussion_rag_folder/"db.json",
+                    save_db=self.lollms.config.data_vectorization_save_db,
+                    data_visualization_method=VisualizationMethod.PCA,
+                    database_dict=None)
+
+        if len(self.vectorizer.chunks)==0 and len(self.text_files)>0:
+            for path in self.text_files:
+                data = GenericDataLoader.read_file(path)
+                self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if path.suffix==".csv" else False)
+            self.vectorizer.index()
+
+    def update_file_lists(self):
+        self.text_files = [Path(file) for file in self.discussion_text_folder.glob('*')]
+        self.image_files = [Path(file) for file in self.discussion_images_folder.glob('*')]
+        self.audio_files = [Path(file) for file in self.discussion_audio_folder.glob('*')]
+        self.rag_db = [Path(file) for file in self.discussion_rag_folder.glob('*')]
+
+    def remove_file(self, file_name, callback=None):
+        try:
+            all_files = self.text_files+self.image_files+self.audio_files
+            if any(file_name == entry.name for entry in self.text_files):
+                fn = [entry for entry in self.text_files if entry.name == file_name][0]
+                self.text_files = [entry for entry in self.text_files if entry.name != file_name]
+                Path(fn).unlink()
+                if len(self.text_files)>0:
+                    try:
+                        self.vectorizer.remove_document(fn)
+                        if callback is not None:
+                            callback("File removed successfully",MSG_TYPE.MSG_TYPE_INFO)
+                        return True
+                    except ValueError as ve:
+                        ASCIIColors.error(f"Couldn't remove the file {file_name}")
+                        return False
+                else:
+                    self.vectorizer = None
+            elif any(file_name == entry.name for entry in self.image_files):
+                fn = [entry for entry in self.image_files if entry.name == file_name][0]
+                self.image_files = [entry for entry in self.image_files if entry.name != file_name]
+                Path(fn).unlink()
+            elif any(file_name == entry.name for entry in self.audio_files):
+                fn = [entry for entry in self.audio_files if entry.name == file_name][0]
+                self.audio_files = [entry for entry in self.audio_files if entry.name != file_name]
+                Path(fn).unlink()
+
+        except Exception as ex:
+            trace_exception(ex)
+            ASCIIColors.warning(f"Couldn't remove the file {file_name}")
+
+    def remove_all_files(self):
+        # Iterate over each directory and remove all files
+        for path in [self.discussion_images_folder, self.discussion_rag_folder, self.discussion_audio_folder, self.discussion_text_folder]:
+            for file in path.glob('*'):
+                if file.is_file():  # Ensure it's a file, not a directory
+                    file.unlink()  # Delete the file
+
+        # Clear the lists to reflect the current state (empty directories)
+        self.text_files.clear()
+        self.image_files.clear()
+        self.audio_files.clear()
+
+    def add_file(self, path, client, callback=None, process=True):
+        output = ""
+
+        path = Path(path)
+        if path.suffix in [".wav",".mp3"]:
+            self.audio_files.append(path)
+            if process:
+                self.lollms.new_message(client.client_id if client is not None else 0, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
+                self.lollms.info(f"Transcribing ... ")
+                if self.whisper is None:
+                    if not PackageManager.check_package_installed("whisper"):
+                        PackageManager.install_package("openai-whisper")
+                        try:
+                            import conda.cli
+                            conda.cli.main("install", "conda-forge::ffmpeg", "-y")
+                        except:
+                            ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually")
+
+                    import whisper
+                    self.whisper = whisper.load_model("base")
+                result = self.whisper.transcribe(str(path))
+                transcription_fn = str(path)+".txt"
+                with open(transcription_fn, "w", encoding="utf-8") as f:
+                    f.write(result["text"])
+
+                self.info(f"File saved to {transcription_fn}")
+                self.full(result["text"])
+                self.step_end("Transcribing ... ")
+        elif path.suffix in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
+            self.image_files.append(path)
+            if process:
+
+                try:
+                    view_file = self.discussion_view_images_folder/path.name
+                    shutil.copyfile(path, view_file)
+                    pth = str(view_file).replace("\\","/").split('/')
+                    if "discussion_databases" in pth:
+                        pth = discussion_path_to_url(view_file)
+                    self.lollms.new_message(client.client_id if client is not None else 0, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
+                    output = f'\n\n'
+                    self.lollms.full(output, client_id=client.client_id)
+                    self.lollms.close_message(client.client_id if client is not None else 0)
+
+                    if self.lollms.model.binding_type not in [BindingType.TEXT_IMAGE, BindingType.TEXT_IMAGE_VIDEO]:
+                        # self.ShowBlockingMessage("Understanding image (please wait)")
+                        from PIL import Image
+                        img = Image.open(str(view_file))
+                        # Convert the image to RGB mode
+                        img = img.convert("RGB")
+                        output += "## image description :\n"+ self.lollms.model.interrogate_blip([img])[0]
+                        # output += "## image description :\n"+ self.lollms.model.qna_blip([img],"q:Describe this photo with as much details as possible.\na:")[0]
+                        self.lollms.full(output)
+                        self.lollms.close_message(client.client_id if client is not None else 0)
+                        self.lollms.HideBlockingMessage("Understanding image (please wait)")
+                        if self.lollms.config.debug:
+                            ASCIIColors.yellow(output)
+                    else:
+                        # self.ShowBlockingMessage("Importing image (please wait)")
+                        self.lollms.HideBlockingMessage("Importing image (please wait)")
+
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.lollms.HideBlockingMessage("Understanding image (please wait)", False)
+                    ASCIIColors.error("Couldn't create new message")
+            ASCIIColors.info("Received image file")
+            if callback is not None:
+                callback("Image file added successfully", MSG_TYPE.MSG_TYPE_INFO)
+        else:
+            try:
+                # self.ShowBlockingMessage("Adding file to vector store.\nPlease stand by")
+                self.text_files.append(path)
+                ASCIIColors.info("Received text compatible file")
+                self.lollms.ShowBlockingMessage("Processing file\nPlease wait ...")
+                if process:
+                    if self.vectorizer is None:
+                        self.vectorizer = TextVectorizer(
+                                    self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
+                                    model=self.lollms.model, #needed in case of using model_embedding
+                                    database_path=self.discussion_rag_folder/"db.json",
+                                    save_db=self.lollms.config.data_vectorization_save_db,
+                                    data_visualization_method=VisualizationMethod.PCA,
+                                    database_dict=None)
+                    data = GenericDataLoader.read_file(path)
+                    self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if path.suffix==".csv" else False)
+                    self.vectorizer.index()
+                    if callback is not None:
+                        callback("File added successfully",MSG_TYPE.MSG_TYPE_INFO)
+                    self.lollms.HideBlockingMessage(client.client_id)
+                    return True
+            except Exception as e:
+                trace_exception(e)
+                self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
+                return False

     def load_message(self, id):
         """Gets a list of messages information
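Each Discussion now owns a safe_store TextVectorizer persisted under its rag folder, fed by add_file and queried when the application builds the documentation block of the prompt. A minimal standalone sketch of that index-and-query flow, using only the safe_store calls that appear above; the file name, query text and parameter values are illustrative assumptions:

from pathlib import Path
from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader

# Build a vectorizer the same way Discussion.__init__ does (tfidf needs no model).
vectorizer = TextVectorizer(
    "tfidf_vectorizer",                       # or "model_embedding"
    model=None,                               # only required for model_embedding
    database_path=Path("rag")/"db.json",
    save_db=True,
    data_visualization_method=VisualizationMethod.PCA,
    database_dict=None)

# Index one document, then recover the chunks most relevant to a query.
path = Path("notes.txt")
data = GenericDataLoader.read_file(path)
vectorizer.add_document(path, data, 512, 128,
                        add_first_line_to_all_chunks=(path.suffix == ".csv"))
vectorizer.index()
docs, sorted_similarities, document_ids = vectorizer.recover_text("what does lollms do?", top_k=2)

The recover_text call at the end is the same one app.py now issues against client.discussion.vectorizer.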
diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml
index a91f8d7..bd12e3b 100644
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/lollms/server/endpoints/lollms_discussion.py b/lollms/server/endpoints/lollms_discussion.py
index 4fbcca4..a943658 100644
--- a/lollms/server/endpoints/lollms_discussion.py
+++ b/lollms/server/endpoints/lollms_discussion.py
@@ -17,7 +17,7 @@ from lollms.security import sanitize_path, check_access
 from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB, Discussion
 from typing import List
-
+import shutil
 from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from pathlib import Path
@@ -69,7 +69,7 @@ def select_database(data:DatabaseSelectionParameters):
     print(f'Selecting database {data.name}')

     # Create database object
-    lollmsElfServer.db = DiscussionsDB(lollmsElfServer.lollms_paths, data.name)
+    lollmsElfServer.db = DiscussionsDB(lollmsElfServer, lollmsElfServer.lollms_paths, data.name)
     ASCIIColors.info("Checking discussions database... ",end="")
     lollmsElfServer.db.create_tables()
     lollmsElfServer.db.add_missing_columns()
@@ -114,7 +114,7 @@ async def make_title(discussion_title: DiscussionTitle):
     try:
         ASCIIColors.info("Making title")
         discussion_id = discussion_title.id
-        discussion = Discussion(discussion_id, lollmsElfServer.db)
+        discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
         title = lollmsElfServer.make_discussion_title(discussion)
         discussion.rename(title)
         return {'status':True, 'title':title}
@@ -151,10 +151,14 @@ async def delete_discussion(discussion: DiscussionDelete):
     try:
         client_id = discussion.client_id
-        discussion_id = discussion.id
-        lollmsElfServer.session.get_client(client_id).discussion = Discussion(discussion_id, lollmsElfServer.db)
+        discussion_id = sanitize_path(discussion.id)
+        discussion_path = lollmsElfServer.lollms_paths.personal_discussions_path/lollmsElfServer.config.discussion_db_name/discussion_id
+
+        lollmsElfServer.session.get_client(client_id).discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
         lollmsElfServer.session.get_client(client_id).discussion.delete_discussion()
         lollmsElfServer.session.get_client(client_id).discussion = None
+
+        shutil.rmtree(discussion_path)
         return {'status':True}
     except Exception as ex:
         trace_exception(ex)
@@ -208,3 +212,38 @@ async def import_multiple_discussions(discussion_import: DiscussionImport):
         trace_exception(ex)
         lollmsElfServer.error(ex)
         return {"status":False,"error":str(ex)}
+
+
+
+# ------------------------------------------- Files manipulation -----------------------------------------------------
+class Identification(BaseModel):
+    client_id:str
+
+@router.post("/get_discussion_files_list")
+def get_discussion_files_list(data:Identification):
+    client = check_access(lollmsElfServer, data.client_id)
+    return {"state":True, "files":[{"name":Path(f).name, "size":Path(f).stat().st_size} for f in client.discussion.text_files]+[{"name":Path(f).name, "size":Path(f).stat().st_size} for f in client.discussion.image_files]}
+
+@router.post("/clear_discussion_files_list")
+def clear_discussion_files_list(data:Identification):
+    client = check_access(lollmsElfServer, data.client_id)
+    if lollmsElfServer.personality is None:
+        return {"state":False, "error":"No personality selected"}
+    client.discussion.remove_all_files()
+    return {"state":True}
+
+class RemoveFileData(BaseModel):
+    client_id:str
+    name:str
+
+@router.post("/remove_discussion_file")
+def remove_discussion_file(data:RemoveFileData):
+    """
+    Removes a file from the discussion files
+    """
+    client = check_access(lollmsElfServer, data.client_id)
+
+    if lollmsElfServer.personality is None:
+        return {"state":False, "error":"No personality selected"}
+    client.discussion.remove_file(data.name)
+    return {"state":True}
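The three routes added above all take the client_id issued by the server session and operate on that client's current discussion. A hedged usage sketch with the requests package; the host, port and client_id are placeholders, not values guaranteed by this patch:

import requests

base = "http://localhost:9600"      # placeholder host/port
client_id = "my-client-id"          # placeholder; issued by the server session

files = requests.post(f"{base}/get_discussion_files_list",
                      json={"client_id": client_id}).json()
print(files["files"])               # [{"name": ..., "size": ...}, ...]

requests.post(f"{base}/remove_discussion_file",
              json={"client_id": client_id, "name": "notes.txt"})
requests.post(f"{base}/clear_discussion_files_list",
              json={"client_id": client_id})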
diff --git a/lollms/server/events/lollms_personality_events.py b/lollms/server/events/lollms_personality_events.py
index d240ec1..86cba70 100644
--- a/lollms/server/events/lollms_personality_events.py
+++ b/lollms/server/events/lollms_personality_events.py
@@ -81,7 +81,7 @@ def add_events(sio:socketio):
             ext = filename.split(".")[-1].lower()
             if ext in ["wav", "mp3"]:
                 path:Path = client.discussion.discussion_audio_folder
-            elif ext in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
+            elif ext in ["png","jpg","jpeg","gif","bmp","svg","webp"]:
                 path:Path = client.discussion.discussion_images_folder
             else:
                 path:Path = client.discussion.discussion_text_folder
@@ -108,9 +108,9 @@ def add_events(sio:socketio):
             lollmsElfServer.ShowBlockingMessage(f"File received {file_path.name}.\nVectorizing the data ...")

             if lollmsElfServer.personality.processor:
-                result = lollmsElfServer.personality.processor.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
+                result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
             else:
-                result = lollmsElfServer.personality.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
+                result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))

             ASCIIColors.success('File processed successfully')
             run_async(partial(sio.emit,'file_received', {'status': True, 'filename': filename}))
diff --git a/lollms/types.py b/lollms/types.py
index 9257fb5..017320e 100644
--- a/lollms/types.py
+++ b/lollms/types.py
@@ -61,6 +61,20 @@ class GenerationPresets:
         """
         return {'temperature': 0.5, 'top_k': 20, 'top_p': 0.85}

+class BindingType(Enum):
+    """Binding types."""
+
+    TEXT_ONLY = 0
+    """This binding only supports text."""
+
+    TEXT_IMAGE = 1
+    """This binding supports text and image."""
+
+    TEXT_IMAGE_VIDEO = 2
+    """This binding supports text, image and video."""
+
+    TEXT_AUDIO = 3
+    """This binding supports text and audio."""

 class SUMMARY_MODE(Enum):
     SUMMARY_MODE_SEQUENCIAL = 0
diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml
index 6039a3e..7f56eff 100644
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
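Because DiscussionsDB and Discussion now take the LoLLMsCom application as their first constructor argument, and uploaded files hang off the discussion instead of the personality, code that builds these objects directly changes accordingly. A speculative sketch under those assumptions; lollms_app stands for any LoLLMsCom-based application object and is not defined by this patch:

from lollms.databases.discussions_database import DiscussionsDB

db = DiscussionsDB(lollms_app, lollms_app.lollms_paths, "default")   # new first argument
db.create_tables()
db.add_missing_columns()
discussion = db.create_discussion("rag test")

# Files now live under the discussion's own folders and are indexed by
# discussion.vectorizer (a safe_store TextVectorizer) rather than by the personality.
print([f.name for f in discussion.text_files])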