upgraded to the new version

2024-12-19 20:57:58 +00:00 · 2024-05-03 00:58:21 +02:00 · 2024-05-03 00:58:21 +02:00 · 03368ef8e1
commit 03368ef8e1
parent 21384fc34b
13 changed files with 285 additions and 46 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/lollms/app.py
+++ b/lollms/app.py
@ -640,7 +640,9 @@ class LollmsApplication(LoLLMsCom):
        if self.personality.callback is None:
            self.personality.callback = partial(self.process_chunk, client_id=client_id)
        # Get the list of messages
-        messages = self.session.get_client(client_id).discussion.get_messages()
+        client = self.session.get_client(client_id)
+        discussion = client.discussion
+        messages = discussion.get_messages()

        # Find the index of the message with the specified message_id
        message_index = -1
@ -769,12 +771,14 @@ class LollmsApplication(LoLLMsCom):
                    trace_exception(ex)
                    self.warning("Couldn't add documentation to the context. Please verify the vector database")
            
-            if len(self.personality.text_files) > 0 and self.personality.vectorizer:
+            if (len(client.discussion.text_files) > 0) and client.discussion.vectorizer is not None:
+                if discussion is None:
+                    discussion = self.recover_discussion(client_id)
+
                if documentation=="":
                    documentation="\n!@>important information: Use the documentation data to answer the user questions. If the data is not present in the documentation, please tell the user that the information he is asking for does not exist in the documentation section. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n!@>Documentation:\n"

                if self.config.data_vectorization_build_keys_words:
-                    discussion = self.recover_discussion(client_id)
                    self.personality.step_start("Building vector store query")
                    query = self.personality.fast_gen(f"\n!@>instruction: Read the discussion and rewrite the last prompt for someone who didn't read the entire discussion.\nDo not answer the prompt. Do not add explanations.\n!@>discussion:\n{discussion[-2048:]}\n!@>enhanced query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
                    self.personality.step_end("Building vector store query")
@ -783,20 +787,20 @@ class LollmsApplication(LoLLMsCom):
                    query = current_message.content

                try:
-                    if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0:
-                        doc_index = list(self.personality.vectorizer.chunks.keys())[0]
+                    if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0:
+                        doc_index = list(client.discussion.vectorizer.chunks.keys())[0]

-                        doc_id = self.personality.vectorizer.chunks[doc_index]['document_id']
-                        content = self.personality.vectorizer.chunks[doc_index]['chunk_text']
+                        doc_id = client.discussion.vectorizer.chunks[doc_index]['document_id']
+                        content = client.discussion.vectorizer.chunks[doc_index]['chunk_text']
                        
                        if self.config.data_vectorization_put_chunk_informations_into_context:
                            documentation += f"!@>document chunk:\nchunk_infos:{doc_id}\ncontent:{content}\n"
                        else:
                            documentation += f"!@>chunk:\n{content}\n"

-                    docs, sorted_similarities, document_ids = self.personality.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
+                    docs, sorted_similarities, document_ids = client.discussion.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
                    for doc, infos in zip(docs, sorted_similarities):
-                        if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0 and infos[0]==doc_id:
+                        if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0 and infos[0]==doc_id:
                            continue
                        if self.config.data_vectorization_put_chunk_informations_into_context:
                            documentation += f"!@>document chunk:\nchunk path: {infos[0]}\nchunk content:\n{doc}\n"
--- a/lollms/binding.py
+++ b/lollms/binding.py
@ -26,6 +26,7 @@ from lollms.main_config import LOLLMSConfig
 from lollms.com import NotificationType, NotificationDisplayType, LoLLMsCom
 from lollms.security import sanitize_path
 from lollms.utilities import show_message_dialog
+from lollms.types import BindingType

 import urllib
 import inspect
@ -41,20 +42,7 @@ __author__ = "parisneo"
 __github__ = "https://github.com/ParisNeo/lollms_bindings_zoo"
 __copyright__ = "Copyright 2023, "
 __license__ = "Apache 2.0"
-class BindingType(Enum):
-    """Binding types."""
-    
-    TEXT_ONLY = 0
-    """This binding only supports text."""
-    
-    TEXT_IMAGE = 1
-    """This binding supports text and image."""

-    TEXT_IMAGE_VIDEO = 2
-    """This binding supports text, image and video."""
-
-    TEXT_AUDIO = 3
-    """This binding supports text and audio."""

 class LLMBinding:
    
--- a/lollms/com.py
+++ b/lollms/com.py
@ -1,4 +1,6 @@
 from ascii_colors import ASCIIColors
+from lollms.types import MSG_TYPE, SENDER_TYPES
+from typing import Callable
 import socketio
 from enum import Enum
 class NotificationType(Enum):
@ -96,6 +98,9 @@ class LoLLMsCom:
            self.sio.sleep(1)
        return infos["result"]

+    def close_message(self, client_id):
+        """Placeholder hook for closing the message currently open for a client.
+
+        This base implementation does nothing.
+
+        Args:
+            client_id: Identifier of the client whose message is closed.
+                NOTE(review): presumably overridden by the concrete server
+                class to notify the front end -- confirm.
+        """
+        pass
+    
    def info(self, content, duration:int=4, client_id=None, verbose:bool=None):
        self.notify(
                content, 
@ -136,6 +141,26 @@ class LoLLMsCom:
                verbose = verbose
            )
        
+    def new_message(self, 
+                            client_id, 
+                            sender=None, 
+                            content="",
+                            parameters=None,
+                            metadata=None,
+                            ui=None,
+                            message_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_FULL, 
+                            sender_type:SENDER_TYPES=SENDER_TYPES.SENDER_TYPES_AI,
+                            open=False
+                        ):
+        """Placeholder hook for starting a new message for a client.
+
+        This base implementation does nothing.
+
+        Args:
+            client_id: Identifier of the client the message is created for.
+            sender: Optional sender of the message. Defaults to None.
+            content (str): Initial content of the message. Defaults to "".
+            parameters: Optional parameters attached to the message.
+            metadata: Optional metadata attached to the message.
+            ui: Optional ui payload attached to the message.
+            message_type (MSG_TYPE): Defaults to MSG_TYPE.MSG_TYPE_FULL.
+            sender_type (SENDER_TYPES): Defaults to SENDER_TYPES.SENDER_TYPES_AI.
+            open (bool): Defaults to False.
+                NOTE(review): presumably tells the front end whether the
+                message stays open for edition -- confirm against the
+                concrete implementation.
+        """
+        pass
+    def full(self, full_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
+        """This sends full text to front end
+
+        This base implementation is a placeholder that does nothing.
+
+        Args:
+            full_text (str): The full text to send
+            callback (callable, optional): A callable with this signature (str, MSG_TYPE, dict, list) to send the text to. Defaults to None.
+        """
+        pass

    def notify(
                self, 
--- a/lollms/databases/discussions_database.py
+++ b/lollms/databases/discussions_database.py
@ -2,10 +2,16 @@
 import sqlite3
 from pathlib import Path
 from datetime import datetime
-from lollms.helpers import ASCIIColors
+from ascii_colors import ASCIIColors, trace_exception
+from lollms.types import MSG_TYPE
+from lollms.types import BindingType
+from lollms.utilities import PackageManager, discussion_path_to_url
 from lollms.paths import LollmsPaths
 from lollms.databases.skills_database import SkillsLibrary
+from lollms.com import LoLLMsCom
+from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
 import json
+import shutil

 __author__ = "parisneo"
 __github__ = "https://github.com/ParisNeo/lollms-webui"
@ -16,7 +22,8 @@ __license__ = "Apache 2.0"
 # =================================== Database ==================================================================
 class DiscussionsDB:
    
-    def __init__(self, lollms_paths:LollmsPaths, discussion_db_name="default"):
+    def __init__(self, lollms:LoLLMsCom, lollms_paths:LollmsPaths, discussion_db_name="default"):
+        self.lollms = lollms
        self.lollms_paths = lollms_paths
        
        self.discussion_db_name = discussion_db_name
@ -25,7 +32,6 @@ class DiscussionsDB:
        self.discussion_db_path.mkdir(exist_ok=True, parents= True)
        self.discussion_db_file_path = self.discussion_db_path/"database.db"

-
    def create_tables(self):
        db_version = 12
        with sqlite3.connect(self.discussion_db_file_path) as conn:
@ -199,7 +205,7 @@ class DiscussionsDB:
        else:
            last_discussion_id = last_discussion_id[0]
        self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False)
-        return Discussion(last_discussion_id, self)
+        return Discussion(self.lollms, last_discussion_id, self)
    
    def create_discussion(self, title="untitled"):
        """Creates a new discussion
@ -211,10 +217,10 @@ class DiscussionsDB:
            Discussion: A Discussion instance 
        """
        discussion_id = self.insert(f"INSERT INTO discussion (title) VALUES (?)",(title,))
-        return Discussion(discussion_id, self)
+        return Discussion(self.lollms, discussion_id, self)

    def build_discussion(self, discussion_id=0):
-        return Discussion(discussion_id, self)
+        return Discussion(self.lollms, discussion_id, self)

    def get_discussions(self):
        rows = self.select("SELECT * FROM discussion")         
@ -618,7 +624,8 @@ class Message:
        return msgJson

 class Discussion:
-    def __init__(self, discussion_id, discussions_db:DiscussionsDB):
+    def __init__(self, lollms:LoLLMsCom, discussion_id, discussions_db:DiscussionsDB):
+        self.lollms = lollms
        self.discussion_id = discussion_id
        self.discussions_db = discussions_db
        self.discussion_folder = self.discussions_db.discussion_db_path/f"{discussion_id}"
@ -627,19 +634,181 @@ class Discussion:
        self.discussion_text_folder = self.discussion_folder / "text_data"
        self.discussion_skills_folder = self.discussion_folder / "skills"
        self.discussion_rag_folder = self.discussion_folder / "rag"
+        self.discussion_view_images_folder = self.discussion_folder / "view_images"
+
        self.discussion_folder.mkdir(exist_ok=True)
        self.discussion_images_folder.mkdir(exist_ok=True)
        self.discussion_text_folder.mkdir(exist_ok=True)
        self.discussion_skills_folder.mkdir(exist_ok=True)
        self.discussion_rag_folder.mkdir(exist_ok=True)
+        self.discussion_view_images_folder.mkdir(exist_ok=True)
        self.messages = self.get_messages()
        
        if len(self.messages)>0:
            self.current_message = self.messages[-1]

-    def add_file(self, file_name):
-        # TODO : add file
-        pass
+        # Initialize the file lists
+        self.update_file_lists()
+
+
+        self.vectorizer = TextVectorizer(
+            self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
+            model=self.lollms.model, #needed in case of using model_embedding
+            database_path=self.discussion_rag_folder/"db.json",
+            save_db=self.lollms.config.data_vectorization_save_db,
+            data_visualization_method=VisualizationMethod.PCA,
+            database_dict=None)
+    
+        if len(self.vectorizer.chunks)==0 and len(self.text_files)>0:
+            for path in self.text_files:
+                data = GenericDataLoader.read_file(path)
+                self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if path.suffix==".csv" else False)
+                self.vectorizer.index()
+            
+
+    def update_file_lists(self):
+        """Rescan the discussion folders and rebuild the cached file lists.
+
+        Every entry found directly inside the text, images, audio and rag
+        folders is stored as a ``Path`` in ``text_files``, ``image_files``,
+        ``audio_files`` and ``rag_db`` respectively.
+        """
+        scanned_folders = (
+            ("text_files", "discussion_text_folder"),
+            ("image_files", "discussion_images_folder"),
+            ("audio_files", "discussion_audio_folder"),
+            ("rag_db", "discussion_rag_folder"),
+        )
+        for list_name, folder_name in scanned_folders:
+            folder = getattr(self, folder_name)
+            setattr(self, list_name, [Path(entry) for entry in folder.glob('*')])
+
+
+    def remove_file(self, file_name, callback=None):
+        """Remove a file attached to this discussion, looked up by its name.
+
+        The text, image and audio lists are searched in that order. The first
+        entry whose name equals ``file_name`` is deleted from disk and dropped
+        from its list. A text file is also removed from the vectorizer; when
+        the last text file is removed the vectorizer is discarded
+        (``add_file`` builds a new one when needed).
+
+        Args:
+            file_name (str): Name (not full path) of the file to remove.
+            callback (callable, optional): Called with a status text and
+                ``MSG_TYPE.MSG_TYPE_INFO`` once the file has been removed.
+                Defaults to None.
+
+        Returns:
+            bool: True when a file was removed, False when no attached file
+            has that name or when the removal failed.
+        """
+        try:
+            for list_name in ("text_files", "image_files", "audio_files"):
+                entries = getattr(self, list_name)
+                target = next((entry for entry in entries if entry.name == file_name), None)
+                if target is None:
+                    continue
+
+                setattr(self, list_name, [entry for entry in entries if entry.name != file_name])
+                Path(target).unlink()
+
+                if list_name == "text_files":
+                    if len(self.text_files)>0:
+                        # The vectorizer may already have been dropped by a previous removal
+                        if self.vectorizer is not None:
+                            try:
+                                self.vectorizer.remove_document(target)
+                            except ValueError:
+                                ASCIIColors.error("Couldn't remove the file")
+                                return False
+                    else:
+                        # No text file left: nothing to search in anymore
+                        self.vectorizer = None
+
+                if callback is not None:
+                    callback("File removed successfully",MSG_TYPE.MSG_TYPE_INFO)
+                return True
+
+            # No attached file carries that name
+            return False
+        except Exception as ex:
+            trace_exception(ex)
+            ASCIIColors.warning(f"Couldn't remove the file {file_name}")
+            return False
+
+    def remove_all_files(self):
+        """Delete every file attached to this discussion and reset the cached state.
+
+        All regular files found directly inside the images, rag, audio and text
+        folders are deleted (sub-directories are left untouched). The cached
+        file lists are emptied and the in-memory vectorizer is dropped so that
+        chunks of the deleted documents are no longer served; ``add_file``
+        builds a new vectorizer when the next text file is processed.
+        """
+        # Iterate over each directory and remove all files
+        for path in [self.discussion_images_folder, self.discussion_rag_folder, self.discussion_audio_folder, self.discussion_text_folder]:
+            for file in path.glob('*'):
+                if file.is_file():  # Ensure it's a file, not a directory
+                    file.unlink()  # Delete the file
+
+        # Clear the lists to reflect the current state (empty directories)
+        self.text_files.clear()
+        self.image_files.clear()
+        self.audio_files.clear()
+        self.rag_db.clear()
+
+        # The files in the rag folder were just deleted: forget the in-memory
+        # index too, otherwise it would still contain the removed documents.
+        self.vectorizer = None
+
+    def add_file(self, path, client, callback=None, process=True):
+        """Attach a file to this discussion and optionally process it.
+
+        The file is dispatched on its (case-insensitive) suffix:
+
+        - ``.wav`` / ``.mp3``: registered as audio; when ``process`` is set the
+          audio is transcribed with whisper, the transcription is written next
+          to the audio file (``<path>.txt``) and sent to the front end.
+        - image suffixes: registered as image; when ``process`` is set a copy
+          is placed in the view images folder, shown in a new message and, if
+          the binding type is neither TEXT_IMAGE nor TEXT_IMAGE_VIDEO, a
+          description of the image is appended.
+        - anything else: registered as text; when ``process`` is set the file
+          is loaded and indexed in the discussion vectorizer.
+
+        Args:
+            path: Path of the file to add.
+            client: Client the file belongs to. May be None, in which case the
+                client id 0 is used.
+            callback (callable, optional): Called with a status text and
+                ``MSG_TYPE.MSG_TYPE_INFO`` when an image or a text file has
+                been added. Defaults to None.
+            process (bool): When False the file is only registered in the
+                matching list. Defaults to True.
+
+        Returns:
+            True when a text file was vectorized, False when processing a text
+            file failed, None in every other case.
+        """
+        output = ""
+        # client is optional: fall back to client id 0 everywhere
+        client_id = client.client_id if client is not None else 0
+
+        path = Path(path)
+        # The upload handler lowercases the extension when it picks the
+        # destination folder, so the dispatch here must ignore case as well.
+        suffix = path.suffix.lower()
+        if suffix in [".wav",".mp3"]:
+            self.audio_files.append(path)
+            if process:
+                self.lollms.new_message(client_id, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
+                self.lollms.info(f"Transcribing ... ")
+                # The whisper model is loaded lazily on first use
+                if getattr(self, "whisper", None) is None:
+                    if not PackageManager.check_package_installed("whisper"):
+                        PackageManager.install_package("openai-whisper")
+                        try:
+                            import conda.cli
+                            conda.cli.main("install", "conda-forge::ffmpeg", "-y")
+                        except (Exception, SystemExit):
+                            # Best effort: conda may be missing or may exit on failure
+                            ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually")
+
+                    import whisper
+                    self.whisper = whisper.load_model("base")
+                result = self.whisper.transcribe(str(path))
+                transcription_fn = str(path)+".txt"
+                with open(transcription_fn, "w", encoding="utf-8") as f:
+                    f.write(result["text"])
+
+                self.lollms.info(f"File saved to {transcription_fn}")
+                self.lollms.full(result["text"])
+                # Close the message opened above, as done in the image branch
+                self.lollms.close_message(client_id)
+        elif suffix in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
+            self.image_files.append(path)
+            if process:
+                
+                try:
+                    view_file = self.discussion_view_images_folder/path.name
+                    shutil.copyfile(path, view_file)
+                    pth = str(view_file).replace("\\","/").split('/')
+                    if "discussion_databases" in pth:
+                        pth = discussion_path_to_url(view_file)
+                        self.lollms.new_message(client_id, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
+                        output = f'<img src="{pth}" width="800">\n\n'
+                        # NOTE(review): LoLLMsCom.full declares no client_id parameter;
+                        # this relies on the concrete server class accepting it -- confirm.
+                        self.lollms.full(output, client_id=client_id)
+                        self.lollms.close_message(client_id)
+
+                    if self.lollms.model.binding_type not in [BindingType.TEXT_IMAGE, BindingType.TEXT_IMAGE_VIDEO]:
+                        # self.ShowBlockingMessage("Understanding image (please wait)")
+                        from PIL import Image
+                        img = Image.open(str(view_file))
+                        # Convert the image to RGB mode
+                        img = img.convert("RGB")
+                        output += "## image description :\n"+ self.lollms.model.interrogate_blip([img])[0]
+                        # output += "## image description :\n"+ self.lollms.model.qna_blip([img],"q:Describe this photo with as much details as possible.\na:")[0]
+                        self.lollms.full(output)
+                        self.lollms.close_message(client_id)
+                        self.lollms.HideBlockingMessage("Understanding image (please wait)")
+                        if self.lollms.config.debug:
+                            ASCIIColors.yellow(output)
+                    else:
+                        # self.ShowBlockingMessage("Importing image (please wait)")
+                        self.lollms.HideBlockingMessage("Importing image (please wait)")
+
+                except Exception as ex:
+                    trace_exception(ex)
+                    self.lollms.HideBlockingMessage("Understanding image (please wait)", False)
+                    ASCIIColors.error("Couldn't create new message")
+            ASCIIColors.info("Received image file")
+            if callback is not None:
+                callback("Image file added successfully", MSG_TYPE.MSG_TYPE_INFO)
+        else:
+            try:
+                # self.ShowBlockingMessage("Adding file to vector store.\nPlease stand by")
+                self.text_files.append(path)
+                ASCIIColors.info("Received text compatible file")
+                self.lollms.ShowBlockingMessage("Processing file\nPlease wait ...")
+                if process:
+                    # The vectorizer is dropped when the last text file is removed
+                    if self.vectorizer is None:
+                        self.vectorizer = TextVectorizer(
+                                    self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
+                                    model=self.lollms.model, #needed in case of using model_embedding
+                                    database_path=self.discussion_rag_folder/"db.json",
+                                    save_db=self.lollms.config.data_vectorization_save_db,
+                                    data_visualization_method=VisualizationMethod.PCA,
+                                    database_dict=None)
+                    data = GenericDataLoader.read_file(path)
+                    self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if suffix==".csv" else False)
+                    self.vectorizer.index()
+                    if callback is not None:
+                        callback("File added successfully",MSG_TYPE.MSG_TYPE_INFO)
+                    self.lollms.HideBlockingMessage(client_id)
+                    return True
+            except Exception as e:
+                trace_exception(e)
+                self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client_id)
+                return False

    def load_message(self, id):
        """Gets a list of messages information
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/lollms/server/endpoints/lollms_discussion.py
+++ b/lollms/server/endpoints/lollms_discussion.py
@ -17,7 +17,7 @@ from lollms.security import sanitize_path, check_access
 from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB, Discussion
 from typing import List
-
+import shutil
 from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from pathlib import Path
@ -69,7 +69,7 @@ def select_database(data:DatabaseSelectionParameters):

    print(f'Selecting database {data.name}')
    # Create database object
-    lollmsElfServer.db = DiscussionsDB(lollmsElfServer.lollms_paths, data.name)
+    lollmsElfServer.db = DiscussionsDB(lollmsElfServer, lollmsElfServer.lollms_paths, data.name)
    ASCIIColors.info("Checking discussions database... ",end="")
    lollmsElfServer.db.create_tables()
    lollmsElfServer.db.add_missing_columns()
@ -114,7 +114,7 @@ async def make_title(discussion_title: DiscussionTitle):
    try:
        ASCIIColors.info("Making title")
        discussion_id = discussion_title.id
-        discussion = Discussion(discussion_id, lollmsElfServer.db)
+        discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
        title = lollmsElfServer.make_discussion_title(discussion)
        discussion.rename(title)
        return {'status':True, 'title':title}
@ -151,10 +151,14 @@ async def delete_discussion(discussion: DiscussionDelete):
    try:

        client_id           = discussion.client_id
-        discussion_id       = discussion.id
-        lollmsElfServer.session.get_client(client_id).discussion = Discussion(discussion_id, lollmsElfServer.db)
+        discussion_id       = sanitize_path(discussion.id)
+        discussion_path = lollmsElfServer.lollms_paths.personal_discussions_path/lollmsElfServer.config.discussion_db_name/discussion_id
+
+        lollmsElfServer.session.get_client(client_id).discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
        lollmsElfServer.session.get_client(client_id).discussion.delete_discussion()
        lollmsElfServer.session.get_client(client_id).discussion = None
+
+        shutil.rmtree(discussion_path)
        return {'status':True}
    except Exception as ex:
        trace_exception(ex)
@ -208,3 +212,38 @@ async def import_multiple_discussions(discussion_import: DiscussionImport):
        trace_exception(ex)
        lollmsElfServer.error(ex)
        return {"status":False,"error":str(ex)}
+
+
+
+# ------------------------------------------- Files manipulation -----------------------------------------------------
+# Request body that only carries the identifier of the calling client.
+class Identification(BaseModel):
+    client_id:str
+
+@router.post("/get_discussion_files_list")
+def get_discussion_files_list(data:Identification):
+    """
+    Lists the name and size of the text and image files of the client's discussion
+    """
+    client = check_access(lollmsElfServer, data.client_id)
+    listed_files = []
+    # Text files first, then image files
+    for attached in client.discussion.text_files + client.discussion.image_files:
+        entry = Path(attached)
+        listed_files.append({"name":entry.name, "size":entry.stat().st_size})
+    return {"state":True, "files":listed_files}
+
+@router.post("/clear_discussion_files_list")
+def clear_discussion_files_list(data:Identification):
+    """
+    Removes all the files attached to the client's discussion
+    """
+    client = check_access(lollmsElfServer, data.client_id)
+    # NOTE(review): the files belong to the discussion, yet the request is
+    # refused when no personality is selected -- confirm this guard is intended.
+    if lollmsElfServer.personality is None:
+        return {"state":False, "error":"No personality selected"}
+    client.discussion.remove_all_files()
+    return {"state":True}
+
+# Request body naming one file of the calling client's discussion.
+class RemoveFileData(BaseModel):
+    client_id:str
+    name:str
+    
+@router.post("/remove_discussion_file")
+def remove_discussion_file(data:RemoveFileData):
+    """
+    Removes a file from the files of the client's discussion
+    """
+    client = check_access(lollmsElfServer, data.client_id)
+    
+    # NOTE(review): the file belongs to the discussion, yet the request is
+    # refused when no personality is selected -- confirm this guard is intended.
+    if lollmsElfServer.personality is None:
+        return {"state":False, "error":"No personality selected"}
+    # The result of remove_file is not checked: the state is always True here
+    client.discussion.remove_file(data.name)
+    return {"state":True}
--- a/lollms/server/events/lollms_personality_events.py
+++ b/lollms/server/events/lollms_personality_events.py
@ -81,7 +81,7 @@ def add_events(sio:socketio):
        ext = filename.split(".")[-1].lower()
        if ext in ["wav", "mp3"]:
            path:Path = client.discussion.discussion_audio_folder
-        elif ext in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
+        elif ext in ["png","jpg","jpeg","gif","bmp","svg","webp"]:
            path:Path = client.discussion.discussion_images_folder
        else:
            path:Path = client.discussion.discussion_text_folder
@ -108,9 +108,9 @@ def add_events(sio:socketio):
            lollmsElfServer.ShowBlockingMessage(f"File received {file_path.name}.\nVectorizing the data ...")

            if lollmsElfServer.personality.processor:
-                result = lollmsElfServer.personality.processor.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
+                result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
            else:
-                result = lollmsElfServer.personality.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
+                result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))

            ASCIIColors.success('File processed successfully')
            run_async(partial(sio.emit,'file_received', {'status': True, 'filename': filename}))
--- a/lollms/types.py
+++ b/lollms/types.py
@ -61,6 +61,20 @@ class GenerationPresets:
        """
        return {'temperature': 0.5, 'top_k': 20, 'top_p': 0.85}

+class BindingType(Enum):
+    """Binding types.
+
+    Enumerates the kinds of input a binding supports.
+    """
+    
+    TEXT_ONLY = 0
+    """This binding only supports text."""
+    
+    TEXT_IMAGE = 1
+    """This binding supports text and image."""
+
+    TEXT_IMAGE_VIDEO = 2
+    """This binding supports text, image and video."""
+
+    TEXT_AUDIO = 3
+    """This binding supports text and audio."""

 class SUMMARY_MODE(Enum):
    SUMMARY_MODE_SEQUENCIAL        = 0
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: False # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use