From e030d32675632438891a7fd5b3084d200e14d974 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Mon, 16 Dec 2024 00:47:52 +0100 Subject: [PATCH] updates semantic code --- configs/config.yaml | 3 ++- lollms/configs/config.yaml | 3 ++- lollms/databases/discussions_database.py | 4 ++-- lollms/databases/skills_database.py | 4 ++-- lollms/personality.py | 4 ++-- lollms/server/endpoints/lollms_file_system.py | 6 +++--- lollms/server/endpoints/lollms_rag.py | 2 +- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index d10478b..759692a 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 145 +version: 146 # video viewing and news recovering last_viewed_video: null @@ -283,6 +283,7 @@ rag_databases: [] # This is the list of paths to database sources. Each database rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama rag_service_url: "http://localhost:11434" # rag service url for ollama rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable +rag_vectorizer_execute_remote_code: false # do not execute remote code or do rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk rag_overlap: 0 # number of tokens of overlap diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index d10478b..759692a 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 145 +version: 146 # video viewing and news recovering last_viewed_video: null @@ -283,6 +283,7 @@ rag_databases: [] # This is the list of paths to database sources. 
Each database rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama rag_service_url: "http://localhost:11434" # rag service url for ollama rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable +rag_vectorizer_execute_remote_code: false # do not execute remote code or do rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk rag_overlap: 0 # number of tokens of overlap diff --git a/lollms/databases/discussions_database.py b/lollms/databases/discussions_database.py index 0931964..5f53b83 100644 --- a/lollms/databases/discussions_database.py +++ b/lollms/databases/discussions_database.py @@ -776,7 +776,7 @@ class Discussion: if len(self.text_files)>0: if self.lollms.config.rag_vectorizer=="semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - vectorizer = SemanticVectorizer(self.lollms.config.rag_vectorizer_model) + vectorizer = SemanticVectorizer(self.lollms.config.rag_vectorizer_model, self.lollms.config.rag_vectorizer_execute_remote_code) elif self.lollms.config.rag_vectorizer=="tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer vectorizer = TFIDFVectorizer() @@ -950,7 +950,7 @@ class Discussion: if self.vectorizer is None: if self.lollms.config.rag_vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(self.lollms.config.rag_vectorizer_model) + v = SemanticVectorizer(self.lollms.config.rag_vectorizer_model, self.lollms.config.rag_vectorizer_execute_remote_code) elif self.lollms.config.rag_vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() diff --git a/lollms/databases/skills_database.py b/lollms/databases/skills_database.py index b82917d..3bcbf1a 100644 --- a/lollms/databases/skills_database.py +++ b/lollms/databases/skills_database.py @@ 
-14,7 +14,7 @@ class SkillsLibrary: vectorizer = self.config.rag_vectorizer if vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(self.config.rag_vectorizer_model) + v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code) elif vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() @@ -27,7 +27,7 @@ class SkillsLibrary: else: from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(self.config.rag_vectorizer_model) + v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code) self.vectorizer = VectorDatabase("", v, TikTokenTokenizer(),chunk_size, overlap, n_neighbors) ASCIIColors.green("Vecorizer ready") diff --git a/lollms/personality.py b/lollms/personality.py index eef269e..09c0041 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -1684,7 +1684,7 @@ Don't forget to close the html code tag. 
if self.config.rag_vectorizer == "semantic": self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing semantic vectorizer") from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(self.config.rag_vectorizer_model) + v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code) elif self.config.rag_vectorizer == "tfidf": self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing tfidf vectorizer") from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer @@ -3884,7 +3884,7 @@ transition-all duration-300 ease-in-out"> vectorizer = self.config.rag_vectorizer if vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(self.config.rag_vectorizer_model) + v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code) elif vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() diff --git a/lollms/server/endpoints/lollms_file_system.py b/lollms/server/endpoints/lollms_file_system.py index a7a2546..01bd31e 100644 --- a/lollms/server/endpoints/lollms_file_system.py +++ b/lollms/server/endpoints/lollms_file_system.py @@ -142,7 +142,7 @@ def select_rag_database(client) -> Optional[Dict[str, Path]]: if lollmsElfServer.config.rag_vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) + v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code) elif lollmsElfServer.config.rag_vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() @@ -289,7 +289,7 @@ def toggle_mount_rag_database(database_infos: 
MountDatabase): if lollmsElfServer.config.rag_vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) + v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code) elif lollmsElfServer.config.rag_vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() @@ -355,7 +355,7 @@ async def vectorize_folder(database_infos: FolderInfos): if lollmsElfServer.config.rag_vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) + v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code) elif lollmsElfServer.config.rag_vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer() diff --git a/lollms/server/endpoints/lollms_rag.py b/lollms/server/endpoints/lollms_rag.py index c525b75..0c70110 100644 --- a/lollms/server/endpoints/lollms_rag.py +++ b/lollms/server/endpoints/lollms_rag.py @@ -68,7 +68,7 @@ def get_user_vectorizer(user_key: str): from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer if lollmsElfServer.config.rag_vectorizer == "semantic": from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer - v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) + v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code) elif lollmsElfServer.config.rag_vectorizer == "tfidf": from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer v = TFIDFVectorizer()