updates semantic code

This commit is contained in:
Saifeddine ALOUI 2024-12-16 00:47:52 +01:00
parent 39e7d5ea3e
commit e030d32675
7 changed files with 14 additions and 12 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file =========================== # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 145 version: 146
# video viewing and news recovering # video viewing and news recovering
last_viewed_video: null last_viewed_video: null
@ -283,6 +283,7 @@ rag_databases: [] # This is the list of paths to database sources. Each database
rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama
rag_service_url: "http://localhost:11434" # rag service url for ollama rag_service_url: "http://localhost:11434" # rag service url for ollama
rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable
rag_vectorizer_execute_remote_code: false # true allows the semantic vectorizer to execute remote code, false forbids it
rag_vectorizer_parameters: null # Parameters of the model in json format rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap rag_overlap: 0 # number of tokens of overlap

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file =========================== # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 145 version: 146
# video viewing and news recovering # video viewing and news recovering
last_viewed_video: null last_viewed_video: null
@ -283,6 +283,7 @@ rag_databases: [] # This is the list of paths to database sources. Each database
rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama
rag_service_url: "http://localhost:11434" # rag service url for ollama rag_service_url: "http://localhost:11434" # rag service url for ollama
rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable
rag_vectorizer_execute_remote_code: false # true allows the semantic vectorizer to execute remote code, false forbids it
rag_vectorizer_parameters: null # Parameters of the model in json format rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap rag_overlap: 0 # number of tokens of overlap

View File

@ -776,7 +776,7 @@ class Discussion:
if len(self.text_files)>0: if len(self.text_files)>0:
if self.lollms.config.rag_vectorizer=="semantic": if self.lollms.config.rag_vectorizer=="semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
vectorizer = SemanticVectorizer(self.lollms.config.rag_vectorizer_model) vectorizer = SemanticVectorizer(self.lollms.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code)
elif self.lollms.config.rag_vectorizer=="tfidf": elif self.lollms.config.rag_vectorizer=="tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
vectorizer = TFIDFVectorizer() vectorizer = TFIDFVectorizer()
@ -950,7 +950,7 @@ class Discussion:
if self.vectorizer is None: if self.vectorizer is None:
if self.lollms.config.rag_vectorizer == "semantic": if self.lollms.config.rag_vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(self.lollms.config.rag_vectorizer_model) v = SemanticVectorizer(self.lollms.config.rag_vectorizer_model, self.lollms.config.rag_vectorizer_execute_remote_code)
elif self.lollms.config.rag_vectorizer == "tfidf": elif self.lollms.config.rag_vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()

View File

@ -14,7 +14,7 @@ class SkillsLibrary:
vectorizer = self.config.rag_vectorizer vectorizer = self.config.rag_vectorizer
if vectorizer == "semantic": if vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(self.config.rag_vectorizer_model) v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code)
elif vectorizer == "tfidf": elif vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()
@ -27,7 +27,7 @@ class SkillsLibrary:
else: else:
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(self.config.rag_vectorizer_model) v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code)
self.vectorizer = VectorDatabase("", v, TikTokenTokenizer(),chunk_size, overlap, n_neighbors) self.vectorizer = VectorDatabase("", v, TikTokenTokenizer(),chunk_size, overlap, n_neighbors)
ASCIIColors.green("Vecorizer ready") ASCIIColors.green("Vecorizer ready")

View File

@ -1684,7 +1684,7 @@ Don't forget to close the html code tag.
if self.config.rag_vectorizer == "semantic": if self.config.rag_vectorizer == "semantic":
self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing semantic vectorizer") self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing semantic vectorizer")
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(self.config.rag_vectorizer_model) v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code)
elif self.config.rag_vectorizer == "tfidf": elif self.config.rag_vectorizer == "tfidf":
self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing tfidf vectorizer") self.ShowBlockingMessage("Processing file\nPlease wait ...\nUsing tfidf vectorizer")
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
@ -3884,7 +3884,7 @@ transition-all duration-300 ease-in-out">
vectorizer = self.config.rag_vectorizer vectorizer = self.config.rag_vectorizer
if vectorizer == "semantic": if vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(self.config.rag_vectorizer_model) v = SemanticVectorizer(self.config.rag_vectorizer_model, self.config.rag_vectorizer_execute_remote_code)
elif vectorizer == "tfidf": elif vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()

View File

@ -142,7 +142,7 @@ def select_rag_database(client) -> Optional[Dict[str, Path]]:
if lollmsElfServer.config.rag_vectorizer == "semantic": if lollmsElfServer.config.rag_vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code)
elif lollmsElfServer.config.rag_vectorizer == "tfidf": elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()
@ -289,7 +289,7 @@ def toggle_mount_rag_database(database_infos: MountDatabase):
if lollmsElfServer.config.rag_vectorizer == "semantic": if lollmsElfServer.config.rag_vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code)
elif lollmsElfServer.config.rag_vectorizer == "tfidf": elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()
@ -355,7 +355,7 @@ async def vectorize_folder(database_infos: FolderInfos):
if lollmsElfServer.config.rag_vectorizer == "semantic": if lollmsElfServer.config.rag_vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code)
elif lollmsElfServer.config.rag_vectorizer == "tfidf": elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()

View File

@ -68,7 +68,7 @@ def get_user_vectorizer(user_key: str):
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
if lollmsElfServer.config.rag_vectorizer == "semantic": if lollmsElfServer.config.rag_vectorizer == "semantic":
from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer
v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model) v = SemanticVectorizer(lollmsElfServer.config.rag_vectorizer_model, lollmsElfServer.config.rag_vectorizer_execute_remote_code)
elif lollmsElfServer.config.rag_vectorizer == "tfidf": elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer() v = TFIDFVectorizer()