This commit is contained in:
Saifeddine ALOUI 2025-01-22 00:47:31 +01:00
parent f21ad95a49
commit 06f5390f34
4 changed files with 50 additions and 36 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 148
version: 149
# video viewing and news recovering
last_viewed_video: null
@ -71,6 +71,7 @@ extensions: []
user_name: user
user_description: ""
use_assistant_name_in_discussion: false
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: null

View File

@ -1310,18 +1310,23 @@ Answer directly with the reformulation of the last prompt.
if message.content != '' and (
message.message_type <= MSG_OPERATION_TYPE.MSG_OPERATION_TYPE_SET_CONTENT_INVISIBLE_TO_USER.value and message.message_type != MSG_OPERATION_TYPE.MSG_OPERATION_TYPE_SET_CONTENT_INVISIBLE_TO_AI.value):
# Tokenize the message content
if self.config.use_model_name_in_discussions:
if message.model:
msg = f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}({message.model}){end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI:
if self.config.use_assistant_name_in_discussion:
if self.config.use_model_name_in_discussions:
msg = self.ai_custom_header(message.sender+f"({message.model})") + message.content.strip()
else:
msg = self.ai_full_header + message.content.strip()
else:
msg = f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
message_tokenized = self.model.tokenize(msg)
if self.config.use_model_name_in_discussions:
msg = self.ai_custom_header("assistant"+f"({message.model})") + message.content.strip()
else:
msg = self.ai_custom_header("assistant") + message.content.strip()
else:
msg_value= f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
message_tokenized = self.model.tokenize(
msg_value
)
msg = self.user_full_header + message.content.strip()
message_tokenized = self.model.tokenize(msg)
# Check if adding the message will exceed the available space
if tokens_accumulated + len(message_tokenized) > available_space:
# Update the cumulative number of tokens
@ -1342,16 +1347,23 @@ Answer directly with the reformulation of the last prompt.
if message.content != '' and (
message.message_type <= MSG_OPERATION_TYPE.MSG_OPERATION_TYPE_SET_CONTENT_INVISIBLE_TO_USER.value and message.message_type != MSG_OPERATION_TYPE.MSG_OPERATION_TYPE_SET_CONTENT_INVISIBLE_TO_AI.value):
if self.config.use_model_name_in_discussions:
if message.model:
msg = f"{self.separator_template}{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}({message.model}){end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI:
if self.config.use_assistant_name_in_discussion:
if self.config.use_model_name_in_discussions:
msg = self.ai_custom_header(message.sender+f"({message.model})") + message.content.strip()
else:
msg = self.ai_full_header + message.content.strip()
else:
msg = f"{self.separator_template}{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
message_tokenized = self.model.tokenize(msg)
if self.config.use_model_name_in_discussions:
msg = self.ai_custom_header("assistant"+f"({message.model})") + message.content.strip()
else:
msg = self.ai_custom_header("assistant") + message.content.strip()
else:
message_tokenized = self.model.tokenize(
f"{self.separator_template}{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
)
if self.config.use_user_name_in_discussions:
msg = self.user_full_header + message.content.strip()
else:
msg = self.user_custom_header("user") + message.content.strip()
message_tokenized = self.model.tokenize(msg)
# Add the tokenized message to the full_message_list
full_message_list.insert(0, message_tokenized)

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 148
version: 149
# video viewing and news recovering
last_viewed_video: null
@ -71,6 +71,7 @@ extensions: []
user_name: user
user_description: ""
use_assistant_name_in_discussion: false
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: null

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 141
version: 149
# video viewing and news recovering
last_viewed_video: null
@ -51,7 +51,7 @@ use_continue_message: true
seed: -1
ctx_size: 4084
max_n_predict: None
max_n_predict: 4084
min_n_predict: 1024
temperature: 0.9
top_k: 50
@ -71,6 +71,7 @@ extensions: []
user_name: user
user_description: ""
use_assistant_name_in_discussion: false
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: null
@ -278,13 +279,23 @@ audio_pitch: 1
audio_auto_send_input: true
audio_silenceTimer: 5000
# remote databases
# This is the list of datalakes to be used for RAG
# Datalakes have the following entries
#
datalakes: []
# Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: tfidf # possible values semantic, tfidf, openai
rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable
rag_local_services: [] # This is the list of rag services served locally
rag_vectorizer: semantic # possible values semantic, tfidf, openai, ollama
rag_service_url: "http://localhost:11434" # rag service url for ollama
rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable
rag_vectorizer_execute_remote_code: false # do not execute remote code or do
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_min_correspondance: 0 # minimum correspondence between the query and the content
rag_n_chunks: 4 #Number of chunks to recover from the database
rag_clean_chunks: true #Removes all unnecessary spaces and line returns
@ -304,20 +315,9 @@ activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
data_vectorization_activate: true # To activate/deactivate data vectorization
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: true # For each new session, new files
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use
rag_put_chunk_informations_into_context: false # if true then each chunk will be preceded by its information which may waste some context space but allow the ai to point where it found the information
rag_build_keys_words: true # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
# Activate internet search
activate_internet_search: false