enhanced ui

2025-06-02 15:41:00 +00:00 · 2024-04-17 00:57:30 +02:00 · 2024-04-17 00:57:30 +02:00 · 4882b74b60
commit 4882b74b60
parent ed90273e30
8 changed files with 30 additions and 11 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/lollms/app.py
+++ b/lollms/app.py
@ -636,7 +636,11 @@ class LollmsApplication(LoLLMsCom):
                try:
                    docs, sorted_similarities, document_ids = self.personality.persona_data_vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
                    for doc, infos, doc_id in zip(docs, sorted_similarities, document_ids):
-                        documentation += f"!@>document chunk:\nchunk_infos:{infos}\ncontent:{doc}\n"
+                        if self.config.data_vectorization_put_chunk_informations_into_context:
+                            documentation += f"!@>document chunk:\nchunk_infos:{infos}\ncontent:{doc}\n"
+                        else:
+                            documentation += f"!@>chunk:\n{doc}\n"
+
                except Exception as ex:
                    trace_exception(ex)
                    self.warning("Couldn't add documentation to the context. Please verify the vector database")
@ -660,13 +664,21 @@ class LollmsApplication(LoLLMsCom):

                        doc_id = self.personality.vectorizer.chunks[doc_index]['document_id']
                        content = self.personality.vectorizer.chunks[doc_index]['chunk_text']
-                        documentation += f"!@>document chunk:\nchunk_infos:{doc_id}\ncontent:{content}\n"
+                        
+                        if self.config.data_vectorization_put_chunk_informations_into_context:
+                            documentation += f"!@>document chunk:\nchunk_infos:{doc_id}\ncontent:{content}\n"
+                        else:
+                            documentation += f"!@>chunk:\n{content}\n"

                    docs, sorted_similarities, document_ids = self.personality.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
                    for doc, infos in zip(docs, sorted_similarities):
                        if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0 and infos[0]==doc_id:
                            continue
-                        documentation += f"!@>document chunk:\nchunk path: {infos[0]}\nchunk content:\n{doc}\n"
+                        if self.config.data_vectorization_put_chunk_informations_into_context:
+                            documentation += f"!@>document chunk:\nchunk path: {infos[0]}\nchunk content:\n{doc}\n"
+                        else:
+                            documentation += f"!@>chunk:\n{doc}\n"
+
                    documentation += "\n!@>important information: Use the documentation data to answer the user questions. If the data is not present in the documentation, please tell the user that the information he is asking for does not exist in the documentation section. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n"
                except Exception as ex:
                    trace_exception(ex)
@ -760,7 +772,7 @@ class LollmsApplication(LoLLMsCom):
            ASCIIColors.red(f"n_isearch_tk:{n_isearch_tk}")
            
            ASCIIColors.red(f"self.config.max_n_predict:{self.config.max_n_predict}")
-            self.error(f"Not enough space in context!!\nVerify that your vectorization settings for documents or internet search are realistic compared to your context size.\nYou are {available_space} short of context!")
+            self.InfoMessage(f"Not enough space in context!!\nVerify that your vectorization settings for documents or internet search are realistic compared to your context size.\nYou are {available_space} short of context!")
            raise Exception("Not enough space in context!!")

        # Accumulate messages until the cumulative number of tokens exceeds available_space
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 80
+version: 81
 binding_name: null
 model_name: null
 model_variant: null
@ -149,6 +149,7 @@ data_vectorization_save_db: False # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_put_chunk_informations_into_context: false # If true, each chunk is preceded by its information, which uses some extra context space but allows the AI to point to where it found the information
 data_vectorization_build_keys_words: true # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
 data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
 data_vectorization_make_persistance: false # If true, the data will be persistant webween runs