upgraded vector db

This commit is contained in:
Saifeddine ALOUI 2024-06-26 01:27:11 +02:00
parent 09d80f12da
commit cb32fcb474
5 changed files with 37 additions and 8 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 120
binding_name: null
model_name: null
model_variant: null
@ -249,6 +249,8 @@ rag_clean_chunks: true #Removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will also vectorize the content of subfolders
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk and rewrite it in a simpler format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to run multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #drops any junk chunk with fewer than this many tokens
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
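The two new flags tighten chunk quality: `rag_min_nb_tokens_in_chunk` drops tiny fragments before they pollute the index, and `rag_activate_multi_hops` lets retrieval iterate until enough context has been gathered. A minimal sketch of the length filter, assuming chunks are plain dicts with an `nb_tokens` field (the real vectorizer uses its own chunk type):

```python
from typing import Any, Dict, List

def filter_short_chunks(chunks: List[Dict[str, Any]], min_nb_tokens: int = 10) -> List[Dict[str, Any]]:
    # Drop chunks below the rag_min_nb_tokens_in_chunk threshold;
    # fragments this small are usually separators, headers, or other junk.
    return [c for c in chunks if c["nb_tokens"] >= min_nb_tokens]

chunks = [
    {"text": "## ---", "nb_tokens": 3},
    {"text": "The vectorizer also walks subfolders when rag_follow_subfolders is true.", "nb_tokens": 14},
]
print(filter_short_chunks(chunks))  # keeps only the second chunk
```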

View File

@ -860,6 +860,7 @@ class LollmsApplication(LoLLMsCom):
Returns:
Tuple: The prepared prompt, the original message content, the tokenized query, the context details, and the internet search infos.
"""
documentation_entries = []
start_header_id_template = self.config.start_header_id_template
end_header_id_template = self.config.end_header_id_template
@ -1039,7 +1040,7 @@ class LollmsApplication(LoLLMsCom):
])
query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink)
self.personality.step_end("Building vector store query")
ASCIIColors.cyan(f"Query: {query}")
ASCIIColors.magenta(f"Query: {query}")
self.personality.step(f"Query: {query}")
else:
query = current_message.content
@ -1058,6 +1059,7 @@ class LollmsApplication(LoLLMsCom):
results+=r
n_neighbors = self.active_rag_dbs[0]["vectorizer"].n_neighbors
sorted_results = sorted(results, key=lambda x: x.distance)[:n_neighbors]
for chunk in sorted_results:
document_infos = f"{separator_template}".join([
f"{start_header_id_template}document chunk{end_header_id_template}",
@ -1065,7 +1067,13 @@ class LollmsApplication(LoLLMsCom):
f"source_document_path:{chunk.doc.path}",
f"content:\n{chunk.text}\n"
])
documentation_entries.append({
"document_title":chunk.doc.title,
"document_path":chunk.doc.path,
"chunk_content":chunk.text,
"chunk_size":chunk.nb_tokens,
"distance":chunk.distance,
})
documentation += document_infos
if (len(client.discussion.text_files) > 0) and client.discussion.vectorizer is not None:
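Taken together, these hunks merge the hits from every active RAG database, sort them by vector distance, keep the closest `n_neighbors`, and now also record each surviving chunk as a structured entry rather than only a flattened string. A standalone sketch of that bookkeeping (the `Chunk` dataclass is a stand-in that flattens the diff's `chunk.doc.title`/`chunk.doc.path`; field names follow the diff):

```python
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class Chunk:
    # Stand-in for the vectorizer's chunk/document types.
    title: str
    path: str
    text: str
    nb_tokens: int
    distance: float

def build_documentation_entries(results: List[Chunk], n_neighbors: int) -> List[Dict]:
    # Keep only the n_neighbors closest chunks across all merged databases.
    sorted_results = sorted(results, key=lambda c: c.distance)[:n_neighbors]
    return [
        {
            "document_title": c.title,
            "document_path": c.path,
            "chunk_content": c.text,
            "chunk_size": c.nb_tokens,
            "distance": c.distance,
        }
        for c in sorted_results
    ]
```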
@ -1300,6 +1308,7 @@ class LollmsApplication(LoLLMsCom):
"internet_search_infos":internet_search_infos,
"internet_search_results":internet_search_results,
"documentation":documentation,
"documentation_entries":documentation_entries,
"knowledge":knowledge,
"knowledge_infos":knowledge_infos,
"user_description":user_description,
@ -1311,7 +1320,8 @@ class LollmsApplication(LoLLMsCom):
"ai_prefix":ai_prefix,
"extra":""
}
if self.config.debug:
ASCIIColors.hilight(documentation,"source_document_title", ASCIIColors.color_yellow, ASCIIColors.color_red, False)
# Return the prepared query, original message content, and tokenized query
return prompt_data, current_message.content, tokens, context_details, internet_search_infos
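Since `context_details` now carries `documentation_entries`, downstream consumers (personalities, the web UI) can read per-chunk metadata directly instead of re-parsing the flattened `documentation` string. A hypothetical consumer sketch:

```python
def format_sources(context_details: dict) -> str:
    # Hypothetical helper: render the retrieved chunks as a sources list.
    lines = [
        f"- {e['document_title']} ({e['document_path']}) "
        f"[distance={e['distance']:.3f}, {e['chunk_size']} tokens]"
        for e in context_details.get("documentation_entries", [])
    ]
    return "\n".join(lines) if lines else "No documentation chunks were retrieved."
```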

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 120
binding_name: null
model_name: null
model_variant: null
@ -249,6 +249,8 @@ rag_clean_chunks: true #Removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will also vectorize the content of subfolders
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk and rewrite it in a simpler format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to run multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #drops any junk chunk with fewer than this many tokens
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -315,6 +315,19 @@ class AIPersonality:
if callback:
callback(full_text, MSG_TYPE.MSG_TYPE_FULL)
def ui(self, ui_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This sends ui text to front end
Args:
ui_text (dict): The ui code to be sent to the front end
callback (callable, optional): A callable with this signature (str, MSG_TYPE) to send the text to. Defaults to None.
"""
if not callback and self.callback:
callback = self.callback
if callback:
callback(ui_text, MSG_TYPE.MSG_TYPE_UI)
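The new `ui` helper mirrors the existing `full` pattern but tags the payload as `MSG_TYPE.MSG_TYPE_UI`, so the front end renders it as UI markup rather than chat text. A hypothetical usage from inside a personality:

```python
# Illustrative only; the subclass and method names are made up for this sketch.
class IndexingPersona(AIPersonality):
    def report_progress(self, done: int, total: int):
        self.ui(
            f"<progress value='{done}' max='{total}'></progress> "
            f"{done}/{total} files vectorized"
        )
```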
def full_invisible_to_ai(self, full_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This sends full text to front end (INVISIBLE to AI)
@ -690,8 +703,6 @@ class AIPersonality:
# TODO : add show progress
gen = self.generate(prompt, max_generation_size, temperature = temperature, top_k = top_k, top_p=top_p, repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, callback=callback, show_progress=show_progress).strip().replace("</s>", "").replace("<s>", "")
if debug:
self.print_prompt("prompt", prompt+gen)
return gen
@ -767,6 +778,9 @@ class AIPersonality:
repeat_penalty=self.model_repeat_penalty if repeat_penalty is None else repeat_penalty,
repeat_last_n = self.model_repeat_last_n if repeat_last_n is None else repeat_last_n,
).strip()
if debug:
self.print_prompt("prompt", prompt+self.bot_says)
return self.bot_says
def setCallback(self, callback: Callable[[str, MSG_TYPE, dict, list], bool]):

View File

@ -678,9 +678,9 @@ async def set_active_personality_settings(request: Request):
class PersonalityInfos(BaseModel):
client_id: str
category: str
name: str
language:Optional[str] = None
@router.post("/copy_to_custom_personas")
async def copy_to_custom_personas(data: PersonalityInfos):
@ -689,6 +689,7 @@ async def copy_to_custom_personas(data: PersonalityInfos):
"""
import shutil
client = check_access(lollmsElfServer, data.client_id)
category = sanitize_path(data.category)
name = sanitize_path(data.name)
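Running both `category` and `name` through `sanitize_path` closes a path-traversal hole: without it, a crafted value containing separators or parent references could reach outside the custom personas folder. lollms' actual sanitizer is not shown in this diff; an illustrative stand-in:

```python
from pathlib import PurePosixPath

def sanitize_path_sketch(value: str) -> str:
    # Illustrative stand-in, not lollms' implementation: reject separators
    # and parent references so user input cannot escape the target folder.
    if "\\" in value or value in ("", ".", ".."):
        raise ValueError(f"Invalid path component: {value!r}")
    if PurePosixPath(value).name != value:
        raise ValueError(f"Invalid path component: {value!r}")
    return value

sanitize_path_sketch("my_persona")    # ok
# sanitize_path_sketch("../secrets")  # raises ValueError
```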