upgraded

2025-04-20 16:40:48 +00:00 · 2024-12-02 00:34:57 +01:00 · 2024-12-02 00:34:57 +01:00 · 1e963ab4d4
commit 1e963ab4d4
parent 47f2d30895
7 changed files with 43 additions and 43 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 141
+version: 142

 # video viewing and news recovering
 last_viewed_video: null
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
 rag_vectorizer_parameters: null # Parameters of the model in json format
 rag_chunk_size: 512 # number of tokens per chunk
 rag_overlap: 0 # number of tokens of overlap
+rag_min_correspondance: 0 # minimum correspondence between the query and the content

 rag_n_chunks: 4 #Number of chunks to recover from the database
 rag_clean_chunks: true # Remove all unnecessary spaces and line returns
--- a/lollms/app.py
+++ b/lollms/app.py
@ -31,7 +31,7 @@ import yaml
 import time
 from lollms.utilities import PackageManager
 import socket
-import shutil
+import json
 class LollmsApplication(LoLLMsCom):
    def __init__(
                    self, 
@ -300,15 +300,16 @@ class LollmsApplication(LoLLMsCom):
            message_content += f"Rank {rank} - {sender}: {text}\n"

        return self.tasks_library.summarize_text(
-            message_content, 
+            message_content,
            "\n".join([
-                "Act as Skills library maker.",
-                "The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
+                "Find out important information from the discussion and report them.",
                "Format the output as sections if applicable:",
                "Global context: Explain in a sentense or two the subject of the discussion",
                "Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
                "Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
-                "Make the output easy to understand."
+                "Make the output easy to understand.",
+                "The objective is not to talk about the discussion but to store the important information for future usage. Do not report useless information.",
+                "Do not describe the discussion and focuse more on reporting the most important information from the discussion."
            ]),
            doc_name="discussion",
            callback=callback)
@ -1251,14 +1252,26 @@ class LollmsApplication(LoLLMsCom):
                        if discussion is None:
                            discussion = self.recover_discussion(client_id)
                        self.personality.step_start("Building query")
-                        query = self.personality.fast_gen(f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely. Return only the reformulated request without any additional explanations, commentary, or output.{self.separator_template}{self.start_header_id_template}discussion:\n{discussion[-2048:]}{self.separator_template}{self.start_header_id_template}search query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
+                        query = self.personality.generate_code(f"""{self.system_full_header}
+Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely.
+The reformulation must be placed inside a json markdown tag like this:
+```json
+{{
+    "request": the reformulated request
+}}
+```
+{self.system_custom_header("discussion:")}
+{discussion[-2048:]}
+{self.system_custom_header("search query:")}""", callback=self.personality.sink)
+                        query_code = json.loads(query)
+                        query = query_code["request"]
                        self.personality.step_end("Building query")
                        self.personality.step(f"query: {query}")
                        # skills = self.skills_library.query_entry(query)
                        self.personality.step_start("Adding skills")
                        if self.config.debug:
                            ASCIIColors.info(f"Query : {query}")
-                        skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, max_dist=1000)#query_entry_fts(query)
+                        skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, min_dist=self.config.rag_min_correspondance)#query_entry_fts(query)
                        knowledge_infos={"titles":skill_titles,"contents":skills}
                        if len(skills)>0:
                            if knowledge=="":
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 141
+version: 142

 # video viewing and news recovering
 last_viewed_video: null
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
 rag_vectorizer_parameters: null # Parameters of the model in json format
 rag_chunk_size: 512 # number of tokens per chunk
 rag_overlap: 0 # number of tokens of overlap
+rag_min_correspondance: 0 # minimum correspondence between the query and the content

 rag_n_chunks: 4 #Number of chunks to recover from the database
 rag_clean_chunks: true # Remove all unnecessary spaces and line returns
--- a/lollms/databases/skills_database.py
+++ b/lollms/databases/skills_database.py
@ -124,38 +124,16 @@ class SkillsLibrary:
        conn.close()
        return res

-    def query_vector_db(self, query_, top_k=3, max_dist=1000):
-        conn = sqlite3.connect(self.db_path)
-        cursor = conn.cursor()
+    def query_vector_db(self, query_, top_k=3, min_dist=0):
        # Use direct string concatenation for the MATCH expression.
        # Ensure text is safely escaped to avoid SQL injection.
-        query = "SELECT id, title, content FROM skills_library"
-        cursor.execute(query)
-        res = cursor.fetchall()
-        cursor.close()
-        conn.close()
        skills = []
        skill_titles = []        
-        if len(res)>0:
-            for entry in res:
-                self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
-            self.vectorizer.build_index()
-            
-            chunks = self.vectorizer.search(query_, top_k)
-            for chunk in chunks:
-                if  chunk.distance<max_dist:
-                    skills.append(chunk.text)
-                    skill_titles.append(chunk.doc.title)
-                    # conn = sqlite3.connect(self.db_path)
-                    # cursor = conn.cursor()
-                    # Use direct string concatenation for the MATCH expression.
-                    # Ensure text is safely escaped to avoid SQL injection.
-                    #query = "SELECT content FROM skills_library WHERE id = ?"
-                    #cursor.execute(query, (chunk.chunk_id,))
-                    #res = cursor.fetchall()
-                    #skills.append(res[0])
-                    #cursor.close()
-                    #conn.close()
+        chunks = self.vectorizer.search(query_, top_k)
+        for chunk in chunks:
+            if  chunk.distance>min_dist:
+                skills.append(chunk.text)
+                skill_titles.append(chunk.doc.title)
            
        return skill_titles, skills

--- a/lollms/functions/prompting/image_gen_prompts.py
+++ b/lollms/functions/prompting/image_gen_prompts.py
@ -107,6 +107,7 @@ def get_prompts_list():
        "A bear under the snow with calm eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
        "A raccoon under the snow with mischievous eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
        "A horse under the snow with soulful eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
+        "Cartoonish depiction of King Nimrod on an exaggerated golden throne, surrounded by comically oversized palace guards, vibrant colors, exaggerated facial features, Babylonian-inspired architecture in background, whimsical art style, bold outlines, flat shading, exaggerated proportions, playful details, animated expression, 2D stylized artwork, Disney-esque character design, ornate patterns on clothing and decorations.",
        "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk street environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
        "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk nightclub environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
        "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk alleyway environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
--- a/lollms/media.py
+++ b/lollms/media.py
@ -17,6 +17,7 @@ from functools import partial
 import subprocess
 from collections import deque
 from scipy.signal import butter, lfilter
+import pipmaster as pm

 import os
 import threading
@ -196,8 +197,10 @@ class RTCom:
        self.transcribed_lock = threading.Condition()

    def load_and_extract_features(self, file_path):
-        if not PackageManager.check_package_installed("librosa"):
-            PackageManager.install_package(librosa)
+
+        
+        if not pm.is_installed("librosa"):
+            pm.install("librosa")
        import librosa
        y, sr = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
@ -242,6 +245,8 @@ class RTCom:
        else:
            print(f"No match found. (distance: {distance}) 😢🤡")
            return False
+        
+
    def start_recording(self):
        self.recording = True
        self.stop_flag = False
--- a/lollms/tasks.py
+++ b/lollms/tasks.py
@ -599,7 +599,8 @@ class TasksLibrary:
        prev_len = len(tk)
        while len(tk)>max_summary_size:
            chunk_size = int(self.lollms.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
+            tc = TextChunker(chunk_size, 0, None, self.lollms.model)
+            document_chunks = tc.get_text_chunks(text,Document("","","",0),True)
            text = self.summarize_chunks(
                                            document_chunks, 
                                            data_extraction_instruction, 
@ -660,7 +661,7 @@ class TasksLibrary:
                                    f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
                                    f"The summary should extract required information from the current chunk to increment the previous summary.",
                                    f"Answer directly with the cumulative summary with no extra comments.",
-                                    f"{start_header_id_template}summary{end_header_id_template}",
+                                    f"{start_header_id_template}cumulative summary{end_header_id_template}",
                                    f"{answer_start}"
                                    ]),
                                    max_generation_size=max_generation_size,
@ -672,8 +673,8 @@ class TasksLibrary:
                                    f"current chunk:",
                                    f"{chunk}",
                                    f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
-                                    f"Answer directly with the summary with no extra comments.",
-                                    f"{start_header_id_template}summary{end_header_id_template}",
+                                    f"Answer without any extra comments.",
+                                    f"{start_header_id_template}chunk summary{end_header_id_template}",
                                    f"{answer_start}"
                                    ]),
                                    max_generation_size=max_generation_size,