diff --git a/configs/config.yaml b/configs/config.yaml
index 2001ad4..86f1caf 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 141
+version: 142

 # video viewing and news recovering
 last_viewed_video: null
@@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
 rag_vectorizer_parameters: null # Parameters of the model in json format
 rag_chunk_size: 512 # number of tokens per chunk
 rag_overlap: 0 # number of tokens of overlap
+rag_min_correspondance: 0 # minimum correspondence between the query and the content
 rag_n_chunks: 4 # Number of chunks to retrieve from the database
 rag_clean_chunks: true # Removes all unnecessary spaces and line returns
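The new `rag_min_correspondance` key sits next to the other RAG settings and acts as a score floor for retrieved skills. A minimal sketch of reading it, assuming PyYAML; the key name comes from the diff, while the path and surrounding code are illustrative:

```python
# Load the config and pick up the new threshold; an absent key falls back to 0,
# which keeps every chunk the vectorizer returns with a positive score.
import yaml

with open("configs/config.yaml") as f:
    cfg = yaml.safe_load(f)

min_corr = cfg.get("rag_min_correspondance", 0)  # new in version 142
print(f"skill chunks scoring at or below {min_corr} will be dropped")
```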
diff --git a/lollms/app.py b/lollms/app.py
index e8ced28..ccefc6c 100644
--- a/lollms/app.py
+++ b/lollms/app.py
@@ -31,7 +31,7 @@ import yaml
 import time
 from lollms.utilities import PackageManager
 import socket
-import shutil
+import json

 class LollmsApplication(LoLLMsCom):
     def __init__(
         self,
@@ -300,15 +300,16 @@ class LollmsApplication(LoLLMsCom):
                 message_content += f"Rank {rank} - {sender}: {text}\n"

         return self.tasks_library.summarize_text(
-            message_content, 
+            message_content,
             "\n".join([
-                "Act as Skills library maker.",
-                "The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
+                "Find out the important information from the discussion and report it.",
                 "Format the output as sections if applicable:",
                 "Global context: Explain in a sentence or two the subject of the discussion",
                 "Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
                 "Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
-                "Make the output easy to understand."
+                "Make the output easy to understand.",
+                "The objective is not to talk about the discussion but to store the important information for future usage. Do not report useless information.",
+                "Do not describe the discussion; focus on reporting the most important information it contains."
             ]),
             doc_name="discussion",
             callback=callback)
@@ -1251,14 +1252,26 @@ class LollmsApplication(LoLLMsCom):
         if discussion is None:
             discussion = self.recover_discussion(client_id)
         self.personality.step_start("Building query")
-        query = self.personality.fast_gen(f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely. Return only the reformulated request without any additional explanations, commentary, or output.{self.separator_template}{self.start_header_id_template}discussion:\n{discussion[-2048:]}{self.separator_template}{self.start_header_id_template}search query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
+        query = self.personality.generate_code(f"""{self.system_full_header}
+Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely.
+The reformulation must be placed inside a json markdown tag like this:
+```json
+{{
+    "request": the reformulated request
+}}
+```
+{self.system_custom_header("discussion:")}
+{discussion[-2048:]}
+{self.system_custom_header("search query:")}""", callback=self.personality.sink)
+        query_code = json.loads(query)
+        query = query_code["request"]
         self.personality.step_end("Building query")
         self.personality.step(f"query: {query}")
         # skills = self.skills_library.query_entry(query)
         self.personality.step_start("Adding skills")
         if self.config.debug:
             ASCIIColors.info(f"Query : {query}")
-        skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, max_dist=1000)#query_entry_fts(query)
+        skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, min_dist=self.config.rag_min_correspondance)#query_entry_fts(query)
         knowledge_infos={"titles":skill_titles,"contents":skills}
         if len(skills)>0:
             if knowledge=="":
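The free-form `fast_gen` call is replaced by `generate_code` plus `json.loads`, so the reformulated request comes back machine-parseable. The sketch below shows the unwrapping step in isolation; `extract_json` is a hypothetical helper, and the diff assumes `generate_code` already returns the fence body ready for `json.loads`:

```python
# Parse a model reply that wraps its answer in a json markdown fence,
# falling back to the raw text when no fence is present.
import json
import re

def extract_json(reply: str) -> dict:
    match = re.search(r"```json\s*(\{.*?\})\s*```", reply, re.DOTALL)
    return json.loads(match.group(1) if match else reply)

reply = '```json\n{"request": "set up a local RAG database"}\n```'
print(extract_json(reply)["request"])  # set up a local RAG database
```

Note that `json.loads` raises on malformed output, so a retry or fallback around this call may be worth considering.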
diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml
index 2001ad4..86f1caf 100644
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 141
+version: 142

 # video viewing and news recovering
 last_viewed_video: null
@@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
 rag_vectorizer_parameters: null # Parameters of the model in json format
 rag_chunk_size: 512 # number of tokens per chunk
 rag_overlap: 0 # number of tokens of overlap
+rag_min_correspondance: 0 # minimum correspondence between the query and the content
 rag_n_chunks: 4 # Number of chunks to retrieve from the database
 rag_clean_chunks: true # Removes all unnecessary spaces and line returns
diff --git a/lollms/databases/skills_database.py b/lollms/databases/skills_database.py
index 3406570..ffcc465 100644
--- a/lollms/databases/skills_database.py
+++ b/lollms/databases/skills_database.py
@@ -124,38 +124,16 @@ class SkillsLibrary:
         conn.close()
         return res

-    def query_vector_db(self, query_, top_k=3, max_dist=1000):
-        conn = sqlite3.connect(self.db_path)
-        cursor = conn.cursor()
+    def query_vector_db(self, query_, top_k=3, min_dist=0):
         # Use direct string concatenation for the MATCH expression.
         # Ensure text is safely escaped to avoid SQL injection.
-        query = "SELECT id, title, content FROM skills_library"
-        cursor.execute(query)
-        res = cursor.fetchall()
-        cursor.close()
-        conn.close()
         skills = []
         skill_titles = []
-        if len(res)>0:
-            for entry in res:
-                self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
-            self.vectorizer.build_index()
-
-            chunks = self.vectorizer.search(query_, top_k)
-            for chunk in chunks:
-                if chunk.distance<max_dist:
-                    skills.append(chunk.text)
-                    skill_titles.append(chunk.doc.title)
+        chunks = self.vectorizer.search(query_, top_k)
+        for chunk in chunks:
+            if chunk.distance>min_dist:
+                skills.append(chunk.text)
+                skill_titles.append(chunk.doc.title)
         return skill_titles, skills
diff --git a/lollms/functions/prompting/image_gen_prompts.py b/lollms/functions/prompting/image_gen_prompts.py
index 34256e3..194153f 100644
--- a/lollms/functions/prompting/image_gen_prompts.py
+++ b/lollms/functions/prompting/image_gen_prompts.py
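The rewritten `query_vector_db` no longer refetches the whole table and rebuilds an index per query; it searches whatever vectorizer the library already holds and keeps hits whose score clears `min_dist`. A self-contained toy of the same filter, where `FakeVectorizer`, `Chunk`, and `Doc` are hypothetical stand-ins for the real lollms types:

```python
from dataclasses import dataclass

@dataclass
class Doc:
    title: str

@dataclass
class Chunk:
    text: str
    distance: float  # here: a correspondence score, higher = better match
    doc: Doc

class FakeVectorizer:
    """Stand-in for whatever vectorizer SkillsLibrary wires up at init time."""
    def __init__(self, chunks):
        self.chunks = chunks
    def search(self, query, top_k):
        return sorted(self.chunks, key=lambda c: -c.distance)[:top_k]

def query_vector_db(vectorizer, query_, top_k=3, min_dist=0):
    skills, skill_titles = [], []
    for chunk in vectorizer.search(query_, top_k):
        if chunk.distance > min_dist:  # drop matches below the correspondence floor
            skills.append(chunk.text)
            skill_titles.append(chunk.doc.title)
    return skill_titles, skills

vec = FakeVectorizer([
    Chunk("Use rag_overlap for long documents", 0.9, Doc("RAG tips")),
    Chunk("Unrelated note", 0.05, Doc("Misc")),
])
print(query_vector_db(vec, "rag overlap", min_dist=0.2))
# (['RAG tips'], ['Use rag_overlap for long documents'])
```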
@@ -107,6 +107,7 @@ def get_prompts_list():
         "A bear under the snow with calm eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
         "A raccoon under the snow with mischievous eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
         "A horse under the snow with soulful eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
+        "Cartoonish depiction of King Nimrod on an exaggerated golden throne, surrounded by comically oversized palace guards, vibrant colors, exaggerated facial features, Babylonian-inspired architecture in background, whimsical art style, bold outlines, flat shading, exaggerated proportions, playful details, animated expression, 2D stylized artwork, Disney-esque character design, ornate patterns on clothing and decorations.",
         "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk street environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
         "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk nightclub environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
         "A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk alleyway environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
diff --git a/lollms/media.py b/lollms/media.py
index 38fc54d..9b6e3ec 100644
--- a/lollms/media.py
+++ b/lollms/media.py
@@ -17,6 +17,7 @@ from functools import partial
 import subprocess
 from collections import deque
 from scipy.signal import butter, lfilter
+import pipmaster as pm

 import os
 import threading
@@ -196,8 +197,10 @@ class RTCom:
         self.transcribed_lock = threading.Condition()

     def load_and_extract_features(self, file_path):
-        if not PackageManager.check_package_installed("librosa"):
-            PackageManager.install_package(librosa)
+
+
+        if not pm.is_installed("librosa"):
+            pm.install("librosa")
         import librosa
         y, sr = librosa.load(file_path, sr=None)
         mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
@@ -242,6 +245,8 @@ class RTCom:
         else:
             print(f"No match found. (distance: {distance}) 😢🤡")
             return False
+
+
     def start_recording(self):
         self.recording = True
         self.stop_flag = False
diff --git a/lollms/tasks.py b/lollms/tasks.py
index 3d8bc80..16275d2 100644
--- a/lollms/tasks.py
+++ b/lollms/tasks.py
@@ -599,7 +599,8 @@ class TasksLibrary:
         prev_len = len(tk)
         while len(tk)>max_summary_size:
             chunk_size = int(self.lollms.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
+            tc = TextChunker(chunk_size, 0, None, self.lollms.model)
+            document_chunks = tc.get_text_chunks(text,Document("","","",0),True)
             text = self.summarize_chunks(
                 document_chunks,
                 data_extraction_instruction,
@@ -660,7 +661,7 @@
                     f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
                     f"The summary should extract required information from the current chunk to increment the previous summary.",
                     f"Answer directly with the cumulative summary with no extra comments.",
-                    f"{start_header_id_template}summary{end_header_id_template}",
+                    f"{start_header_id_template}cumulative summary{end_header_id_template}",
                     f"{answer_start}"
                 ]),
                 max_generation_size=max_generation_size,
@@ -672,8 +673,8 @@
                     f"current chunk:",
                     f"{chunk}",
                     f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
-                    f"Answer directly with the summary with no extra comments.",
-                    f"{start_header_id_template}summary{end_header_id_template}",
+                    f"Answer without any extra comments.",
+                    f"{start_header_id_template}chunk summary{end_header_id_template}",
                     f"{answer_start}"
                 ]),
                 max_generation_size=max_generation_size,
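The `TextChunker` swap in tasks.py sits inside the reduction loop of `summarize_text`: split the text into model-sized chunks, summarize each, re-join, and repeat while the token count is still above the target (the surrounding code also keeps `prev_len` so it can stop when a pass no longer shrinks the text). A runnable toy of that loop, with `tokenize`, `summarize`, and `chunk_text` as stand-ins for the model-backed calls:

```python
def shrink_to_fit(text, tokenize, summarize, chunk_text, max_tokens):
    """Chunk, summarize each piece, re-join, repeat until the text fits."""
    while len(tokenize(text)) > max_tokens:
        chunks = chunk_text(text)                       # model-sized pieces
        text = "\n".join(summarize(c) for c in chunks)  # compress each piece
    return text

# Toy run with character-level "tokens" so the sketch executes as-is:
result = shrink_to_fit(
    "lorem ipsum " * 50,
    tokenize=list,
    summarize=lambda c: c[:20],
    chunk_text=lambda t: [t[i:i + 100] for i in range(0, len(t), 100)],
    max_tokens=120,
)
print(len(result))  # well under 120 after two passes
```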