This commit is contained in:
Saifeddine ALOUI 2024-12-02 00:34:57 +01:00
parent 47f2d30895
commit 1e963ab4d4
7 changed files with 43 additions and 43 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 141
version: 142
# video viewing and news recovering
last_viewed_video: null
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_min_correspondance: 0 # minimum correspondence between the query and the content
rag_n_chunks: 4 #Number of chunks to recover from the database
rag_clean_chunks: true # Remove all unnecessary spaces and line returns

View File

@ -31,7 +31,7 @@ import yaml
import time
from lollms.utilities import PackageManager
import socket
import shutil
import json
class LollmsApplication(LoLLMsCom):
def __init__(
self,
@ -300,15 +300,16 @@ class LollmsApplication(LoLLMsCom):
message_content += f"Rank {rank} - {sender}: {text}\n"
return self.tasks_library.summarize_text(
message_content,
message_content,
"\n".join([
"Act as Skills library maker.",
"The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
"Find out important information from the discussion and report them.",
"Format the output as sections if applicable:",
"Global context: Explain in a sentense or two the subject of the discussion",
"Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
"Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
"Make the output easy to understand."
"Make the output easy to understand.",
"The objective is not to talk about the discussion but to store the important information for future usage. Do not report useless information.",
"Do not describe the discussion and focuse more on reporting the most important information from the discussion."
]),
doc_name="discussion",
callback=callback)
@ -1251,14 +1252,26 @@ class LollmsApplication(LoLLMsCom):
if discussion is None:
discussion = self.recover_discussion(client_id)
self.personality.step_start("Building query")
query = self.personality.fast_gen(f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely. Return only the reformulated request without any additional explanations, commentary, or output.{self.separator_template}{self.start_header_id_template}discussion:\n{discussion[-2048:]}{self.separator_template}{self.start_header_id_template}search query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
query = self.personality.generate_code(f"""{self.system_full_header}
Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely.
The reformulation must be placed inside a json markdown tag like this:
```json
{{
"request": the reformulated request
}}
```
{self.system_custom_header("discussion:")}
{discussion[-2048:]}
{self.system_custom_header("search query:")}""", callback=self.personality.sink)
query_code = json.loads(query)
query = query_code["request"]
self.personality.step_end("Building query")
self.personality.step(f"query: {query}")
# skills = self.skills_library.query_entry(query)
self.personality.step_start("Adding skills")
if self.config.debug:
ASCIIColors.info(f"Query : {query}")
skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, max_dist=1000)#query_entry_fts(query)
skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, min_dist=self.config.rag_min_correspondance)#query_entry_fts(query)
knowledge_infos={"titles":skill_titles,"contents":skills}
if len(skills)>0:
if knowledge=="":

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 141
version: 142
# video viewing and news recovering
last_viewed_video: null
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_min_correspondance: 0 # minimum correspondence between the query and the content
rag_n_chunks: 4 #Number of chunks to recover from the database
rag_clean_chunks: true # Remove all unnecessary spaces and line returns

View File

@ -124,38 +124,16 @@ class SkillsLibrary:
conn.close()
return res
def query_vector_db(self, query_, top_k=3, max_dist=1000):
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
def query_vector_db(self, query_, top_k=3, min_dist=0):
# Use direct string concatenation for the MATCH expression.
# Ensure text is safely escaped to avoid SQL injection.
query = "SELECT id, title, content FROM skills_library"
cursor.execute(query)
res = cursor.fetchall()
cursor.close()
conn.close()
skills = []
skill_titles = []
if len(res)>0:
for entry in res:
self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
self.vectorizer.build_index()
chunks = self.vectorizer.search(query_, top_k)
for chunk in chunks:
if chunk.distance<max_dist:
skills.append(chunk.text)
skill_titles.append(chunk.doc.title)
# conn = sqlite3.connect(self.db_path)
# cursor = conn.cursor()
# Use direct string concatenation for the MATCH expression.
# Ensure text is safely escaped to avoid SQL injection.
#query = "SELECT content FROM skills_library WHERE id = ?"
#cursor.execute(query, (chunk.chunk_id,))
#res = cursor.fetchall()
#skills.append(res[0])
#cursor.close()
#conn.close()
chunks = self.vectorizer.search(query_, top_k)
for chunk in chunks:
if chunk.distance>min_dist:
skills.append(chunk.text)
skill_titles.append(chunk.doc.title)
return skill_titles, skills

View File

@ -107,6 +107,7 @@ def get_prompts_list():
"A bear under the snow with calm eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
"A raccoon under the snow with mischievous eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
"A horse under the snow with soulful eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
"Cartoonish depiction of King Nimrod on an exaggerated golden throne, surrounded by comically oversized palace guards, vibrant colors, exaggerated facial features, Babylonian-inspired architecture in background, whimsical art style, bold outlines, flat shading, exaggerated proportions, playful details, animated expression, 2D stylized artwork, Disney-esque character design, ornate patterns on clothing and decorations.",
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk street environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk nightclub environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk alleyway environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",

View File

@ -17,6 +17,7 @@ from functools import partial
import subprocess
from collections import deque
from scipy.signal import butter, lfilter
import pipmaster as pm
import os
import threading
@ -196,8 +197,10 @@ class RTCom:
self.transcribed_lock = threading.Condition()
def load_and_extract_features(self, file_path):
if not PackageManager.check_package_installed("librosa"):
PackageManager.install_package(librosa)
if not pm.is_installed("librosa"):
pm.install("librosa")
import librosa
y, sr = librosa.load(file_path, sr=None)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
@ -242,6 +245,8 @@ class RTCom:
else:
print(f"No match found. (distance: {distance}) 😢🤡")
return False
def start_recording(self):
self.recording = True
self.stop_flag = False

View File

@ -599,7 +599,8 @@ class TasksLibrary:
prev_len = len(tk)
while len(tk)>max_summary_size:
chunk_size = int(self.lollms.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
tc = TextChunker(chunk_size, 0, None, self.lollms.model)
document_chunks = tc.get_text_chunks(text,Document("","","",0),True)
text = self.summarize_chunks(
document_chunks,
data_extraction_instruction,
@ -660,7 +661,7 @@ class TasksLibrary:
f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
f"The summary should extract required information from the current chunk to increment the previous summary.",
f"Answer directly with the cumulative summary with no extra comments.",
f"{start_header_id_template}summary{end_header_id_template}",
f"{start_header_id_template}cumulative summary{end_header_id_template}",
f"{answer_start}"
]),
max_generation_size=max_generation_size,
@ -672,8 +673,8 @@ class TasksLibrary:
f"current chunk:",
f"{chunk}",
f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
f"Answer directly with the summary with no extra comments.",
f"{start_header_id_template}summary{end_header_id_template}",
f"Answer without any extra comments.",
f"{start_header_id_template}chunk summary{end_header_id_template}",
f"{answer_start}"
]),
max_generation_size=max_generation_size,