mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
upgraded
This commit is contained in:
parent
47f2d30895
commit
1e963ab4d4
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 141
|
||||
version: 142
|
||||
|
||||
# video viewing and news recovering
|
||||
last_viewed_video: null
|
||||
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
|
||||
rag_vectorizer_parameters: null # Parameters of the model in json format
|
||||
rag_chunk_size: 512 # number of tokens per chunk
|
||||
rag_overlap: 0 # number of tokens of overlap
|
||||
rag_min_correspondance: 0 # minimum correspondence between the query and the content
|
||||
|
||||
rag_n_chunks: 4 #Number of chunks to recover from the database
|
||||
rag_clean_chunks: true # Remove all unnecessary spaces and line returns
|
||||
|
@ -31,7 +31,7 @@ import yaml
|
||||
import time
|
||||
from lollms.utilities import PackageManager
|
||||
import socket
|
||||
import shutil
|
||||
import json
|
||||
class LollmsApplication(LoLLMsCom):
|
||||
def __init__(
|
||||
self,
|
||||
@ -300,15 +300,16 @@ class LollmsApplication(LoLLMsCom):
|
||||
message_content += f"Rank {rank} - {sender}: {text}\n"
|
||||
|
||||
return self.tasks_library.summarize_text(
|
||||
message_content,
|
||||
message_content,
|
||||
"\n".join([
|
||||
"Act as Skills library maker.",
|
||||
"The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
|
||||
"Find out important information from the discussion and report them.",
|
||||
"Format the output as sections if applicable:",
|
||||
"Global context: Explain in a sentense or two the subject of the discussion",
|
||||
"Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
|
||||
"Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
|
||||
"Make the output easy to understand."
|
||||
"Make the output easy to understand.",
|
||||
"The objective is not to talk about the discussion but to store the important information for future usage. Do not report useless information.",
|
||||
"Do not describe the discussion and focuse more on reporting the most important information from the discussion."
|
||||
]),
|
||||
doc_name="discussion",
|
||||
callback=callback)
|
||||
@ -1251,14 +1252,26 @@ class LollmsApplication(LoLLMsCom):
|
||||
if discussion is None:
|
||||
discussion = self.recover_discussion(client_id)
|
||||
self.personality.step_start("Building query")
|
||||
query = self.personality.fast_gen(f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely. Return only the reformulated request without any additional explanations, commentary, or output.{self.separator_template}{self.start_header_id_template}discussion:\n{discussion[-2048:]}{self.separator_template}{self.start_header_id_template}search query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
|
||||
query = self.personality.generate_code(f"""{self.system_full_header}
|
||||
Your task is to carefully read the provided discussion and reformulate {self.config.user_name}'s request concisely.
|
||||
The reformulation must be placed inside a json markdown tag like this:
|
||||
```json
|
||||
{{
|
||||
"request": the reformulated request
|
||||
}}
|
||||
```
|
||||
{self.system_custom_header("discussion:")}
|
||||
{discussion[-2048:]}
|
||||
{self.system_custom_header("search query:")}""", callback=self.personality.sink)
|
||||
query_code = json.loads(query)
|
||||
query = query_code["request"]
|
||||
self.personality.step_end("Building query")
|
||||
self.personality.step(f"query: {query}")
|
||||
# skills = self.skills_library.query_entry(query)
|
||||
self.personality.step_start("Adding skills")
|
||||
if self.config.debug:
|
||||
ASCIIColors.info(f"Query : {query}")
|
||||
skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, max_dist=1000)#query_entry_fts(query)
|
||||
skill_titles, skills = self.skills_library.query_vector_db(query, top_k=3, min_dist=self.config.rag_min_correspondance)#query_entry_fts(query)
|
||||
knowledge_infos={"titles":skill_titles,"contents":skills}
|
||||
if len(skills)>0:
|
||||
if knowledge=="":
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 141
|
||||
version: 142
|
||||
|
||||
# video viewing and news recovering
|
||||
last_viewed_video: null
|
||||
@ -285,6 +285,7 @@ rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The mode
|
||||
rag_vectorizer_parameters: null # Parameters of the model in json format
|
||||
rag_chunk_size: 512 # number of tokens per chunk
|
||||
rag_overlap: 0 # number of tokens of overlap
|
||||
rag_min_correspondance: 0 # minimum correspondence between the query and the content
|
||||
|
||||
rag_n_chunks: 4 #Number of chunks to recover from the database
|
||||
rag_clean_chunks: true # Remove all unnecessary spaces and line returns
|
||||
|
@ -124,38 +124,16 @@ class SkillsLibrary:
|
||||
conn.close()
|
||||
return res
|
||||
|
||||
def query_vector_db(self, query_, top_k=3, max_dist=1000):
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
cursor = conn.cursor()
|
||||
def query_vector_db(self, query_, top_k=3, min_dist=0):
|
||||
# Use direct string concatenation for the MATCH expression.
|
||||
# Ensure text is safely escaped to avoid SQL injection.
|
||||
query = "SELECT id, title, content FROM skills_library"
|
||||
cursor.execute(query)
|
||||
res = cursor.fetchall()
|
||||
cursor.close()
|
||||
conn.close()
|
||||
skills = []
|
||||
skill_titles = []
|
||||
if len(res)>0:
|
||||
for entry in res:
|
||||
self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
|
||||
self.vectorizer.build_index()
|
||||
|
||||
chunks = self.vectorizer.search(query_, top_k)
|
||||
for chunk in chunks:
|
||||
if chunk.distance<max_dist:
|
||||
skills.append(chunk.text)
|
||||
skill_titles.append(chunk.doc.title)
|
||||
# conn = sqlite3.connect(self.db_path)
|
||||
# cursor = conn.cursor()
|
||||
# Use direct string concatenation for the MATCH expression.
|
||||
# Ensure text is safely escaped to avoid SQL injection.
|
||||
#query = "SELECT content FROM skills_library WHERE id = ?"
|
||||
#cursor.execute(query, (chunk.chunk_id,))
|
||||
#res = cursor.fetchall()
|
||||
#skills.append(res[0])
|
||||
#cursor.close()
|
||||
#conn.close()
|
||||
chunks = self.vectorizer.search(query_, top_k)
|
||||
for chunk in chunks:
|
||||
if chunk.distance>min_dist:
|
||||
skills.append(chunk.text)
|
||||
skill_titles.append(chunk.doc.title)
|
||||
|
||||
return skill_titles, skills
|
||||
|
||||
|
@ -107,6 +107,7 @@ def get_prompts_list():
|
||||
"A bear under the snow with calm eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
|
||||
"A raccoon under the snow with mischievous eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
|
||||
"A horse under the snow with soulful eyes, covered by snow, cinematic style, medium shot, professional photo, animal.",
|
||||
"Cartoonish depiction of King Nimrod on an exaggerated golden throne, surrounded by comically oversized palace guards, vibrant colors, exaggerated facial features, Babylonian-inspired architecture in background, whimsical art style, bold outlines, flat shading, exaggerated proportions, playful details, animated expression, 2D stylized artwork, Disney-esque character design, ornate patterns on clothing and decorations.",
|
||||
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk street environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
|
||||
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk nightclub environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
|
||||
"A glamorous digital magazine photoshoot, a fashionable model wearing avant-garde clothing, set in a futuristic cyberpunk alleyway environment, with a neon-lit city background, intricate high fashion details, backlit by vibrant city glow, Vogue fashion photography.",
|
||||
|
@ -17,6 +17,7 @@ from functools import partial
|
||||
import subprocess
|
||||
from collections import deque
|
||||
from scipy.signal import butter, lfilter
|
||||
import pipmaster as pm
|
||||
|
||||
import os
|
||||
import threading
|
||||
@ -196,8 +197,10 @@ class RTCom:
|
||||
self.transcribed_lock = threading.Condition()
|
||||
|
||||
def load_and_extract_features(self, file_path):
|
||||
if not PackageManager.check_package_installed("librosa"):
|
||||
PackageManager.install_package(librosa)
|
||||
|
||||
|
||||
if not pm.is_installed("librosa"):
|
||||
pm.install("librosa")
|
||||
import librosa
|
||||
y, sr = librosa.load(file_path, sr=None)
|
||||
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
||||
@ -242,6 +245,8 @@ class RTCom:
|
||||
else:
|
||||
print(f"No match found. (distance: {distance}) 😢🤡")
|
||||
return False
|
||||
|
||||
|
||||
def start_recording(self):
|
||||
self.recording = True
|
||||
self.stop_flag = False
|
||||
|
@ -599,7 +599,8 @@ class TasksLibrary:
|
||||
prev_len = len(tk)
|
||||
while len(tk)>max_summary_size:
|
||||
chunk_size = int(self.lollms.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
|
||||
tc = TextChunker(chunk_size, 0, None, self.lollms.model)
|
||||
document_chunks = tc.get_text_chunks(text,Document("","","",0),True)
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
data_extraction_instruction,
|
||||
@ -660,7 +661,7 @@ class TasksLibrary:
|
||||
f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
|
||||
f"The summary should extract required information from the current chunk to increment the previous summary.",
|
||||
f"Answer directly with the cumulative summary with no extra comments.",
|
||||
f"{start_header_id_template}summary{end_header_id_template}",
|
||||
f"{start_header_id_template}cumulative summary{end_header_id_template}",
|
||||
f"{answer_start}"
|
||||
]),
|
||||
max_generation_size=max_generation_size,
|
||||
@ -672,8 +673,8 @@ class TasksLibrary:
|
||||
f"current chunk:",
|
||||
f"{chunk}",
|
||||
f"{start_header_id_template}{system_message_template}{end_header_id_template}{summary_instruction}",
|
||||
f"Answer directly with the summary with no extra comments.",
|
||||
f"{start_header_id_template}summary{end_header_id_template}",
|
||||
f"Answer without any extra comments.",
|
||||
f"{start_header_id_template}chunk summary{end_header_id_template}",
|
||||
f"{answer_start}"
|
||||
]),
|
||||
max_generation_size=max_generation_size,
|
||||
|
Loading…
Reference in New Issue
Block a user