upgraded summary

This commit is contained in:
Saifeddine ALOUI 2024-07-09 23:05:23 +02:00
parent 826be699b4
commit 0e176402e3
9 changed files with 57 additions and 49 deletions

View File

@ -135,7 +135,7 @@ audio_silenceTimer: 5000
# Data vectorization
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false

View File

@ -135,7 +135,7 @@ audio_silenceTimer: 5000
# Data vectorization
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false

View File

@ -135,7 +135,7 @@ audio_silenceTimer: 5000
# Data vectorization
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false

View File

@ -253,7 +253,7 @@ class LollmsApplication(LoLLMsCom):
text = message.content
message_content += f"Rank {rank} - {sender}: {text}\n"
return self.tasks_library.summerize_text(
return self.tasks_library.summarize_text(
message_content,
"\n".join([
"Extract useful information from this discussion."
@ -1053,7 +1053,7 @@ class LollmsApplication(LoLLMsCom):
for doc in docs:
document=v.get_document(document_path = doc["path"])
self.personality.step_start(f"Summarizing document {doc['path']}")
summary = self.personality.summerize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
self.personality.step_end(f"Summarizing document {doc['path']}")
document_infos = f"{self.separator_template}".join([
self.system_custom_header('document contextual summary'),
@ -1071,7 +1071,7 @@ class LollmsApplication(LoLLMsCom):
if summary!="":
v.add_summaries(doc['path'],[{"context":query, "summary":summary}])
full_documentation += document_infos
documentation += self.personality.summerize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
documentation += self.personality.summarize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
else:
results = []
@ -1139,7 +1139,7 @@ class LollmsApplication(LoLLMsCom):
for doc in docs:
document=v.get_document(document_path = doc["path"])
self.personality.step_start(f"Summarizing document {doc['path']}")
summary = self.personality.summerize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
self.personality.step_end(f"Summarizing document {doc['path']}")
document_infos = f"{self.separator_template}".join([
self.system_custom_header('document contextual summary'),
@ -1157,7 +1157,7 @@ class LollmsApplication(LoLLMsCom):
if summary!="":
v.add_summaries(doc['path'],[{"context":query, "summary":summary}])
full_documentation += document_infos
documentation += self.personality.summerize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
documentation += self.personality.summarize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
else:
try:
chunks:List[Chunk] = client.discussion.vectorizer.search(query, int(self.config.rag_n_chunks))

View File

@ -283,7 +283,7 @@ def write_story_section(
# Summarize the current content of the story
if include_summary_between_chapters:
story_summary = llm.summerize_text(story_content, callback=llm.sink)
story_summary = llm.summarize_text(story_content, callback=llm.sink)
# Generate the current section using the LLM's fast_gen function
prompt = "\n".join([

View File

@ -30,14 +30,14 @@ from bs4 import BeautifulSoup
from lollms.databases.discussions_database import Discussion
# Core function to search for PDFs on arXiv and download them to a specified directory
def summerize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
def summarize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
messages = discussion.get_messages()
text = ""
for message in messages:
text += message.content
summary = llm.summerize_text(
summary = llm.summarize_text(
text,
summary_request,
doc_name="discussion"
@ -45,11 +45,11 @@ def summerize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
return summary
# Metadata function
def summerize_discussion_function(llm, discussion:Discussion):
def summarize_discussion_function(llm, discussion:Discussion):
return {
"function_name": "summerize_discussion", # The function name in string
"function": partial(summerize_discussion, llm=llm, discussion=discussion), # The function to be called with partial to preset client
"function_description": "Summerizes the discussion while keeping some key information as requested by the summary_request parameter", # Description of the function
"function_name": "summarize_discussion", # The function name in string
"function": partial(summarize_discussion, llm=llm, discussion=discussion), # The function to be called with partial to preset client
"function_description": "Summarizes the discussion while keeping some key information as requested by the summary_request parameter", # Description of the function
"function_parameters": [ # The set of parameters
{"name": "summary_request", "type": "str", "description": "The desired information to recover while summarizing."},
]

View File

@ -1802,10 +1802,10 @@ class AIPersonality:
max_generation_size=max_generation_size, callback=self.sink)
return translated
def summerize_text(
def summarize_text(
self,
text,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -1821,7 +1821,7 @@ class AIPersonality:
self.step_start(f"Compressing {doc_name}...")
chunk_size = int(self.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
summary_instruction,
doc_name,
@ -1843,7 +1843,7 @@ class AIPersonality:
def smart_data_extraction(
self,
text,
data_extraction_instruction=f"Summerize the current chunk.",
data_extraction_instruction=f"summarize the current chunk.",
final_task_instruction="reformulate with better wording",
doc_name="chunk",
answer_start="",
@ -1858,7 +1858,7 @@ class AIPersonality:
while len(tk)>max_summary_size:
chunk_size = int(self.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
data_extraction_instruction,
doc_name,
@ -1875,7 +1875,7 @@ class AIPersonality:
if dtk_ln<=10: # it is not summarizing
break
self.step_start(f"Rewriting ...")
text = self.summerize_chunks(
text = self.summarize_chunks(
[text],
final_task_instruction,
doc_name, answer_start,
@ -1887,10 +1887,10 @@ class AIPersonality:
return text
def summerize_chunks(
def summarize_chunks(
self,
chunks,
summary_instruction=f"Summerize the current chunk.",
summary_instruction=f"summarize the current chunk.",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -1950,7 +1950,7 @@ class AIPersonality:
def sequencial_chunks_summary(
self,
chunks,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -2530,10 +2530,10 @@ class APScript(StateMachine):
max_generation_size=max_generation_size, callback=self.sink)
return translated
def summerize_text(
def summarize_text(
self,
text,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -2549,7 +2549,7 @@ class APScript(StateMachine):
self.step_start(f"Compressing {doc_name}...")
chunk_size = int(self.personality.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
summary_instruction,
doc_name,
@ -2571,7 +2571,7 @@ class APScript(StateMachine):
def smart_data_extraction(
self,
text,
data_extraction_instruction="summerize",
data_extraction_instruction="summarize",
final_task_instruction="reformulate with better wording",
doc_name="chunk",
answer_start="",
@ -2586,7 +2586,7 @@ class APScript(StateMachine):
while len(tk)>max_summary_size:
chunk_size = int(self.personality.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
data_extraction_instruction,
doc_name,
@ -2603,7 +2603,7 @@ class APScript(StateMachine):
if dtk_ln<=10: # it is not summarizing
break
self.step_start(f"Rewriting ...")
text = self.summerize_chunks(
text = self.summarize_chunks(
[text],
final_task_instruction,
doc_name, answer_start,
@ -2615,10 +2615,10 @@ class APScript(StateMachine):
return text
def summerize_chunks(
def summarize_chunks(
self,
chunks,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -2678,7 +2678,7 @@ class APScript(StateMachine):
def sequencial_chunks_summary(
self,
chunks,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -3746,8 +3746,8 @@ class APScript(StateMachine):
def mix_it_up(self, prompt: str, models, master_model, max_generation_size: int= None, placeholders: dict = {}, sacrifice: list = ["previous_discussion"], debug: bool = False, callback=None, show_progress=False) -> dict:
"""
Fast generates text using multiple LLMs with round tracking. Each LLM sees the initial prompt plus the concatenated outputs of the previous round.
The master model then completes the job by creating a unique answer inspired by the last round outputs.
Fast generates text using multiple LLMs with detailed round tracking. Each LLM sees the initial prompt plus the formatted outputs of the previous rounds.
The master model then completes the job by creating a unique answer inspired by the last round outputs.
Parameters:
- prompt (str): The input prompt for text generation.
@ -3762,7 +3762,7 @@ class APScript(StateMachine):
- dict: A dictionary with the round information and the final generated text, keys: 'rounds' and 'final_output'.
"""
context = prompt
previous_output = ''
previous_outputs = []
# Dictionary to store rounds information
rounds_info = {
@ -3773,18 +3773,25 @@ class APScript(StateMachine):
for idx, model_id in enumerate(models):
binding_name, model_name = model_id.split("::")
self.select_model(binding_name, model_name)
round_prompt = context + "\n" + previous_output
# Concatenate previous outputs with formatting
formatted_previous_outputs = "\n".join([f"Model {m}: {o}" for m, o in previous_outputs])
round_prompt = context + "\n" + formatted_previous_outputs
output = self.fast_gen(prompt=round_prompt, max_generation_size=max_generation_size, placeholders=placeholders, sacrifice=sacrifice, debug=debug, callback=callback, show_progress=show_progress)
rounds_info['rounds'].append({
'model': model_id,
'round_prompt': round_prompt,
'output': output
})
previous_output = output # Update for the next round
previous_outputs.append((model_id, output)) # Update for the next round
# Final round with the master model
self.select_model(*master_model.split("::"))
final_prompt = context + "\n" + previous_output
# Last round output for the master model
formatted_previous_outputs = "\n".join([f"Model {m}: {o}" for m, o in previous_outputs])
final_prompt = context + "\n" + formatted_previous_outputs
final_output = self.fast_gen(prompt=final_prompt, max_generation_size=max_generation_size, placeholders=placeholders, sacrifice=sacrifice, debug=debug, callback=callback, show_progress=show_progress)
rounds_info['final_output'] = final_output
@ -3793,6 +3800,7 @@ class APScript(StateMachine):
def generate_with_function_calls(self, context_details: dict, functions: List[Dict[str, Any]], max_answer_length: Optional[int] = None, callback = None) -> List[Dict[str, Any]]:
"""
Performs text generation with function calls.

View File

@ -547,10 +547,10 @@ class TasksLibrary:
translated = self.fast_gen(message_translation_text, temperature=0.1, callback=self.sink)
return translated
def summerize_text(
def summarize_text(
self,
text,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -567,7 +567,7 @@ class TasksLibrary:
self.step_start(f"Compressing {doc_name}... [depth {depth+1}]")
chunk_size = int(self.lollms.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
summary_instruction,
doc_name,
@ -590,7 +590,7 @@ class TasksLibrary:
def smart_data_extraction(
self,
text,
data_extraction_instruction="summerize",
data_extraction_instruction="summarize",
final_task_instruction="reformulate with better wording",
doc_name="chunk",
answer_start="",
@ -605,7 +605,7 @@ class TasksLibrary:
while len(tk)>max_summary_size:
chunk_size = int(self.lollms.config.ctx_size*0.6)
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
text = self.summerize_chunks(
text = self.summarize_chunks(
document_chunks,
data_extraction_instruction,
doc_name,
@ -622,7 +622,7 @@ class TasksLibrary:
if dtk_ln<=10: # it is not summarizing
break
self.step_start(f"Rewriting ...")
text = self.summerize_chunks(
text = self.summarize_chunks(
[text],
final_task_instruction,
doc_name, answer_start,
@ -634,10 +634,10 @@ class TasksLibrary:
return text
def summerize_chunks(
def summarize_chunks(
self,
chunks,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,
@ -711,7 +711,7 @@ class TasksLibrary:
def sequencial_chunks_summary(
self,
chunks,
summary_instruction="summerize",
summary_instruction="summarize",
doc_name="chunk",
answer_start="",
max_generation_size=3000,

View File

@ -135,7 +135,7 @@ audio_silenceTimer: 5000
# Data vectorization
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false