Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-18 20:27:58 +00:00)

Commit 0e176402e3 ("upgraded summary"), parent 826be699b4

This commit renames the misspelled "summerize"/"Summerize" identifiers, config keys, and prompt strings to "summarize" across the code base, and reworks mix_it_up so that each round sees the labeled outputs of all previous rounds instead of only the last one.
@@ -135,7 +135,7 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)
+summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
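Since this is a rename of a user-facing YAML key, configs saved under the old spelling would silently fall back to the default after upgrading. A minimal migration sketch (a hypothetical helper, not part of this commit):

# Hypothetical helper: carry the old misspelled key over to the new one
# when loading a pre-upgrade config dict.
def migrate_summarize_key(cfg: dict) -> dict:
    if "summerize_discussion" in cfg and "summarize_discussion" not in cfg:
        cfg["summarize_discussion"] = cfg.pop("summerize_discussion")
    return cfg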
@@ -253,7 +253,7 @@ class LollmsApplication(LoLLMsCom):
             text = message.content
             message_content += f"Rank {rank} - {sender}: {text}\n"

-        return self.tasks_library.summerize_text(
+        return self.tasks_library.summarize_text(
            message_content,
            "\n".join([
                "Extract useful information from this discussion."
@@ -1053,7 +1053,7 @@ class LollmsApplication(LoLLMsCom):
                 for doc in docs:
                     document=v.get_document(document_path = doc["path"])
                     self.personality.step_start(f"Summeryzing document {doc['path']}")
-                    summary = self.personality.summerize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
+                    summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
                     self.personality.step_end(f"Summeryzing document {doc['path']}")
                     document_infos = f"{self.separator_template}".join([
                         self.system_custom_header('document contextual summary'),
@@ -1071,7 +1071,7 @@ class LollmsApplication(LoLLMsCom):
                     if summary!="":
                         v.add_summaries(doc['path'],[{"context":query, "summary":summary}])
                         full_documentation += document_infos
-                documentation += self.personality.summerize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
+                documentation += self.personality.summarize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)

             else:
                 results = []
@@ -1139,7 +1139,7 @@ class LollmsApplication(LoLLMsCom):
                 for doc in docs:
                     document=v.get_document(document_path = doc["path"])
                     self.personality.step_start(f"Summeryzing document {doc['path']}")
-                    summary = self.personality.summerize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
+                    summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
                     self.personality.step_end(f"Summeryzing document {doc['path']}")
                     document_infos = f"{self.separator_template}".join([
                         self.system_custom_header('document contextual summary'),
@@ -1157,7 +1157,7 @@ class LollmsApplication(LoLLMsCom):
                     if summary!="":
                         v.add_summaries(doc['path'],[{"context":query, "summary":summary}])
                         full_documentation += document_infos
-                documentation += self.personality.summerize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
+                documentation += self.personality.summarize_text(full_documentation, f"Extract information from the current text chunk and previous text chunks to answer the query. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
             else:
                 try:
                     chunks:List[Chunk] = client.discussion.vectorizer.search(query, int(self.config.rag_n_chunks))
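The four hunks above cover two copies of the same retrieval flow: each matched document gets a query-focused summary, non-empty summaries are cached on the vector store via add_summaries, and a final pass condenses the concatenation. Roughly, as a sketch (v, docs, query, and self.personality are assumed from the surrounding method):

# Sketch of the flow in the hunks above; v (vector store), docs (matched
# documents), query, and self.personality come from the surrounding method.
full_documentation = ""
for doc in docs:
    document = v.get_document(document_path=doc["path"])
    # Map step: query-focused summary of one document.
    summary = self.personality.summarize_text(
        document,
        f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\nquery: {query}",
    )
    if summary != "":
        # Cache the contextual summary so it can be reused for this query.
        v.add_summaries(doc["path"], [{"context": query, "summary": summary}])
        full_documentation += summary
# Reduce step: condense all per-document summaries into one answer context.
documentation = self.personality.summarize_text(
    full_documentation,
    f"Extract information from the current text chunk and previous text chunks to answer the query.\nquery: {query}",
)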
@@ -283,7 +283,7 @@ def write_story_section(

    # Summarize the current content of the story
    if include_summary_between_chapters:
-        story_summary = llm.summerize_text(story_content, callback=llm.sink)
+        story_summary = llm.summarize_text(story_content, callback=llm.sink)

    # Generate the current section using the LLM's fast_gen function
    prompt = "\n".join([
@@ -30,14 +30,14 @@ from bs4 import BeautifulSoup
 from lollms.databases.discussions_database import Discussion

 # Core function to search for PDFs on arXiv and download them to a specified directory
-def summerize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
+def summarize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
     messages = discussion.get_messages()
     text = ""
     for message in messages:
         text += message.content


-    summary = llm.summerize_text(
+    summary = llm.summarize_text(
         text,
         summary_request,
         doc_name="discussion"
@@ -45,11 +45,11 @@ def summerize_discussion(summary_request:str,llm, discussion:Discussion) -> str:
     return summary

 # Metadata function
-def summerize_discussion_function(llm, discussion:Discussion):
+def summarize_discussion_function(llm, discussion:Discussion):
     return {
-        "function_name": "summerize_discussion", # The function name in string
-        "function": partial(summerize_discussion, llm=llm, discussion=discussion), # The function to be called with partial to preset client
-        "function_description": "Summerizes the discussion while keeping some key information as requested by the summary_request parameter", # Description of the function
+        "function_name": "summarize_discussion", # The function name in string
+        "function": partial(summarize_discussion, llm=llm, discussion=discussion), # The function to be called with partial to preset client
+        "function_description": "summarizes the discussion while keeping some key information as requested by the summary_request parameter", # Description of the function
         "function_parameters": [ # The set of parameters
             {"name": "summary_request", "type": "str", "description": "The desired information to recover while summerizing."},
         ]
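The metadata function above returns the descriptor shape consumed by the function-calling layer (see generate_with_function_calls further down): a "function_name", a ready-to-call "function" with llm and discussion preset through partial, a description, and the declared parameters. A consumption sketch, assuming llm and discussion are live objects provided by the host application:

desc = summarize_discussion_function(llm, discussion)
# The caller supplies only the declared parameter; llm and discussion
# were already bound by partial inside the descriptor.
summary = desc["function"](summary_request="List the decisions made in this chat")
print(desc["function_name"], "->", summary)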
@@ -1802,10 +1802,10 @@ class AIPersonality:
                 max_generation_size=max_generation_size, callback=self.sink)
         return translated

-    def summerize_text(
+    def summarize_text(
         self,
         text,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
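Only the method name and the default instruction string change; the rest of the signature stays as shown. A usage sketch against the visible parameters (personality is assumed to be an initialized AIPersonality with a loaded model; the text and values are illustrative):

# Condense a long document with a custom instruction; parameter names
# are the ones shown in the signature above.
summary = personality.summarize_text(
    long_text,
    summary_instruction="Extract the key decisions and action items.",
    doc_name="meeting notes",
    max_generation_size=512,
)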
@@ -1821,7 +1821,7 @@ class AIPersonality:
             self.step_start(f"Comprerssing {doc_name}...")
             chunk_size = int(self.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 summary_instruction,
                 doc_name,
@@ -1843,7 +1843,7 @@ class AIPersonality:
     def smart_data_extraction(
         self,
         text,
-        data_extraction_instruction=f"Summerize the current chunk.",
+        data_extraction_instruction=f"summarize the current chunk.",
         final_task_instruction="reformulate with better wording",
         doc_name="chunk",
         answer_start="",
@@ -1858,7 +1858,7 @@ class AIPersonality:
         while len(tk)>max_summary_size:
             chunk_size = int(self.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 data_extraction_instruction,
                 doc_name,
@@ -1875,7 +1875,7 @@ class AIPersonality:
             if dtk_ln<=10: # it is not sumlmarizing
                 break
             self.step_start(f"Rewriting ...")
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 [text],
                 final_task_instruction,
                 doc_name, answer_start,
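The three hunks above all touch the same pattern: the text is decomposed into chunks of roughly 0.6 × ctx_size tokens and re-summarized in a loop until it fits max_summary_size, with the dtk_ln<=10 guard bailing out when a pass stops shrinking the text. Stripped of lollms plumbing, the loop has this shape (tokenize, detokenize, and summarize_chunks stand in for the model and personality hooks; a sketch, not the exact implementation):

def condense(text, tokenize, detokenize, summarize_chunks,
             max_summary_size=512, chunk_size=1024):
    # Keep splitting and re-summarizing until the text fits the budget.
    while len(tokenize(text)) > max_summary_size:
        tokens = tokenize(text)
        chunks = [detokenize(tokens[i:i + chunk_size])
                  for i in range(0, len(tokens), chunk_size)]
        new_text = summarize_chunks(chunks)
        # Guard against a model that refuses to shrink the text
        # (the dtk_ln <= 10 check in the real code).
        if len(tokenize(text)) - len(tokenize(new_text)) <= 10:
            return new_text
        text = new_text
    return text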
@@ -1887,10 +1887,10 @@ class AIPersonality:

         return text

-    def summerize_chunks(
+    def summarize_chunks(
         self,
         chunks,
-        summary_instruction=f"Summerize the current chunk.",
+        summary_instruction=f"summarize the current chunk.",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -1950,7 +1950,7 @@ class AIPersonality:
     def sequencial_chunks_summary(
         self,
         chunks,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -2530,10 +2530,10 @@ class APScript(StateMachine):
                 max_generation_size=max_generation_size, callback=self.sink)
         return translated

-    def summerize_text(
+    def summarize_text(
         self,
         text,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -2549,7 +2549,7 @@ class APScript(StateMachine):
             self.step_start(f"Comprerssing {doc_name}...")
             chunk_size = int(self.personality.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 summary_instruction,
                 doc_name,
@@ -2571,7 +2571,7 @@ class APScript(StateMachine):
     def smart_data_extraction(
         self,
         text,
-        data_extraction_instruction="summerize",
+        data_extraction_instruction="summarize",
         final_task_instruction="reformulate with better wording",
         doc_name="chunk",
         answer_start="",
@@ -2586,7 +2586,7 @@ class APScript(StateMachine):
         while len(tk)>max_summary_size:
             chunk_size = int(self.personality.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 data_extraction_instruction,
                 doc_name,
@@ -2603,7 +2603,7 @@ class APScript(StateMachine):
             if dtk_ln<=10: # it is not sumlmarizing
                 break
             self.step_start(f"Rewriting ...")
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 [text],
                 final_task_instruction,
                 doc_name, answer_start,
@@ -2615,10 +2615,10 @@ class APScript(StateMachine):

         return text

-    def summerize_chunks(
+    def summarize_chunks(
         self,
         chunks,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -2678,7 +2678,7 @@ class APScript(StateMachine):
     def sequencial_chunks_summary(
         self,
         chunks,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -3746,8 +3746,8 @@ class APScript(StateMachine):

     def mix_it_up(self, prompt: str, models, master_model, max_generation_size: int= None, placeholders: dict = {}, sacrifice: list = ["previous_discussion"], debug: bool = False, callback=None, show_progress=False) -> dict:
         """
-        Fast generates text using multiple LLMs with round tracking. Each LLM sees the initial prompt plus the concatenated outputs of the previous round.
-        The master model then completes the job by creating a unique answer inspired by the last round outputs.
+        Fast generates text using multiple LLMs with detailed round tracking. Each LLM sees the initial prompt plus the formatted outputs of the previous rounds.
+        The master model then completes the job by creating a unique answer inspired by the last round outputs.

         Parameters:
         - prompt (str): The input prompt for text generation.
@@ -3762,7 +3762,7 @@ class APScript(StateMachine):
         - dict: A dictionary with the round information and the final generated text, keys: 'rounds' and 'final_output'.
         """
         context = prompt
-        previous_output = ''
+        previous_outputs = []

         # Dictionary to store rounds information
         rounds_info = {
@@ -3773,18 +3773,25 @@ class APScript(StateMachine):
         for idx, model_id in enumerate(models):
             binding_name, model_name = model_id.split("::")
             self.select_model(binding_name, model_name)
-            round_prompt = context + "\n" + previous_output
+
+            # Concatenate previous outputs with formatting
+            formatted_previous_outputs = "\n".join([f"Model {m}: {o}" for m, o in previous_outputs])
+            round_prompt = context + "\n" + formatted_previous_outputs
             output = self.fast_gen(prompt=round_prompt, max_generation_size=max_generation_size, placeholders=placeholders, sacrifice=sacrifice, debug=debug, callback=callback, show_progress=show_progress)

             rounds_info['rounds'].append({
                 'model': model_id,
                 'round_prompt': round_prompt,
                 'output': output
             })
-            previous_output = output # Update for the next round
+            previous_outputs.append((model_id, output)) # Update for the next round

         # Final round with the master model
         self.select_model(*master_model.split("::"))
-        final_prompt = context + "\n" + previous_output
+
+        # Last round output for the master model
+        formatted_previous_outputs = "\n".join([f"Model {m}: {o}" for m, o in previous_outputs])
+        final_prompt = context + "\n" + formatted_previous_outputs
         final_output = self.fast_gen(prompt=final_prompt, max_generation_size=max_generation_size, placeholders=placeholders, sacrifice=sacrifice, debug=debug, callback=callback, show_progress=show_progress)

         rounds_info['final_output'] = final_output
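Functionally, each drafting round now sees every earlier output labeled by model ("Model {id}: {output}") rather than only the single previous output, and the master model receives the same labeled history. A usage sketch (script is assumed to be an APScript instance; the "binding::model" ids are placeholders for models installed in your lollms instance):

result = script.mix_it_up(
    prompt="Write a limerick about vector databases.",
    models=["binding_a::model_a", "binding_b::model_b"],
    master_model="binding_a::model_a",
)
# Return dict uses the keys documented above: 'rounds' and 'final_output'.
print(result["final_output"])
for round_info in result["rounds"]:
    print(round_info["model"], "->", round_info["output"][:80])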
@@ -3793,6 +3800,7 @@ class APScript(StateMachine):
+



     def generate_with_function_calls(self, context_details: dict, functions: List[Dict[str, Any]], max_answer_length: Optional[int] = None, callback = None) -> List[Dict[str, Any]]:
         """
         Performs text generation with function calls.
@@ -547,10 +547,10 @@ class TasksLibrary:
         translated = self.fast_gen(message_translation_text, temperature=0.1, callback=self.sink)
         return translated

-    def summerize_text(
+    def summarize_text(
         self,
         text,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -567,7 +567,7 @@ class TasksLibrary:
             self.step_start(f"Comprerssing {doc_name}... [depth {depth+1}]")
             chunk_size = int(self.lollms.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 summary_instruction,
                 doc_name,
@@ -590,7 +590,7 @@ class TasksLibrary:
     def smart_data_extraction(
         self,
         text,
-        data_extraction_instruction="summerize",
+        data_extraction_instruction="summarize",
         final_task_instruction="reformulate with better wording",
         doc_name="chunk",
         answer_start="",
@@ -605,7 +605,7 @@ class TasksLibrary:
         while len(tk)>max_summary_size:
             chunk_size = int(self.lollms.config.ctx_size*0.6)
             document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 document_chunks,
                 data_extraction_instruction,
                 doc_name,
@@ -622,7 +622,7 @@ class TasksLibrary:
             if dtk_ln<=10: # it is not sumlmarizing
                 break
             self.step_start(f"Rewriting ...")
-            text = self.summerize_chunks(
+            text = self.summarize_chunks(
                 [text],
                 final_task_instruction,
                 doc_name, answer_start,
@@ -634,10 +634,10 @@ class TasksLibrary:

         return text

-    def summerize_chunks(
+    def summarize_chunks(
         self,
         chunks,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,
@@ -711,7 +711,7 @@ class TasksLibrary:
     def sequencial_chunks_summary(
         self,
         chunks,
-        summary_instruction="summerize",
+        summary_instruction="summarize",
         doc_name="chunk",
         answer_start="",
         max_generation_size=3000,