Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-24 14:56:44 +00:00)

Commit 6d32a1bc42 (parent 2209934b94): added new summary tools
@@ -31,7 +31,7 @@ from lollms.types import MSG_TYPE
 import json
 from typing import Any, List, Optional, Type, Callable, Dict, Any, Union
 import json
-from safe_store import TextVectorizer, GenericDataLoader, VisualizationMethod, VectorizationMethod
+from safe_store import TextVectorizer, GenericDataLoader, VisualizationMethod, VectorizationMethod, DocumentDecomposer
 from functools import partial
 import sys
 from lollms.com import LoLLMsCom
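Review note: the only change in this hunk is importing DocumentDecomposer from safe_store; the new summary helpers below rely on it to split long text on token boundaries. A minimal sketch of the call shape used later in this commit, assuming a model object exposing tokenize/detokenize; the helper name and chunk size here are illustrative, and the trailing boolean flag is passed exactly as in the commit without its meaning being shown in this diff.

```python
# Sketch only: mirrors the decompose_document call shape used later in this commit.
from safe_store import DocumentDecomposer

def split_into_chunks(text, model, chunk_size=1024, overlap=0):
    # Split `text` into chunks of at most `chunk_size` tokens with `overlap`
    # tokens of overlap, using the model's tokenizer to measure length.
    return DocumentDecomposer.decompose_document(
        text, chunk_size, overlap, model.tokenize, model.detokenize, True
    )
```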
@@ -2184,13 +2184,26 @@ class APScript(StateMachine):
                                    ]),
                                    max_generation_size=max_generation_size))
         return translated
-    def summerize(self, chunks, summary_instruction="summerize", chunk_name="chunk", answer_start="", max_generation_size=3000, callback=None):
+    def summerize_text(self, text, summary_instruction="summerize", doc_name="chunk", answer_start="", max_generation_size=3000, max_summary_size=512, callback=None):
+        depth=0
+        tk = self.personality.model.tokenize(text)
+        while len(tk)>max_summary_size:
+            self.step_start(f"Comprerssing.. [depth {depth}]")
+            chunk_size = int(self.personality.config.ctx_size*0.6)
+            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
+            text = self.summerize_chunks(document_chunks,summary_instruction, doc_name, answer_start, max_generation_size, callback)
+            tk = self.personality.model.tokenize(text)
+            self.step_end(f"Comprerssing.. [depth {depth}]")
+        return text
+
+
+    def summerize_chunks(self, chunks, summary_instruction="summerize", doc_name="chunk", answer_start="", max_generation_size=3000, callback=None):
         summeries = []
         for i, chunk in enumerate(chunks):
             self.step_start(f"Processing chunk : {i+1}/{len(chunks)}")
             summary = f"```markdown\n{answer_start}"+ self.fast_gen(
                         "\n".join([
-                            f"!@>Document_chunk: {chunk_name}:",
+                            f"!@>Document_chunk: {doc_name}:",
                             f"{chunk}",
                             f"!@>instruction: {summary_instruction}",
                             f"!@>summary:",
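Review note: summerize_text keeps re-chunking and re-summarizing the text until its tokenized length drops below max_summary_size, delegating each pass to summerize_chunks. A hedged sketch of how a personality script might call it, assuming an APScript subclass with a loaded model; the class, method, and argument values below are illustrative, not part of the commit.

```python
# Illustrative sketch (not part of this commit): calling the new recursive
# summarizer from an APScript-derived personality processor.
class Processor(APScript):
    def digest(self, long_text: str) -> str:
        # Repeatedly chunk and summarize until the result fits in 512 tokens.
        return self.summerize_text(
            long_text,
            summary_instruction="summerize",   # default instruction from the diff
            doc_name="report",                 # label injected into the prompt
            max_generation_size=1024,          # per-chunk generation budget
            max_summary_size=512,              # stop once the summary is this small
        )
```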
@@ -2273,6 +2286,17 @@ class APScript(StateMachine):
         return self.personality.internet_search_with_vectorization(query, quick_search=quick_search)
 
 
+    def vectorize_and_query(self, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
+        vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = self.personality.model)
+        decomposer = DocumentDecomposer()
+        chunks = decomposer.decompose_document(text, max_chunk_size, overlap_size,self.personality.model.tokenize,self.personality.model.detokenize)
+        for i, chunk in enumerate(chunks):
+            vectorizer.add_document(f"chunk_{i}", self.personality.model.detokenize(chunk))
+        vectorizer.index()
+        docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
+        return docs, sorted_similarities
+
+
     def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
         """This triggers a step start
 
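Review note: vectorize_and_query decomposes the text, TF-IDF-indexes the chunks with safe_store's TextVectorizer, and returns the chunks closest to the query together with their similarity scores. A hedged usage sketch, again assuming an APScript subclass; the prompt reuses the !@> markers seen elsewhere in this file, but the surrounding class and method are illustrative.

```python
# Illustrative sketch (not part of this commit): retrieval-then-generation
# on top of the new vectorize_and_query helper.
class Processor(APScript):
    def answer_from_document(self, document_text: str, question: str) -> str:
        # Keep the 3 chunks most similar to the question.
        docs, similarities = self.vectorize_and_query(
            document_text,
            question,
            max_chunk_size=512,
            overlap_size=20,
            internet_vectorization_nb_chunks=3,
        )
        context = "\n".join(docs)
        # fast_gen is the existing APScript generation helper used by summerize_chunks.
        return self.fast_gen(
            f"!@>context:\n{context}\n!@>question: {question}\n!@>answer:"
        )
```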