mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-04-20 08:30:49 +00:00
upgraded lollms utilities
This commit is contained in:
parent
f27690c13b
commit
a0d8ced80e
@ -471,22 +471,34 @@ class TextVectorizer:
|
||||
|
||||
|
||||
|
||||
def add_document(self, document_name:Path, text:str, chunk_size: int, overlap_size:int, force_vectorize=False):
|
||||
|
||||
def add_document(self, document_name:Path, text:str, chunk_size: int, overlap_size:int, force_vectorize=False,add_as_a_bloc=False):
|
||||
if self.file_exists(document_name) and not force_vectorize:
|
||||
print(f"Document {document_name} already exists. Skipping vectorization.")
|
||||
return
|
||||
chunks_text = DocumentDecomposer.decompose_document(text, chunk_size, overlap_size, self.model.tokenize, self.model.detokenize)
|
||||
for i, chunk in enumerate(chunks_text):
|
||||
chunk_id = f"{document_name}_chunk_{i + 1}"
|
||||
chunk_dict = {
|
||||
"document_name": document_name,
|
||||
"chunk_index": i+1,
|
||||
"chunk_text":self.model.detokenize(chunk),
|
||||
"chunk_tokens": chunk,
|
||||
"embeddings":[]
|
||||
}
|
||||
self.chunks[chunk_id] = chunk_dict
|
||||
if add_as_a_bloc:
|
||||
chunks_text = [self.model.tokenize(text)]
|
||||
for i, chunk in enumerate(chunks_text):
|
||||
chunk_id = f"{document_name}_chunk_{i + 1}"
|
||||
chunk_dict = {
|
||||
"document_name": document_name,
|
||||
"chunk_index": i+1,
|
||||
"chunk_text":self.model.detokenize(chunk),
|
||||
"chunk_tokens": chunk,
|
||||
"embeddings":[]
|
||||
}
|
||||
self.chunks[chunk_id] = chunk_dict
|
||||
else:
|
||||
chunks_text = DocumentDecomposer.decompose_document(text, chunk_size, overlap_size, self.model.tokenize, self.model.detokenize)
|
||||
for i, chunk in enumerate(chunks_text):
|
||||
chunk_id = f"{document_name}_chunk_{i + 1}"
|
||||
chunk_dict = {
|
||||
"document_name": document_name,
|
||||
"chunk_index": i+1,
|
||||
"chunk_text":self.model.detokenize(chunk),
|
||||
"chunk_tokens": chunk,
|
||||
"embeddings":[]
|
||||
}
|
||||
self.chunks[chunk_id] = chunk_dict
|
||||
|
||||
def index(self):
|
||||
if self.vectorization_method=="ftidf_vectorizer":
|
||||
|
2
setup.py
2
setup.py
@ -26,7 +26,7 @@ def get_all_files(path):
|
||||
|
||||
setuptools.setup(
|
||||
name="lollms",
|
||||
version="5.5.0",
|
||||
version="5.5.1",
|
||||
author="Saifeddine ALOUI",
|
||||
author_email="aloui.saifeddine@gmail.com",
|
||||
description="A python library for AI personality definition",
|
||||
|
Loading…
x
Reference in New Issue
Block a user