upgraded to the new version

This commit is contained in:
Saifeddine ALOUI 2024-05-03 00:58:21 +02:00
parent 21384fc34b
commit 03368ef8e1
13 changed files with 285 additions and 46 deletions

View File

@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: false # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use

View File

@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: False # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use

View File

@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: False # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use

View File

@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: False # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use

View File

@ -640,7 +640,9 @@ class LollmsApplication(LoLLMsCom):
if self.personality.callback is None:
self.personality.callback = partial(self.process_chunk, client_id=client_id)
# Get the list of messages
messages = self.session.get_client(client_id).discussion.get_messages()
client = self.session.get_client(client_id)
discussion = client.discussion
messages = discussion.get_messages()
# Find the index of the message with the specified message_id
message_index = -1
@ -769,12 +771,14 @@ class LollmsApplication(LoLLMsCom):
trace_exception(ex)
self.warning("Couldn't add documentation to the context. Please verify the vector database")
if len(self.personality.text_files) > 0 and self.personality.vectorizer:
if (len(client.discussion.text_files) > 0) and client.discussion.vectorizer is not None:
if discussion is None:
discussion = self.recover_discussion(client_id)
if documentation=="":
documentation="\n!@>important information: Use the documentation data to answer the user questions. If the data is not present in the documentation, please tell the user that the information he is asking for does not exist in the documentation section. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n!@>Documentation:\n"
if self.config.data_vectorization_build_keys_words:
discussion = self.recover_discussion(client_id)
self.personality.step_start("Building vector store query")
query = self.personality.fast_gen(f"\n!@>instruction: Read the discussion and rewrite the last prompt for someone who didn't read the entire discussion.\nDo not answer the prompt. Do not add explanations.\n!@>discussion:\n{discussion[-2048:]}\n!@>enhanced query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink)
self.personality.step_end("Building vector store query")
@ -783,20 +787,20 @@ class LollmsApplication(LoLLMsCom):
query = current_message.content
try:
if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0:
doc_index = list(self.personality.vectorizer.chunks.keys())[0]
if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0:
doc_index = list(client.discussion.vectorizer.chunks.keys())[0]
doc_id = self.personality.vectorizer.chunks[doc_index]['document_id']
content = self.personality.vectorizer.chunks[doc_index]['chunk_text']
doc_id = client.discussion.vectorizer.chunks[doc_index]['document_id']
content = client.discussion.vectorizer.chunks[doc_index]['chunk_text']
if self.config.data_vectorization_put_chunk_informations_into_context:
documentation += f"!@>document chunk:\nchunk_infos:{doc_id}\ncontent:{content}\n"
else:
documentation += f"!@>chunk:\n{content}\n"
docs, sorted_similarities, document_ids = self.personality.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
docs, sorted_similarities, document_ids = client.discussion.vectorizer.recover_text(query, top_k=self.config.data_vectorization_nb_chunks)
for doc, infos in zip(docs, sorted_similarities):
if self.config.data_vectorization_force_first_chunk and len(self.personality.vectorizer.chunks)>0 and infos[0]==doc_id:
if self.config.data_vectorization_force_first_chunk and len(client.discussion.vectorizer.chunks)>0 and infos[0]==doc_id:
continue
if self.config.data_vectorization_put_chunk_informations_into_context:
documentation += f"!@>document chunk:\nchunk path: {infos[0]}\nchunk content:\n{doc}\n"

View File

@ -26,6 +26,7 @@ from lollms.main_config import LOLLMSConfig
from lollms.com import NotificationType, NotificationDisplayType, LoLLMsCom
from lollms.security import sanitize_path
from lollms.utilities import show_message_dialog
from lollms.types import BindingType
import urllib
import inspect
@ -41,20 +42,7 @@ __author__ = "parisneo"
__github__ = "https://github.com/ParisNeo/lollms_bindings_zoo"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class BindingType(Enum):
"""Binding types."""
TEXT_ONLY = 0
"""This binding only supports text."""
TEXT_IMAGE = 1
"""This binding supports text and image."""
TEXT_IMAGE_VIDEO = 2
"""This binding supports text, image and video."""
TEXT_AUDIO = 3
"""This binding supports text and audio."""
class LLMBinding:

View File

@ -1,4 +1,6 @@
from ascii_colors import ASCIIColors
from lollms.types import MSG_TYPE, SENDER_TYPES
from typing import Callable
import socketio
from enum import Enum
class NotificationType(Enum):
@ -96,6 +98,9 @@ class LoLLMsCom:
self.sio.sleep(1)
return infos["result"]
def close_message(self, client_id):
    """Finalize the currently open message for *client_id*.

    Base-class stub: concrete communication layers override this to close
    the message in the UI; here it does nothing and returns None.
    """
def info(self, content, duration:int=4, client_id=None, verbose:bool=None):
self.notify(
content,
@ -136,6 +141,26 @@ class LoLLMsCom:
verbose = verbose
)
def new_message(self,
                client_id,
                sender=None,
                content="",
                parameters=None,
                metadata=None,
                ui=None,
                message_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_FULL,
                sender_type:SENDER_TYPES=SENDER_TYPES.SENDER_TYPES_AI,
                open=False
                ):
    """Open a new message in the client's discussion view.

    Base-class stub (no-op, returns None); UI-facing subclasses override it
    to actually create and stream the message.

    Args:
        client_id: Identifier of the client the message belongs to.
        sender: Display name of the message author (None = default sender).
        content: Initial message text.
        parameters: Optional generation parameters attached to the message.
        metadata: Optional metadata attached to the message.
        ui: Optional UI payload for the message.
        message_type: Kind of message payload (defaults to a full message).
        sender_type: Whether the sender is the AI or the user.
        open: Whether the message starts in the "open/streaming" state.
    """
def full(self, full_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
    """Send *full_text* to the front end as one complete message.

    Base-class stub (no-op, returns None); subclasses override it to push
    the text to the UI.

    Args:
        full_text: The complete text to display.
        callback: Optional callable with signature (str, MSG_TYPE, dict, list)
            used to forward the text. Defaults to None.
    """
def notify(
self,

View File

@ -2,10 +2,16 @@
import sqlite3
from pathlib import Path
from datetime import datetime
from lollms.helpers import ASCIIColors
from ascii_colors import ASCIIColors, trace_exception
from lollms.types import MSG_TYPE
from lollms.types import BindingType
from lollms.utilities import PackageManager, discussion_path_to_url
from lollms.paths import LollmsPaths
from lollms.databases.skills_database import SkillsLibrary
from lollms.com import LoLLMsCom
from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
import json
import shutil
__author__ = "parisneo"
__github__ = "https://github.com/ParisNeo/lollms-webui"
@ -16,7 +22,8 @@ __license__ = "Apache 2.0"
# =================================== Database ==================================================================
class DiscussionsDB:
def __init__(self, lollms_paths:LollmsPaths, discussion_db_name="default"):
def __init__(self, lollms:LoLLMsCom, lollms_paths:LollmsPaths, discussion_db_name="default"):
self.lollms = lollms
self.lollms_paths = lollms_paths
self.discussion_db_name = discussion_db_name
@ -25,7 +32,6 @@ class DiscussionsDB:
self.discussion_db_path.mkdir(exist_ok=True, parents= True)
self.discussion_db_file_path = self.discussion_db_path/"database.db"
def create_tables(self):
db_version = 12
with sqlite3.connect(self.discussion_db_file_path) as conn:
@ -199,7 +205,7 @@ class DiscussionsDB:
else:
last_discussion_id = last_discussion_id[0]
self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False)
return Discussion(last_discussion_id, self)
return Discussion(self.lollms, last_discussion_id, self)
def create_discussion(self, title="untitled"):
"""Creates a new discussion
@ -211,10 +217,10 @@ class DiscussionsDB:
Discussion: A Discussion instance
"""
discussion_id = self.insert(f"INSERT INTO discussion (title) VALUES (?)",(title,))
return Discussion(discussion_id, self)
return Discussion(self.lollms, discussion_id, self)
def build_discussion(self, discussion_id=0):
return Discussion(discussion_id, self)
return Discussion(self.lollms, discussion_id, self)
def get_discussions(self):
rows = self.select("SELECT * FROM discussion")
@ -618,7 +624,8 @@ class Message:
return msgJson
class Discussion:
def __init__(self, discussion_id, discussions_db:DiscussionsDB):
def __init__(self, lollms:LoLLMsCom, discussion_id, discussions_db:DiscussionsDB):
self.lollms = lollms
self.discussion_id = discussion_id
self.discussions_db = discussions_db
self.discussion_folder = self.discussions_db.discussion_db_path/f"{discussion_id}"
@ -627,19 +634,181 @@ class Discussion:
self.discussion_text_folder = self.discussion_folder / "text_data"
self.discussion_skills_folder = self.discussion_folder / "skills"
self.discussion_rag_folder = self.discussion_folder / "rag"
self.discussion_view_images_folder = self.discussion_folder / "view_images"
self.discussion_folder.mkdir(exist_ok=True)
self.discussion_images_folder.mkdir(exist_ok=True)
self.discussion_text_folder.mkdir(exist_ok=True)
self.discussion_skills_folder.mkdir(exist_ok=True)
self.discussion_rag_folder.mkdir(exist_ok=True)
self.discussion_view_images_folder.mkdir(exist_ok=True)
self.messages = self.get_messages()
if len(self.messages)>0:
self.current_message = self.messages[-1]
def add_file(self, file_name):
# TODO : add file
pass
# Initialize the file lists
self.update_file_lists()
self.vectorizer = TextVectorizer(
self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
model=self.lollms.model, #needed in case of using model_embedding
database_path=self.discussion_rag_folder/"db.json",
save_db=self.lollms.config.data_vectorization_save_db,
data_visualization_method=VisualizationMethod.PCA,
database_dict=None)
if len(self.vectorizer.chunks)==0 and len(self.text_files)>0:
for path in self.text_files:
data = GenericDataLoader.read_file(path)
self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if path.suffix==".csv" else False)
self.vectorizer.index()
def update_file_lists(self):
    """Rescan the discussion folders and rebuild the cached attachment lists."""
    def _scan(folder):
        # glob('*') already yields Path objects; wrap to mirror the stored type.
        return [Path(entry) for entry in folder.glob('*')]
    self.text_files = _scan(self.discussion_text_folder)
    self.image_files = _scan(self.discussion_images_folder)
    self.audio_files = _scan(self.discussion_audio_folder)
    self.rag_db = _scan(self.discussion_rag_folder)
def remove_file(self, file_name, callback=None):
    """Remove a single attached file (by bare name) from this discussion.

    Looks for *file_name* among the text, image and audio attachments,
    deletes the file from disk and drops it from the matching cached list.
    For text files, the vectorizer index is updated as well; when the last
    text file is removed the vectorizer is dropped entirely.

    Args:
        file_name: Bare file name (no directory part) to remove.
        callback: Optional callable(message, MSG_TYPE) used to report success.

    Returns:
        True when a text file was removed and un-indexed, False when
        un-indexing failed, None otherwise (image/audio removal, or no match).
    """
    # Fix: removed the dead `all_files` local the original computed and never used.
    try:
        if any(file_name == entry.name for entry in self.text_files):
            fn = [entry for entry in self.text_files if entry.name == file_name][0]
            self.text_files = [entry for entry in self.text_files if entry.name != file_name]
            Path(fn).unlink()
            if len(self.text_files) > 0:
                try:
                    self.vectorizer.remove_document(fn)
                    if callback is not None:
                        callback("File removed successfully",MSG_TYPE.MSG_TYPE_INFO)
                    return True
                except ValueError as ve:
                    ASCIIColors.error("Couldn't remove the file")
                    return False
            else:
                # Last text file gone: nothing left to index, drop the vectorizer.
                self.vectorizer = None
        elif any(file_name == entry.name for entry in self.image_files):
            fn = [entry for entry in self.image_files if entry.name == file_name][0]
            self.image_files = [entry for entry in self.image_files if entry.name != file_name]
            Path(fn).unlink()
        elif any(file_name == entry.name for entry in self.audio_files):
            fn = [entry for entry in self.audio_files if entry.name == file_name][0]
            self.audio_files = [entry for entry in self.audio_files if entry.name != file_name]
            Path(fn).unlink()
    except Exception as ex:
        trace_exception(ex)
        ASCIIColors.warning(f"Couldn't remove the file {file_name}")
def remove_all_files(self):
    """Delete every file attached to this discussion and reset the caches.

    Removes all files from the image, rag, audio and text folders on disk,
    then clears the cached file lists so they reflect the empty directories.
    """
    for folder in [self.discussion_images_folder, self.discussion_rag_folder, self.discussion_audio_folder, self.discussion_text_folder]:
        for file in folder.glob('*'):
            if file.is_file():  # skip sub-directories
                file.unlink()
    # Clear the cached lists to reflect the now-empty directories.
    self.text_files.clear()
    self.image_files.clear()
    self.audio_files.clear()
    # Bug fix: the original left `rag_db` and the vectorizer referencing the
    # rag files that were just deleted; drop them so no stale state survives
    # (mirrors remove_file, which sets vectorizer=None when the last text
    # file is removed).
    self.rag_db.clear()
    self.vectorizer = None
def add_file(self, path, client, callback=None, process=True):
    """Attach a file to this discussion and (optionally) process it.

    Dispatches on the file extension: audio files are transcribed with
    whisper, images are published to the UI (and described when the model is
    not multimodal), and everything else is treated as text and indexed in
    the discussion vectorizer.

    Args:
        path: Path (or string path) of the file to attach.
        client: The requesting client (may be None); used for UI messages.
        callback: Optional callable(message, MSG_TYPE) used to report status.
        process: When False the file is only registered, not processed.

    Returns:
        True on success, False when the file could not be processed.
    """
    path = Path(path)
    if path.suffix in [".wav",".mp3"]:
        return _add_audio_file(self, path, client, callback, process)
    elif path.suffix in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
        return _add_image_file(self, path, client, callback, process)
    else:
        return _add_text_file(self, path, client, callback, process)

def _add_audio_file(self, path, client, callback, process):
    # Helper: register an audio attachment and transcribe it with whisper.
    self.audio_files.append(path)
    if process:
        # Bug fix: the original called the garbled name `new_messagenew_message`.
        self.lollms.new_message(client.client_id if client is not None else 0, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
        self.lollms.info(f"Transcribing ... ")
        # Bug fix: `self.whisper` was read before ever being assigned on the
        # Discussion; use getattr so the first call lazy-loads the model.
        if getattr(self, "whisper", None) is None:
            if not PackageManager.check_package_installed("whisper"):
                PackageManager.install_package("openai-whisper")
                try:
                    import conda.cli
                    conda.cli.main("install", "conda-forge::ffmpeg", "-y")
                except:
                    ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually")
            import whisper
            self.whisper = whisper.load_model("base")
        result = self.whisper.transcribe(str(path))
        transcription_fn = str(path)+".txt"
        with open(transcription_fn, "w", encoding="utf-8") as f:
            f.write(result["text"])
        # Bug fix: info/full/step_end were called on the Discussion (self.*),
        # which defines no such methods; route them through the com object.
        self.lollms.info(f"File saved to {transcription_fn}")
        self.lollms.full(result["text"])
        self.lollms.step_end("Transcribing ... ")
    return True

def _add_image_file(self, path, client, callback, process):
    # Helper: register an image, publish it to the UI, and describe it for
    # text-only models.
    self.image_files.append(path)
    if process:
        try:
            view_file = self.discussion_view_images_folder/path.name
            shutil.copyfile(path, view_file)
            pth = str(view_file).replace("\\","/").split('/')
            if "discussion_databases" in pth:
                pth = discussion_path_to_url(view_file)
            self.lollms.new_message(client.client_id if client is not None else 0, content = "", message_type = MSG_TYPE.MSG_TYPE_FULL)
            output = f'<img src="{pth}" width="800">\n\n'
            # NOTE(review): assumes client is not None here, unlike the guarded
            # calls above — confirm callers always pass a client for images.
            self.lollms.full(output, client_id=client.client_id)
            self.lollms.close_message(client.client_id if client is not None else 0)
            if self.lollms.model.binding_type not in [BindingType.TEXT_IMAGE, BindingType.TEXT_IMAGE_VIDEO]:
                # Model cannot see images: generate a textual description instead.
                from PIL import Image
                img = Image.open(str(view_file))
                img = img.convert("RGB")
                output += "## image description :\n"+ self.lollms.model.interrogate_blip([img])[0]
                self.lollms.full(output)
                self.lollms.close_message(client.client_id if client is not None else 0)
                self.lollms.HideBlockingMessage("Understanding image (please wait)")
                if self.lollms.config.debug:
                    ASCIIColors.yellow(output)
            else:
                self.lollms.HideBlockingMessage("Importing image (please wait)")
        except Exception as ex:
            trace_exception(ex)
            self.lollms.HideBlockingMessage("Understanding image (please wait)", False)
            ASCIIColors.error("Couldn't create new message")
    ASCIIColors.info("Received image file")
    if callback is not None:
        callback("Image file added successfully", MSG_TYPE.MSG_TYPE_INFO)
    return True

def _add_text_file(self, path, client, callback, process):
    # Helper: register a text-compatible file and index it in the vectorizer.
    try:
        self.text_files.append(path)
        ASCIIColors.info("Received text compatible file")
        self.lollms.ShowBlockingMessage("Processing file\nPlease wait ...")
        if process:
            if self.vectorizer is None:
                self.vectorizer = TextVectorizer(
                    self.lollms.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
                    model=self.lollms.model, #needed in case of using model_embedding
                    database_path=self.discussion_rag_folder/"db.json",
                    save_db=self.lollms.config.data_vectorization_save_db,
                    data_visualization_method=VisualizationMethod.PCA,
                    database_dict=None)
            data = GenericDataLoader.read_file(path)
            self.vectorizer.add_document(path, data, self.lollms.config.data_vectorization_chunk_size, self.lollms.config.data_vectorization_overlap_size, add_first_line_to_all_chunks=True if path.suffix==".csv" else False)
            self.vectorizer.index()
        if callback is not None:
            callback("File added successfully",MSG_TYPE.MSG_TYPE_INFO)
        # NOTE(review): HideBlockingMessage is called with a client_id here but
        # with a message string elsewhere — confirm its expected signature.
        self.lollms.HideBlockingMessage(client.client_id)
        return True
    except Exception as e:
        trace_exception(e)
        self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
        return False
def load_message(self, id):
"""Gets a list of messages information

View File

@ -155,7 +155,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: false # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use

View File

@ -17,7 +17,7 @@ from lollms.security import sanitize_path, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List
import shutil
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
import tqdm
from pathlib import Path
@ -69,7 +69,7 @@ def select_database(data:DatabaseSelectionParameters):
print(f'Selecting database {data.name}')
# Create database object
lollmsElfServer.db = DiscussionsDB(lollmsElfServer.lollms_paths, data.name)
lollmsElfServer.db = DiscussionsDB(lollmsElfServer, lollmsElfServer.lollms_paths, data.name)
ASCIIColors.info("Checking discussions database... ",end="")
lollmsElfServer.db.create_tables()
lollmsElfServer.db.add_missing_columns()
@ -114,7 +114,7 @@ async def make_title(discussion_title: DiscussionTitle):
try:
ASCIIColors.info("Making title")
discussion_id = discussion_title.id
discussion = Discussion(discussion_id, lollmsElfServer.db)
discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
title = lollmsElfServer.make_discussion_title(discussion)
discussion.rename(title)
return {'status':True, 'title':title}
@ -151,10 +151,14 @@ async def delete_discussion(discussion: DiscussionDelete):
try:
client_id = discussion.client_id
discussion_id = discussion.id
lollmsElfServer.session.get_client(client_id).discussion = Discussion(discussion_id, lollmsElfServer.db)
discussion_id = sanitize_path(discussion.id)
discussion_path = lollmsElfServer.lollms_paths.personal_discussions_path/lollmsElfServer.config.discussion_db_name/discussion_id
lollmsElfServer.session.get_client(client_id).discussion = Discussion(lollmsElfServer, discussion_id, lollmsElfServer.db)
lollmsElfServer.session.get_client(client_id).discussion.delete_discussion()
lollmsElfServer.session.get_client(client_id).discussion = None
shutil.rmtree(discussion_path)
return {'status':True}
except Exception as ex:
trace_exception(ex)
@ -208,3 +212,38 @@ async def import_multiple_discussions(discussion_import: DiscussionImport):
trace_exception(ex)
lollmsElfServer.error(ex)
return {"status":False,"error":str(ex)}
# ------------------------------------------- Files manipulation -----------------------------------------------------
class Identification(BaseModel):
"""Request payload carrying only the requesting client's id."""
client_id:str  # session/client identifier checked by check_access
@router.post("/get_discussion_files_list")
def get_discussion_files_list(data:Identification):
    """List the text and image files attached to the client's discussion.

    Returns a dict with `state` and a `files` list of {name, size} entries,
    text attachments first, then images.
    """
    client = check_access(lollmsElfServer, data.client_id)
    attachments = list(client.discussion.text_files) + list(client.discussion.image_files)
    files = [{"name": Path(f).name, "size": Path(f).stat().st_size} for f in attachments]
    return {"state": True, "files": files}
@router.post("/clear_discussion_files_list")
def clear_discussion_files_list(data:Identification):
    """Remove every file attached to the client's discussion."""
    client = check_access(lollmsElfServer, data.client_id)
    # Guard clause: a personality must be selected before touching files.
    if lollmsElfServer.personality is None:
        return {"state": False, "error": "No personality selected"}
    client.discussion.remove_all_files()
    return {"state": True}
class RemoveFileData(BaseModel):
"""Request payload naming one discussion file to remove."""
client_id:str  # session/client identifier checked by check_access
name:str  # bare file name (no directory part) of the attachment to remove
@router.post("/remove_discussion_file")
def remove_discussion_file(data:RemoveFileData):
    """Remove a single named file from the client's discussion attachments."""
    client = check_access(lollmsElfServer, data.client_id)
    # Guard clause: a personality must be selected before touching files.
    if lollmsElfServer.personality is None:
        return {"state": False, "error": "No personality selected"}
    client.discussion.remove_file(data.name)
    return {"state": True}

View File

@ -81,7 +81,7 @@ def add_events(sio:socketio):
ext = filename.split(".")[-1].lower()
if ext in ["wav", "mp3"]:
path:Path = client.discussion.discussion_audio_folder
elif ext in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]:
elif ext in ["png","jpg","jpeg","gif","bmp","svg","webp"]:
path:Path = client.discussion.discussion_images_folder
else:
path:Path = client.discussion.discussion_text_folder
@ -108,9 +108,9 @@ def add_events(sio:socketio):
lollmsElfServer.ShowBlockingMessage(f"File received {file_path.name}.\nVectorizing the data ...")
if lollmsElfServer.personality.processor:
result = lollmsElfServer.personality.processor.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
else:
result = lollmsElfServer.personality.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
result = client.discussion.add_file(file_path, client, partial(lollmsElfServer.process_chunk, client_id=client_id))
ASCIIColors.success('File processed successfully')
run_async(partial(sio.emit,'file_received', {'status': True, 'filename': filename}))

View File

@ -61,6 +61,20 @@ class GenerationPresets:
"""
return {'temperature': 0.5, 'top_k': 20, 'top_p': 0.85}
class BindingType(Enum):
    """Enumeration of the input modality combinations a binding supports."""
    # Text-only generation.
    TEXT_ONLY = 0
    # Text and image inputs.
    TEXT_IMAGE = 1
    # Text, image and video inputs.
    TEXT_IMAGE_VIDEO = 2
    # Text and audio inputs.
    TEXT_AUDIO = 3
class SUMMARY_MODE(Enum):
SUMMARY_MODE_SEQUENCIAL = 0

View File

@ -145,7 +145,7 @@ data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vecto
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
data_vectorization_save_db: False # For each new session, new files
data_vectorization_save_db: true # Persist the vectorized database across sessions (set to false to rebuild it fresh for each new session)
data_vectorization_chunk_size: 512 # chunk size
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use