Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-18 20:27:58 +00:00)

Commit 1437b2c40d: upgraded vector db
Parent commit: 6f40981651
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 125
+version: 127
 binding_name: null
 model_name: null
 model_variant: null
@@ -153,6 +153,7 @@ xtts_top_k: 50
 xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
+xtts_freq: 22050

 # openai_whisper configuration
 openai_tts_key: ""

@@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""

 # Genreration parameters
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active test to speach, text to image and speach to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+# ASR STT service
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_whisper configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by coma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removed all uinecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
@@ -154,12 +267,13 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

@@ -167,7 +281,7 @@ pdf_latex_path: null
 positive_boost: null
 negative_boost: null
 current_language: english
-fun_mode: False
+fun_mode: false


 # webui configurations
@@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base

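The new rag_* keys above are what the lollmsvectordb-based code later in this commit reads. A minimal Python sketch (not part of the commit) of how they plug together, using only the class names and call signatures that appear in this diff; the database path, sample document and query are invented placeholders, and rag_overlap is referenced by the code even though it is not listed among the keys above:

    # Illustrative sketch only; mirrors the vectorizer selection and VectorDatabase
    # calls that appear in personality.py later in this commit.
    from lollmsvectordb.vector_database import VectorDatabase
    from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
    from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
    from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer

    rag_vectorizer = "bert"   # config key rag_vectorizer: bert, tfidf or word2vec
    rag_chunk_size = 512      # config key rag_chunk_size
    rag_overlap = 0           # assumed default; the code reads config.rag_overlap

    v = BERTVectorizer() if rag_vectorizer == "bert" else TFIDFVectorizer()
    db = VectorDatabase("demo_rag.sqlite", v, TikTokenTokenizer(), rag_chunk_size, rag_overlap)
    db.add_document("example title", "some text to index", "example/path.txt")
    db.build_index()
    chunks = db.search("what does this text say?", 4)   # 4 mirrors rag_n_chunks
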
@@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""

 # Genreration parameters
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active test to speach, text to image and speach to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+# ASR STT service
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_whisper configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by coma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removed all uinecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
@@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

 # boosting information
 positive_boost: null
 negative_boost: null
-current_language: null
-fun_mode: False
+current_language: english
+fun_mode: false


 # webui configurations
@@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base

@@ -13,7 +13,6 @@ from lollms.utilities import PromptReshaper
 from lollms.client_session import Client, Session
 from lollms.databases.skills_database import SkillsLibrary
 from lollms.tasks import TasksLibrary
-from safe_store import TextVectorizer, VectorizationMethod, VisualizationMethod

 from lollmsvectordb.database_elements.chunk import Chunk
 from lollmsvectordb.vector_database import VectorDatabase
@@ -335,7 +334,7 @@ class LollmsApplication(LoLLMsCom):
 trace_exception(ex)

 ASCIIColors.blue("Loading local TTS services")
-if self.config.xtts_enable or self.config.active_tts_service == "xtts":
+if self.config.active_tts_service == "xtts":
 ASCIIColors.yellow("Loading XTTS")
 try:
 from lollms.services.xtts.lollms_xtts import LollmsXTTS
@@ -348,6 +347,7 @@ class LollmsApplication(LoLLMsCom):
 self.xtts = LollmsXTTS(
 self,
 voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
+freq=self.config.xtts_freq
 )
 except Exception as ex:
 trace_exception(ex)
@@ -448,7 +448,7 @@ class LollmsApplication(LoLLMsCom):
 trace_exception(ex)

 ASCIIColors.blue("Loading loacal TTS services")
-if (self.config.xtts_enable or self.config.active_tts_service == "xtts") and self.xtts is None:
+if self.config.active_tts_service == "xtts" and self.xtts is None:
 ASCIIColors.yellow("Loading XTTS")
 try:
 from lollms.services.xtts.lollms_xtts import LollmsXTTS
@@ -461,6 +461,7 @@ class LollmsApplication(LoLLMsCom):
 self.xtts = LollmsXTTS(
 self,
 voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
+freq=self.config.xtts_freq
 )
 except Exception as ex:
 trace_exception(ex)
@@ -532,17 +533,6 @@ class LollmsApplication(LoLLMsCom):
 trace_exception(ex)


-def build_long_term_skills_memory(self):
-discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
-discussion_db_name.mkdir(exist_ok=True, parents=True)
-self.long_term_memory = TextVectorizer(
-vectorization_method=VectorizationMethod.TFIDF_VECTORIZER,
-model=self.model,
-database_path=discussion_db_name/"skills_memory.json",
-save_db=True,
-data_visualization_method=VisualizationMethod.PCA,
-)
-return self.long_term_memory
-
 def process_chunk(
 self,
@@ -969,6 +959,7 @@ class LollmsApplication(LoLLMsCom):
 f"{self.start_header_id_template}websearch query{self.end_header_id_template}"
 ])
 query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink)
+query = query.replace("\"","")
 self.personality.step_end("Crafting internet search query")
 self.personality.step(f"web search query: {query}")

@@ -979,12 +970,12 @@ class LollmsApplication(LoLLMsCom):

 internet_search_results=f"{self.system_full_header}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"

-docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)
+chunks:List[Chunk] = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)

-if len(docs)>0:
-for doc, infos,document_id in zip(docs, sorted_similarities, document_ids):
-internet_search_infos.append(document_id)
-internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{document_id['url']}\nchunk_title:{document_id['title']}\ncontent:{doc}\n"
+if len(chunks)>0:
+for chunk in chunks:
+internet_search_infos.append(chunk.doc.title)
+internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{doc}\n"
 else:
 internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n"
 if self.config.internet_quick_search:
@@ -1051,9 +1042,12 @@ class LollmsApplication(LoLLMsCom):
 docs = v.list_documents()
 for doc in docs:
 document=v.get_document(document_path = doc["path"])
-self.personality.step_start(f"Summeryzing document {doc['path']}")
-summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
-self.personality.step_end(f"Summeryzing document {doc['path']}")
+self.personality.step_start(f"Summaryzing document {doc['path']}")
+def post_process(summary):
+return summary
+summary = self.personality.summarize_text(document,
+f"Extract information from the following text chunk to answer this request.\n{self.system_custom_header('query')}{query}", chunk_summary_post_processing=post_process, callback=self.personality.sink)
+self.personality.step_end(f"Summaryzing document {doc['path']}")
 document_infos = f"{self.separator_template}".join([
 self.system_custom_header('document contextual summary'),
 f"source_document_title:{doc['title']}",

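The last hunk above changes summarize_text to take a per-chunk post-processing callable. A hedged sketch of that call shape (not from the commit): `personality` stands for any loaded AIPersonality, and the document text and post-processor are invented; the keyword names mirror the diff:

    # Sketch only; the commit's own post_process simply returns the summary unchanged.
    def strip_summary(summary: str) -> str:
        return summary.strip()

    def contextual_summary(personality, document: str, query: str) -> str:
        return personality.summarize_text(
            document,
            f"Extract information from the following text chunk to answer this request.\n{query}",
            chunk_summary_post_processing=strip_summary,
            callback=personality.sink,
        )
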
@@ -7,9 +7,8 @@ from lollms.types import MSG_TYPE
 from lollms.types import BindingType
 from lollms.utilities import PackageManager, discussion_path_to_url
 from lollms.paths import LollmsPaths
-from lollms.databases.skills_database import SkillsLibrary
 from lollms.com import LoLLMsCom
-from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
 from lollmsvectordb.vector_database import VectorDatabase
 from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
 from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
@@ -671,7 +670,7 @@ class Discussion:

 if len(self.vectorizer.list_documents())==0 and len(self.text_files)>0:
 for path in self.text_files:
-data = GenericDataLoader.read_file(path)
+data = TextDocumentsLoader.read_file(path)
 try:
 self.vectorizer.add_document(path.stem, data, path, True)
 except Exception as ex:
@@ -833,7 +832,7 @@ class Discussion:
 return True
 except Exception as e:
 trace_exception(e)
-self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
+self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {TextDocumentsLoader.get_supported_file_types()}",client_id=client.client_id)
 return False

 def load_message(self, id):

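File ingestion now goes through lollmsvectordb's TextDocumentsLoader instead of safe_store's GenericDataLoader. A small sketch, assuming only the read_file and add_document signatures visible in the hunks above; the file path is a placeholder and `vectorizer` stands for whatever VectorDatabase instance the discussion holds:

    from pathlib import Path
    from lollmsvectordb.text_document_loader import TextDocumentsLoader

    path = Path("notes.txt")                    # hypothetical file
    data = TextDocumentsLoader.read_file(path)  # used as a classmethod in the diff above
    # vectorizer.add_document(path.stem, data, path, True)  # vectorizer: the discussion's VectorDatabase
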
@@ -1,6 +1,6 @@
 from pathlib import Path
 from lollms.personality import APScript
-from safe_store.generic_data_loader import GenericDataLoader
+from lollmsvectordb.text_document_loader import TextDocumentsLoader
 from safe_store.text_vectorizer import TextVectorizer
 import json
 import re

@@ -7,7 +7,6 @@ from typing import Union
 from lollms.utilities import PackageManager
 from lollms.personality import APScript
 from lollms.tts import LollmsTTS
-from safe_store import GenericDataLoader
 from ascii_colors import trace_exception

 # Here is the core of the function to be built

@@ -7,7 +7,7 @@ from typing import Union
 from lollms.utilities import PackageManager
 from lollms.personality import APScript
 from lollms.tts import LollmsTTS
-from safe_store import GenericDataLoader
+from lollmsvectordb import TextDocumentsLoader
 from ascii_colors import trace_exception

 # Here is the core of the function to be built
@@ -28,7 +28,7 @@ def read_text_from_file(file_path: Union[Path, str], tts_module:LollmsTTS, llm:A
 file_path = Path(file_path)

 # Read the text from the file
-text = GenericDataLoader.read_file(file_path)
+text = TextDocumentsLoader.read_file(file_path)

 # Generate audio from the text
 audio_file_path = tts_module.tts_audio(text,use_threading=True)

@@ -29,7 +29,7 @@ def get_root_url(url):


 def format_url_parameter(value:str):
-encoded_value = value.strip().replace("\"","")
+encoded_value = value.strip().replace("\"","").replace(" ","+")
 return encoded_value


@@ -294,7 +294,6 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui

 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
-from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod

 search_results = []

@@ -349,9 +348,10 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
 nb_non_empty = 0
 # Configure Chrome options
 driver = prepare_chrome_driver(chromedriver_path)
+qquery = format_url_parameter(query)
+url = f"https://duckduckgo.com/?q={qquery}&t=h_&ia=web"
 results = extract_results(
-f"https://duckduckgo.com/?q={format_url_parameter(query)}&t=h_&ia=web",
+url,
 internet_nb_search_pages,
 driver
 )
@@ -369,13 +369,11 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
 nb_non_empty += 1
 if nb_non_empty>=internet_nb_search_pages:
 break
-docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
 vectorizer.build_index()
+chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
 else:
-docs = ["The web search has failed. Try using another query"]
-sorted_similarities = [0]
-document_ids = ["duckduckgo.com"]
+chunks = []
 # Close the browser
 driver.quit()

-return docs, sorted_similarities, document_ids
+return chunks

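internet_search_with_vectorization now hands back lollmsvectordb Chunk objects instead of a (docs, similarities, ids) tuple. A hedged consumption sketch: chunk.doc.title and chunk.doc.path are the only fields confirmed by this commit's caller in app.py, the keyword argument is read off the function signature, and a working selenium/chromedriver setup is assumed:

    # Sketch only; requires selenium and a configured chromedriver, as the function itself does.
    chunks = internet_search_with_vectorization(
        "lollms vector database",      # example query
        internet_nb_search_pages=3,
    )
    for chunk in chunks:
        print(chunk.doc.title, chunk.doc.path)
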
@@ -20,7 +20,7 @@ from lollmsvectordb.vector_database import VectorDatabase
 from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
 from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
 from lollmsvectordb.text_document_loader import TextDocumentsLoader
+from lollmsvectordb.database_elements.document import Document
 import pkg_resources
 from pathlib import Path
 from PIL import Image
@@ -37,7 +37,11 @@ from lollms.types import MSG_TYPE, SUMMARY_MODE
 import json
 from typing import Any, List, Optional, Type, Callable, Dict, Any, Union
 import json
-from safe_store import TextVectorizer, GenericDataLoader, VisualizationMethod, VectorizationMethod, DocumentDecomposer
+from lollmsvectordb.vector_database import VectorDatabase
+from lollmsvectordb.text_document_loader import TextDocumentsLoader
+from lollmsvectordb.text_chunker import TextChunker
+import hashlib

 from functools import partial
 import sys
 from lollms.com import LoLLMsCom
@@ -910,42 +914,34 @@ class AIPersonality:

 # Verify if the persona has a data folder
 if self.data_path.exists():
-self.database_path = self.data_path / "db.json"
-if self.database_path.exists():
-ASCIIColors.info("Loading database ...",end="")
-self.persona_data_vectorizer = TextVectorizer(
-"tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
-model=self.model, #needed in case of using model_embedding
-save_db=True,
-database_path=self.database_path,
-data_visualization_method=VisualizationMethod.PCA,
-database_dict=None)
-ASCIIColors.green("Ok")
-else:
-files = [f for f in self.data_path.iterdir() if f.suffix.lower() in ['.asm', '.bat', '.c', '.cpp', '.cs', '.csproj', '.css',
-'.csv', '.docx', '.h', '.hh', '.hpp', '.html', '.inc', '.ini', '.java', '.js', '.json', '.log',
-'.lua', '.map', '.md', '.pas', '.pdf', '.php', '.pptx', '.ps1', '.py', '.rb', '.rtf', '.s', '.se', '.sh', '.sln',
-'.snippet', '.snippets', '.sql', '.sym', '.ts', '.txt', '.xlsx', '.xml', '.yaml', '.yml', '.msg'] ]
-if len(files)>0:
-dl = GenericDataLoader()
-self.persona_data_vectorizer = TextVectorizer(
-"tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
-model=self.model, #needed in case of using model_embedding
-save_db=True,
-database_path=self.database_path,
-data_visualization_method=VisualizationMethod.PCA,
-database_dict=None)
-for f in files:
-text = dl.read_file(f)
-self.persona_data_vectorizer.add_document(f.name,text,self.config.data_vectorization_chunk_size, self.config.data_vectorization_overlap_size)
-# data_vectorization_chunk_size: 512 # chunk size
-# data_vectorization_overlap_size: 128 # overlap between chunks size
-# data_vectorization_nb_chunks: 2 # number of chunks to use
-self.persona_data_vectorizer.index()
-self.persona_data_vectorizer.save_db()
-else:
-self.persona_data_vectorizer = None
-self._data = None
+self.database_path = self.data_path / "db.sqlite"
+from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
+vectorizer = self.config.rag_vectorizer
+if vectorizer == "bert":
+from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
+v = BERTVectorizer()
+elif vectorizer == "tfidf":
+from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
+v = TFIDFVectorizer()
+elif vectorizer == "word2vec":
+from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
+v = Word2VecVectorizer()
+
+self.persona_data_vectorizer = VectorDatabase(self.database_path, v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
+
+files = [f for f in self.data_path.iterdir() if f.suffix.lower() in ['.asm', '.bat', '.c', '.cpp', '.cs', '.csproj', '.css',
+'.csv', '.docx', '.h', '.hh', '.hpp', '.html', '.inc', '.ini', '.java', '.js', '.json', '.log',
+'.lua', '.map', '.md', '.pas', '.pdf', '.php', '.pptx', '.ps1', '.py', '.rb', '.rtf', '.s', '.se', '.sh', '.sln',
+'.snippet', '.snippets', '.sql', '.sym', '.ts', '.txt', '.xlsx', '.xml', '.yaml', '.yml', '.msg'] ]
+dl = TextDocumentsLoader()
+
+for f in files:
+text = dl.read_file(f)
+self.persona_data_vectorizer.add_document(f.name, text, f)
+# data_vectorization_chunk_size: 512 # chunk size
+# data_vectorization_overlap_size: 128 # overlap between chunks size
+# data_vectorization_nb_chunks: 2 # number of chunks to use
+self.persona_data_vectorizer.build_index()

 else:
 self.persona_data_vectorizer = None
@@ -1820,7 +1816,7 @@ class AIPersonality:
 while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
 self.step_start(f"Comprerssing {doc_name}...")
 chunk_size = int(self.config.ctx_size*0.6)
-document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
+document_chunks =TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
 text = self.summarize_chunks(
 document_chunks,
 summary_instruction,
@@ -1831,7 +1827,6 @@ class AIPersonality:
 chunk_summary_post_processing=chunk_summary_post_processing,
 summary_mode=summary_mode)
 tk = self.model.tokenize(text)
-tk = self.model.tokenize(text)
 dtk_ln=prev_len-len(tk)
 prev_len = len(tk)
 self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
@@ -1857,7 +1852,7 @@ class AIPersonality:
 prev_len = len(tk)
 while len(tk)>max_summary_size:
 chunk_size = int(self.config.ctx_size*0.6)
-document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
+document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
 text = self.summarize_chunks(
 document_chunks,
 data_extraction_instruction,
@@ -2548,7 +2543,7 @@ class APScript(StateMachine):
 while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
 self.step_start(f"Comprerssing {doc_name}...")
 chunk_size = int(self.personality.config.ctx_size*0.6)
-document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
+document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
 text = self.summarize_chunks(
 document_chunks,
 summary_instruction,
@@ -2585,7 +2580,7 @@ class APScript(StateMachine):
 prev_len = len(tk)
 while len(tk)>max_summary_size:
 chunk_size = int(self.personality.config.ctx_size*0.6)
-document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
+document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
 text = self.summarize_chunks(
 document_chunks,
 data_extraction_instruction,
@@ -2893,15 +2888,25 @@ class APScript(StateMachine):
 return self.personality.internet_search_with_vectorization(query, quick_search=quick_search)


-def vectorize_and_query(self, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
-vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = self.personality.model)
-decomposer = DocumentDecomposer()
-chunks = decomposer.decompose_document(text, max_chunk_size, overlap_size,self.personality.model.tokenize,self.personality.model.detokenize)
-for i, chunk in enumerate(chunks):
-vectorizer.add_document(f"chunk_{i}", self.personality.model.detokenize(chunk))
-vectorizer.index()
-docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
-return docs, sorted_similarities
+def vectorize_and_query(self, title, url, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
+from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
+vectorizer = self.config.rag_vectorizer
+if vectorizer == "bert":
+from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
+v = BERTVectorizer()
+elif vectorizer == "tfidf":
+from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
+v = TFIDFVectorizer()
+elif vectorizer == "word2vec":
+from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
+v = Word2VecVectorizer()
+
+vectorizer = VectorDatabase("", v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
+vectorizer.add_document(title, text, url)
+vectorizer.build_index()
+chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
+return chunks


 def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):

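Chunking for summarization now uses lollmsvectordb's TextChunker.chunk_text in place of safe_store's DocumentDecomposer. A sketch under the same 60%-of-context rule used in the hunks above; the (text, model, chunk_size, overlap, ...) call signature is taken from the diff, and `model` stands for a loaded binding that can tokenize and detokenize, so this is illustrative only:

    from lollmsvectordb.text_chunker import TextChunker

    def chunk_for_summary(text: str, model, ctx_size: int = 4084):
        chunk_size = int(ctx_size * 0.6)   # same rule as in the hunks above
        return TextChunker.chunk_text(text, model, chunk_size, 0, True)
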
@@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB, Discussion
 from typing import List
 import shutil
-from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from pathlib import Path
 class GenerateRequest(BaseModel):

@ -14,7 +14,7 @@ from pydantic import BaseModel
|
|||||||
from starlette.responses import StreamingResponse
|
from starlette.responses import StreamingResponse
|
||||||
from lollms.types import MSG_TYPE
|
from lollms.types import MSG_TYPE
|
||||||
from lollms.main_config import BaseConfig
|
from lollms.main_config import BaseConfig
|
||||||
from lollms.utilities import output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
|
from lollms.utilities import find_next_available_filename, output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
|
||||||
from lollms.security import sanitize_path, validate_path, check_access
|
from lollms.security import sanitize_path, validate_path, check_access
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from ascii_colors import ASCIIColors
|
from ascii_colors import ASCIIColors
|
||||||
@@ -176,8 +176,7 @@ async def text2Wave(request: LollmsText2AudioRequest):
         request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
         validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
     else:
-        request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
-
+        request.fn = find_next_available_filename(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out", "tts_out","wave")
     # Verify the path exists
     request.fn.parent.mkdir(exist_ok=True, parents=True)
@@ -236,6 +235,7 @@ def start_xtts():
         lollmsElfServer.tts = LollmsXTTS(
                                 lollmsElfServer,
                                 voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],
+                                freq=lollmsElfServer.config.xtts_freq
                             )
         lollmsElfServer.HideBlockingMessage()
     except Exception as ex:
@@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB
 from lollms.security import check_access
 from pathlib import Path
-from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from fastapi import FastAPI, UploadFile, File
 import shutil
@@ -34,8 +34,9 @@ from queue import Queue
 import re

 class LollmsXTTS(LollmsTTS):
-    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
+    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path], freq = 22050):
         super().__init__("lollms_xtts", app)
+        self.freq = freq
         self.generation_threads = {}
         self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
         self.stop_event = threading.Event()
@@ -75,7 +76,7 @@ class LollmsXTTS(LollmsTTS):
     def get(app: LollmsApplication) -> 'LollmsXTTS':
         # Verify if the service is installed and if true then return an instance of LollmsXTTS
        if LollmsXTTS.verify(app.lollms_paths):
-            return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
+            return LollmsXTTS(app, app.lollms_paths.custom_voices_path, freq=app.config.xtts_freq)
         else:
             raise Exception("LollmsXTTS service is not installed properly.")
     def get_speaker_wav(self, speaker) -> Path:
@@ -147,7 +148,7 @@ class LollmsXTTS(LollmsTTS):
             if wav is None:
                 # Play any remaining buffered sentences
                 for buffered_wav in buffer:
-                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
+                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
                     self.play_obj.wait_done()
                     time.sleep(0.5) # Pause between sentences
                 ASCIIColors.green("Audio done")
@@ -156,7 +157,7 @@ class LollmsXTTS(LollmsTTS):
                 buffered_sentences += 1
                 if buffered_sentences >= 2:
                     for buffered_wav in buffer:
-                        self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
+                        self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
                         self.play_obj.wait_done()
                         time.sleep(0.5) # Pause between sentences
                     buffer = []
@@ -166,7 +167,7 @@ class LollmsXTTS(LollmsTTS):
         with wave.open(str(file_name_or_path), 'wb') as wf:
             wf.setnchannels(1)
             wf.setsampwidth(2)
-            wf.setframerate(22050)
+            wf.setframerate(self.freq)
             for wav in wav_data:
                 wf.writeframes(wav.tobytes())
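Taken together, the xtts_freq hunks thread a single sample rate from the configuration through the LollmsXTTS constructor (freq = 22050 by default), simpleaudio playback (sa.play_buffer(..., self.freq)) and WAV export (wf.setframerate(self.freq)). The standalone sketch below only illustrates the export side with Python's wave module; the sine-tone generator is purely illustrative and not part of lollms.

import math
import struct
import wave

def write_tone(path, freq_hz=22050, seconds=0.5):
    # Mono, 16-bit WAV written at the configured sample rate; if this rate does not
    # match the rate the samples were synthesized at, playback is pitched up or down.
    n_samples = int(freq_hz * seconds)
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(1)        # mono, as in LollmsXTTS
        wf.setsampwidth(2)        # 16-bit samples
        wf.setframerate(freq_hz)  # e.g. the value of config.xtts_freq
        for i in range(n_samples):
            sample = int(32767 * 0.2 * math.sin(2 * math.pi * 440.0 * i / freq_hz))
            wf.writeframes(struct.pack('<h', sample))

# write_tone("tts_out_1.wav", freq_hz=22050)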
@@ -7,7 +7,10 @@ from ascii_colors import ASCIIColors
 from lollms.types import MSG_TYPE, SUMMARY_MODE
 from lollms.com import LoLLMsCom
 from lollms.utilities import PromptReshaper, remove_text_from_string, process_ai_output
-from safe_store import DocumentDecomposer
+from lollmsvectordb.text_chunker import TextChunker
+from lollmsvectordb.database_elements.document import Document
+from lollmsvectordb.directory_binding import DirectoryBinding
+import hashlib
 import json
 class TasksLibrary:
     def __init__(self, lollms:LoLLMsCom, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None) -> None:
|
|||||||
while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
|
while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
|
||||||
self.step_start(f"Comprerssing {doc_name}... [depth {depth+1}]")
|
self.step_start(f"Comprerssing {doc_name}... [depth {depth+1}]")
|
||||||
chunk_size = int(self.lollms.config.ctx_size*0.6)
|
chunk_size = int(self.lollms.config.ctx_size*0.6)
|
||||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
|
tc = TextChunker(chunk_size, 0, model= self.lollms.model)
|
||||||
|
hasher = hashlib.md5()
|
||||||
|
hasher.update(text.encode("utf8"))
|
||||||
|
|
||||||
|
document_chunks = tc.get_text_chunks(text, Document(hasher.hexdigest(), doc_name ) )
|
||||||
text = self.summarize_chunks(
|
text = self.summarize_chunks(
|
||||||
document_chunks,
|
document_chunks,
|
||||||
summary_instruction,
|
summary_instruction,
|
||||||
@ -577,7 +584,6 @@ class TasksLibrary:
|
|||||||
chunk_summary_post_processing=chunk_summary_post_processing,
|
chunk_summary_post_processing=chunk_summary_post_processing,
|
||||||
summary_mode=summary_mode)
|
summary_mode=summary_mode)
|
||||||
tk = self.lollms.model.tokenize(text)
|
tk = self.lollms.model.tokenize(text)
|
||||||
tk = self.lollms.model.tokenize(text)
|
|
||||||
dtk_ln=prev_len-len(tk)
|
dtk_ln=prev_len-len(tk)
|
||||||
prev_len = len(tk)
|
prev_len = len(tk)
|
||||||
self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
|
self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
|
||||||
|
@@ -608,7 +608,7 @@ def add_period(text):
     processed_text = '\n'.join(processed_lines)
     return processed_text

-def find_next_available_filename(folder_path, prefix):
+def find_next_available_filename(folder_path, prefix, extension="png"):
    folder = Path(folder_path)

    if not folder.exists():
|
|||||||
|
|
||||||
index = 1
|
index = 1
|
||||||
while True:
|
while True:
|
||||||
next_filename = f"{prefix}_{index}.png"
|
next_filename = f"{prefix}_{index}.{extension}"
|
||||||
potential_file = folder / next_filename
|
potential_file = folder / next_filename
|
||||||
if not potential_file.exists():
|
if not potential_file.exists():
|
||||||
return potential_file
|
return potential_file
|
||||||
|
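Combining the two hunks above, the helper now accepts an extension instead of hard-coding .png. The reconstruction below is a sketch: the error raised for a missing folder and the final index increment are not visible in the diff and are assumptions.

from pathlib import Path

def find_next_available_filename(folder_path, prefix, extension="png"):
    # Return the first {prefix}_{index}.{extension} path in folder_path that does not exist yet.
    folder = Path(folder_path)

    if not folder.exists():
        raise FileNotFoundError(f"Folder not found: {folder}")  # assumed handling, not shown in the diff

    index = 1
    while True:
        next_filename = f"{prefix}_{index}.{extension}"
        potential_file = folder / next_filename
        if not potential_file.exists():
            return potential_file
        index += 1  # assumed; not shown in the diff

# Used above as: find_next_available_filename(audio_out_dir, "tts_out", "wave")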
@@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""
+
 # Genreration parameters
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active test to speach, text to image and speach to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+# ASR STT service
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_whisper configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by coma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removed all uinecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
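The new rag_vectorizer key selects which lollmsvectordb vectorizer the code paths above instantiate. Below is a hedged sketch of that mapping, mirroring the if/elif chain in vectorize_and_query; the ValueError fallback is an assumption and not part of the diff.

def build_vectorizer(rag_vectorizer):
    # Map the config value ("bert", "tfidf" or "word2vec") to a vectorizer instance.
    if rag_vectorizer == "bert":
        from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
        return BERTVectorizer()
    elif rag_vectorizer == "tfidf":
        from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
        return TFIDFVectorizer()
    elif rag_vectorizer == "word2vec":
        from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
        return Word2VecVectorizer()
    raise ValueError(f"Unknown rag_vectorizer: {rag_vectorizer}")  # assumed fallback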
@@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

 # boosting information
 positive_boost: null
 negative_boost: null
-current_language: null
-fun_mode: False
+current_language: english
+fun_mode: false


 # webui configurations
@@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base
@@ -6,7 +6,7 @@ setuptools
 requests


-safe_store
+lollmsvectordb
 pipmaster
 ascii_colors>=0.1.3
 beautifulsoup4
@@ -5,8 +5,8 @@ wget
 setuptools
 requests

-safe_store
 ascii_colors>=0.1.3
+lollmsvectordb

 autopep8