upgraded vector db

2024-12-18 20:27:58 +00:00 · 2024-07-18 01:32:11 +02:00 · 2024-07-18 01:32:11 +02:00 · 1437b2c40d
commit 1437b2c40d
parent 6f40981651
20 changed files with 513 additions and 175 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 125
+version: 127
 binding_name: null
 model_name: null
 model_variant: null
@ -153,6 +153,7 @@ xtts_top_k: 50
 xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
+xtts_freq: 22050

 # openai_whisper configuration
 openai_tts_key: ""
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""
+
 # Genreration parameters 
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard 
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0 
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+
+
+# ASR STT service 
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))

 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service fooocus
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by comma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removes all unnecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
@ -154,12 +267,13 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

@ -167,7 +281,7 @@ pdf_latex_path: null
 positive_boost: null
 negative_boost: null
 current_language: english
-fun_mode: False
+fun_mode: false


 # webui configurations
@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""
+
 # Genreration parameters 
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard 
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0 
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+
+
+# ASR STT service 
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))

 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service fooocus
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by comma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removes all unnecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

 # boosting information
 positive_boost: null
 negative_boost: null
-current_language: null
-fun_mode: False
+current_language: english
+fun_mode: false


 # webui configurations
@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base
--- a/lollms/app.py
+++ b/lollms/app.py
@ -13,7 +13,6 @@ from lollms.utilities import PromptReshaper
 from lollms.client_session import Client, Session
 from lollms.databases.skills_database import SkillsLibrary
 from lollms.tasks import TasksLibrary
-from safe_store import TextVectorizer, VectorizationMethod, VisualizationMethod

 from lollmsvectordb.database_elements.chunk import Chunk
 from lollmsvectordb.vector_database import VectorDatabase
@ -335,7 +334,7 @@ class LollmsApplication(LoLLMsCom):
                trace_exception(ex)

        ASCIIColors.blue("Loading local TTS services")
-        if self.config.xtts_enable or self.config.active_tts_service == "xtts":
+        if self.config.active_tts_service == "xtts":
            ASCIIColors.yellow("Loading XTTS")
            try:
                from lollms.services.xtts.lollms_xtts import LollmsXTTS
@ -348,6 +347,7 @@ class LollmsApplication(LoLLMsCom):
                self.xtts = LollmsXTTS(
                                        self,
                                        voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
+                                        freq=self.config.xtts_freq
                                    )
            except Exception as ex:
                trace_exception(ex)
@ -448,7 +448,7 @@ class LollmsApplication(LoLLMsCom):
                    trace_exception(ex)
                    
            ASCIIColors.blue("Loading loacal TTS services")
-            if (self.config.xtts_enable or self.config.active_tts_service == "xtts") and self.xtts is None:
+            if self.config.active_tts_service == "xtts" and self.xtts is None:
                ASCIIColors.yellow("Loading XTTS")
                try:
                    from lollms.services.xtts.lollms_xtts import LollmsXTTS
@ -461,6 +461,7 @@ class LollmsApplication(LoLLMsCom):
                    self.xtts = LollmsXTTS(
                                            self,
                                            voices_folders=[voices_folder, self.lollms_paths.custom_voices_path], 
+                                            freq=self.config.xtts_freq
                                        )
                except Exception as ex:
                    trace_exception(ex)
@ -532,17 +533,6 @@ class LollmsApplication(LoLLMsCom):
            trace_exception(ex)
            

-    def build_long_term_skills_memory(self):
-        discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
-        discussion_db_name.mkdir(exist_ok=True, parents=True)
-        self.long_term_memory = TextVectorizer(
-                vectorization_method=VectorizationMethod.TFIDF_VECTORIZER,
-                model=self.model,
-                database_path=discussion_db_name/"skills_memory.json",
-                save_db=True,
-                data_visualization_method=VisualizationMethod.PCA,
-            )
-        return self.long_term_memory
    
    def process_chunk(
                        self, 
@ -969,6 +959,7 @@ class LollmsApplication(LoLLMsCom):
                        f"{self.start_header_id_template}websearch query{self.end_header_id_template}"
                    ])
                    query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink)
+                    query = query.replace("\"","")
                    self.personality.step_end("Crafting internet search query")
                    self.personality.step(f"web search query: {query}")

@ -979,12 +970,12 @@ class LollmsApplication(LoLLMsCom):

                    internet_search_results=f"{self.system_full_header}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"

-                    docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)
+                    chunks:List[Chunk] = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)
                    
-                    if len(docs)>0:
-                        for doc, infos,document_id in zip(docs, sorted_similarities, document_ids):
-                            internet_search_infos.append(document_id)
-                            internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{document_id['url']}\nchunk_title:{document_id['title']}\ncontent:{doc}\n"
+                    if len(chunks)>0:
+                        for chunk in chunks:
+                            internet_search_infos.append(chunk.doc.title)
+                            internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{doc}\n"
                    else:
                        internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n"
                    if self.config.internet_quick_search:
@ -1051,9 +1042,12 @@ class LollmsApplication(LoLLMsCom):
                            docs = v.list_documents()
                            for doc in docs:
                                document=v.get_document(document_path = doc["path"])
-                                self.personality.step_start(f"Summeryzing document {doc['path']}")
-                                summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
-                                self.personality.step_end(f"Summeryzing document {doc['path']}")
+                                self.personality.step_start(f"Summaryzing document {doc['path']}")
+                                def post_process(summary):
+                                    return summary
+                                summary = self.personality.summarize_text(document, 
+                                                                        f"Extract information from the following text chunk to answer this request.\n{self.system_custom_header('query')}{query}", chunk_summary_post_processing=post_process, callback=self.personality.sink)
+                                self.personality.step_end(f"Summaryzing document {doc['path']}")
                                document_infos = f"{self.separator_template}".join([
                                    self.system_custom_header('document contextual summary'),
                                    f"source_document_title:{doc['title']}",
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 125
+version: 127
 binding_name: null
 model_name: null
 model_variant: null
@ -153,6 +153,7 @@ xtts_top_k: 50
 xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
+xtts_freq: 22050

 # openai_whisper configuration
 openai_tts_key: ""
--- a/lollms/databases/discussions_database.py
+++ b/lollms/databases/discussions_database.py
@ -7,9 +7,8 @@ from lollms.types import MSG_TYPE
 from lollms.types import BindingType
 from lollms.utilities import PackageManager, discussion_path_to_url
 from lollms.paths import LollmsPaths
-from lollms.databases.skills_database import SkillsLibrary
 from lollms.com import LoLLMsCom
-from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
+
 from lollmsvectordb.vector_database import VectorDatabase
 from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
 from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
@ -671,7 +670,7 @@ class Discussion:
            
            if len(self.vectorizer.list_documents())==0 and len(self.text_files)>0:
                for path in self.text_files:
-                    data = GenericDataLoader.read_file(path)
+                    data = TextDocumentsLoader.read_file(path)
                    try:
                        self.vectorizer.add_document(path.stem, data, path, True)
                    except Exception as ex:
@ -833,7 +832,7 @@ class Discussion:
                    return True
            except Exception as e:
                trace_exception(e)
-                self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
+                self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {TextDocumentsLoader.get_supported_file_types()}",client_id=client.client_id)
                return False

    def load_message(self, id):
--- a/lollms/functions/knowledge/build_knowledge_db.py
+++ b/lollms/functions/knowledge/build_knowledge_db.py
@ -1,6 +1,6 @@
 from pathlib import Path
 from lollms.personality import APScript
-from safe_store.generic_data_loader import GenericDataLoader
+from lollmsvectordb.text_document_loader import TextDocumentsLoader
 from safe_store.text_vectorizer import TextVectorizer
 import json
 import re
--- a/lollms/functions/tts/read_text.py
+++ b/lollms/functions/tts/read_text.py
@ -7,7 +7,6 @@ from typing import Union
 from lollms.utilities import PackageManager
 from lollms.personality import APScript
 from lollms.tts import LollmsTTS
-from safe_store import GenericDataLoader
 from ascii_colors import trace_exception

 # Here is the core of the function to be built
--- a/lollms/functions/tts/read_text_from_file.py
+++ b/lollms/functions/tts/read_text_from_file.py
@ -7,7 +7,7 @@ from typing import Union
 from lollms.utilities import PackageManager
 from lollms.personality import APScript
 from lollms.tts import LollmsTTS
-from safe_store import GenericDataLoader
+from lollmsvectordb import TextDocumentsLoader
 from ascii_colors import trace_exception

 # Here is the core of the function to be built
@ -28,7 +28,7 @@ def read_text_from_file(file_path: Union[Path, str], tts_module:LollmsTTS, llm:A
        file_path = Path(file_path)
        
        # Read the text from the file
-        text = GenericDataLoader.read_file(file_path)
+        text = TextDocumentsLoader.read_file(file_path)
        
        # Generate audio from the text
        audio_file_path = tts_module.tts_audio(text,use_threading=True)
--- a/lollms/internet.py
+++ b/lollms/internet.py
@ -29,7 +29,7 @@ def get_root_url(url):


 def format_url_parameter(value:str):
-    encoded_value = value.strip().replace("\"","")
+    encoded_value = value.strip().replace("\"","").replace(" ","+")
    return encoded_value


@ -294,7 +294,6 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
-    from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod

    search_results = []

@ -349,9 +348,10 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
    nb_non_empty = 0
    # Configure Chrome options
    driver = prepare_chrome_driver(chromedriver_path)
-
+    qquery = format_url_parameter(query)
+    url = f"https://duckduckgo.com/?q={qquery}&t=h_&ia=web"
    results = extract_results(
-                                f"https://duckduckgo.com/?q={format_url_parameter(query)}&t=h_&ia=web",
+                                url,
                                internet_nb_search_pages,
                                driver
                            )
@ -369,13 +369,11 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
            nb_non_empty += 1
            if nb_non_empty>=internet_nb_search_pages:
                break
-        docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
        vectorizer.build_index()
+        chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
    else:
-        docs = ["The web search has failed. Try using another query"]
-        sorted_similarities = [0]
-        document_ids = ["duckduckgo.com"]
+        chunks = []
    # Close the browser
    driver.quit()

-    return docs, sorted_similarities, document_ids
+    return chunks
--- a/lollms/personality.py
+++ b/lollms/personality.py
@ -20,7 +20,7 @@ from lollmsvectordb.vector_database import VectorDatabase
 from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
 from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
 from lollmsvectordb.text_document_loader import TextDocumentsLoader
-
+from lollmsvectordb.database_elements.document import Document
 import pkg_resources
 from pathlib import Path
 from PIL import Image
@ -37,7 +37,11 @@ from lollms.types import MSG_TYPE, SUMMARY_MODE
 import json
 from typing import Any, List, Optional, Type, Callable, Dict, Any, Union
 import json
-from safe_store import TextVectorizer, GenericDataLoader, VisualizationMethod, VectorizationMethod, DocumentDecomposer
+from lollmsvectordb.vector_database import VectorDatabase
+from lollmsvectordb.text_document_loader import TextDocumentsLoader
+from lollmsvectordb.text_chunker import TextChunker
+import hashlib
+
 from functools import partial
 import sys
 from lollms.com import LoLLMsCom
@ -910,42 +914,34 @@ class AIPersonality:

        # Verify if the persona has a data folder
        if self.data_path.exists():
-            self.database_path = self.data_path / "db.json"
-            if self.database_path.exists():
-                ASCIIColors.info("Loading database ...",end="")
-                self.persona_data_vectorizer = TextVectorizer(
-                            "tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
-                            model=self.model, #needed in case of using model_embedding
-                            save_db=True,
-                            database_path=self.database_path,
-                            data_visualization_method=VisualizationMethod.PCA,
-                            database_dict=None)
-                ASCIIColors.green("Ok")
-            else:
+            self.database_path = self.data_path / "db.sqlite"
+            from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
+            vectorizer = self.config.rag_vectorizer
+            if vectorizer == "bert":
+                from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
+                v = BERTVectorizer()
+            elif vectorizer == "tfidf":
+                from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
+                v = TFIDFVectorizer()
+            elif vectorizer == "word2vec":
+                from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
+                v = Word2VecVectorizer()
+
+            self.persona_data_vectorizer = VectorDatabase(self.database_path, v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
+
            files = [f for f in self.data_path.iterdir() if f.suffix.lower() in ['.asm', '.bat', '.c', '.cpp', '.cs', '.csproj', '.css',
                '.csv', '.docx', '.h', '.hh', '.hpp', '.html', '.inc', '.ini', '.java', '.js', '.json', '.log',
                '.lua', '.map', '.md', '.pas', '.pdf', '.php', '.pptx', '.ps1', '.py', '.rb', '.rtf', '.s', '.se', '.sh', '.sln',
                '.snippet', '.snippets', '.sql', '.sym', '.ts', '.txt', '.xlsx', '.xml', '.yaml', '.yml', '.msg'] ]
-                if len(files)>0:
-                    dl = GenericDataLoader()
-                    self.persona_data_vectorizer = TextVectorizer(
-                                "tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
-                                model=self.model, #needed in case of using model_embedding
-                                save_db=True,
-                                database_path=self.database_path,
-                                data_visualization_method=VisualizationMethod.PCA,
-                                database_dict=None)
+            dl = TextDocumentsLoader()
+
            for f in files:
                text = dl.read_file(f)
-                        self.persona_data_vectorizer.add_document(f.name,text,self.config.data_vectorization_chunk_size, self.config.data_vectorization_overlap_size)
+                self.persona_data_vectorizer.add_document(f.name, text, f)
                # data_vectorization_chunk_size: 512 # chunk size
                # data_vectorization_overlap_size: 128 # overlap between chunks size
                # data_vectorization_nb_chunks: 2 # number of chunks to use
-                    self.persona_data_vectorizer.index()
-                    self.persona_data_vectorizer.save_db()
-                else:
-                    self.persona_data_vectorizer = None
-                    self._data = None
+            self.persona_data_vectorizer.build_index()

        else:
            self.persona_data_vectorizer = None
@ -1820,7 +1816,7 @@ class AIPersonality:
        while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
            self.step_start(f"Comprerssing {doc_name}...")
            chunk_size = int(self.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
+            document_chunks =TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
            text = self.summarize_chunks(
                                            document_chunks,
                                            summary_instruction, 
@ -1831,7 +1827,6 @@ class AIPersonality:
                                            chunk_summary_post_processing=chunk_summary_post_processing,
                                            summary_mode=summary_mode)
            tk = self.model.tokenize(text)
-            tk = self.model.tokenize(text)
            dtk_ln=prev_len-len(tk)
            prev_len = len(tk)
            self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
@ -1857,7 +1852,7 @@ class AIPersonality:
        prev_len = len(tk)
        while len(tk)>max_summary_size:
            chunk_size = int(self.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
+            document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
            text = self.summarize_chunks(
                                            document_chunks, 
                                            data_extraction_instruction, 
@ -2548,7 +2543,7 @@ class APScript(StateMachine):
        while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
            self.step_start(f"Comprerssing {doc_name}...")
            chunk_size = int(self.personality.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
+            document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
            text = self.summarize_chunks(
                                            document_chunks,
                                            summary_instruction, 
@ -2585,7 +2580,7 @@ class APScript(StateMachine):
        prev_len = len(tk)
        while len(tk)>max_summary_size:
            chunk_size = int(self.personality.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
+            document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
            text = self.summarize_chunks(
                                            document_chunks, 
                                            data_extraction_instruction, 
@ -2893,15 +2888,25 @@ class APScript(StateMachine):
        return self.personality.internet_search_with_vectorization(query, quick_search=quick_search)


-    def vectorize_and_query(self, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
-        vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = self.personality.model)
-        decomposer = DocumentDecomposer()
-        chunks = decomposer.decompose_document(text, max_chunk_size, overlap_size,self.personality.model.tokenize,self.personality.model.detokenize)
-        for i, chunk in enumerate(chunks):
-            vectorizer.add_document(f"chunk_{i}", self.personality.model.detokenize(chunk))
-        vectorizer.index()
-        docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
-        return docs, sorted_similarities
+    def vectorize_and_query(self, title, url, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
+        
+        from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
+        vectorizer = self.config.rag_vectorizer
+        if vectorizer == "bert":
+            from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
+            v = BERTVectorizer()
+        elif vectorizer == "tfidf":
+            from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
+            v = TFIDFVectorizer()
+        elif vectorizer == "word2vec":
+            from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
+            v = Word2VecVectorizer()
+
+        vectorizer = VectorDatabase("", v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
+        vectorizer.add_document(title, text, url)
+        vectorizer.build_index()
+        chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
+        return chunks


    def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
--- a/lollms/server/endpoints/lollms_discussion.py
+++ b/lollms/server/endpoints/lollms_discussion.py
@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB, Discussion
 from typing import List
 import shutil
-from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from pathlib import Path
 class GenerateRequest(BaseModel):
--- a/lollms/server/endpoints/lollms_tts.py
+++ b/lollms/server/endpoints/lollms_tts.py
@ -14,7 +14,7 @@ from pydantic import BaseModel
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
 from lollms.main_config import BaseConfig
-from lollms.utilities import output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
+from lollms.utilities import find_next_available_filename, output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
 from lollms.security import sanitize_path, validate_path, check_access
 from pathlib import Path
 from ascii_colors import ASCIIColors
@ -176,8 +176,7 @@ async def text2Wave(request: LollmsText2AudioRequest):
        request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
        validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
    else:
-        request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
-
+        request.fn = find_next_available_filename(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out", "tts_out","wave")
    # Verify the path exists
    request.fn.parent.mkdir(exist_ok=True, parents=True)

@ -236,6 +235,7 @@ def start_xtts():
            lollmsElfServer.tts = LollmsXTTS(
                lollmsElfServer, 
                voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],                                        
+                freq=lollmsElfServer.config.xtts_freq
            )
        lollmsElfServer.HideBlockingMessage()
    except Exception as ex:
--- a/lollms/server/endpoints/lollms_user.py
+++ b/lollms/server/endpoints/lollms_user.py
@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB
 from lollms.security import check_access
 from pathlib import Path
-from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
 from fastapi import FastAPI, UploadFile, File
 import shutil
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -34,8 +34,9 @@ from queue import Queue
 import re

 class LollmsXTTS(LollmsTTS):
-    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
+    def __init__(self, app: LollmsApplication, voices_folders: List[str|Path], freq = 22050):
        super().__init__("lollms_xtts", app)
+        self.freq = freq
        self.generation_threads = {}
        self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
        self.stop_event = threading.Event()
@ -75,7 +76,7 @@ class LollmsXTTS(LollmsTTS):
    def get(app: LollmsApplication) -> 'LollmsXTTS':
        # Verify if the service is installed and if true then return an instance of LollmsXTTS
        if LollmsXTTS.verify(app.lollms_paths):
-            return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
+            return LollmsXTTS(app, app.lollms_paths.custom_voices_path, freq=app.config.xtts_freq)
        else:
            raise Exception("LollmsXTTS service is not installed properly.")
    def get_speaker_wav(self, speaker) -> Path:
@ -147,7 +148,7 @@ class LollmsXTTS(LollmsTTS):
            if wav is None:
                # Play any remaining buffered sentences
                for buffered_wav in buffer:
-                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
+                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
                    self.play_obj.wait_done()
                    time.sleep(0.5)  # Pause between sentences
                ASCIIColors.green("Audio done")
@ -156,7 +157,7 @@ class LollmsXTTS(LollmsTTS):
            buffered_sentences += 1
            if buffered_sentences >= 2:
                for buffered_wav in buffer:
-                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
+                    self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
                    self.play_obj.wait_done()
                    time.sleep(0.5)  # Pause between sentences
                buffer = []
@ -166,7 +167,7 @@ class LollmsXTTS(LollmsTTS):
        with wave.open(str(file_name_or_path), 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
-            wf.setframerate(22050)
+            wf.setframerate(self.freq)
            for wav in wav_data:
                wf.writeframes(wav.tobytes())

--- a/lollms/tasks.py
+++ b/lollms/tasks.py
@ -7,7 +7,10 @@ from ascii_colors import ASCIIColors
 from lollms.types import MSG_TYPE, SUMMARY_MODE
 from lollms.com import LoLLMsCom
 from lollms.utilities import PromptReshaper, remove_text_from_string, process_ai_output
-from safe_store import DocumentDecomposer
+from lollmsvectordb.text_chunker import TextChunker
+from lollmsvectordb.database_elements.document import Document
+from lollmsvectordb.directory_binding import DirectoryBinding
+import hashlib
 import json
 class TasksLibrary:
    def __init__(self, lollms:LoLLMsCom, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None) -> None:
@ -566,7 +569,11 @@ class TasksLibrary:
        while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
            self.step_start(f"Comprerssing {doc_name}... [depth {depth+1}]")
            chunk_size = int(self.lollms.config.ctx_size*0.6)
-            document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
+            tc = TextChunker(chunk_size, 0, model= self.lollms.model)
+            hasher = hashlib.md5()
+            hasher.update(text.encode("utf8"))
+            
+            document_chunks = tc.get_text_chunks(text, Document(hasher.hexdigest(), doc_name ) )
            text = self.summarize_chunks(
                                            document_chunks,
                                            summary_instruction, 
@ -577,7 +584,6 @@ class TasksLibrary:
                                            chunk_summary_post_processing=chunk_summary_post_processing,
                                            summary_mode=summary_mode)
            tk = self.lollms.model.tokenize(text)
-            tk = self.lollms.model.tokenize(text)
            dtk_ln=prev_len-len(tk)
            prev_len = len(tk)
            self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@ -608,7 +608,7 @@ def add_period(text):
    processed_text = '\n'.join(processed_lines)
    return processed_text

-def find_next_available_filename(folder_path, prefix):
+def find_next_available_filename(folder_path, prefix, extension="png"):
    folder = Path(folder_path)

    if not folder.exists():
@ -616,7 +616,7 @@ def find_next_available_filename(folder_path, prefix):

    index = 1
    while True:
-        next_filename = f"{prefix}_{index}.png"
+        next_filename = f"{prefix}_{index}.{extension}"
        potential_file = folder / next_filename
        if not potential_file.exists():
            return potential_file
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -1,35 +1,53 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 81
+version: 118
 binding_name: null
 model_name: null
 model_variant: null
 model_type: null

-show_news_panel: True
+show_news_panel: true

 # Security measures
-turn_on_setting_update_validation: True
-turn_on_code_execution: True
-turn_on_code_validation: True
-turn_on_open_file_validation: False
-turn_on_send_file_validation: False
+turn_on_setting_update_validation: true
+turn_on_code_execution: true
+turn_on_code_validation: true
+turn_on_open_file_validation: true
+turn_on_send_file_validation: true
+turn_on_language_validation: true

 force_accept_remote_access: false

 # Server information
-headless_server_mode: False
+headless_server_mode: false
 allowed_origins: []

 # Host information
 host: localhost
 port: 9600

+app_custom_logo: ""
+
 # Genreration parameters 
 discussion_prompt_separator: "!@>"
+start_header_id_template: "!@>"
+end_header_id_template: ": "
+
+separator_template: "\n"
+
+start_user_header_id_template: "!@>"
+end_user_header_id_template: ": "
+end_user_message_id_template: ""
+
+start_ai_header_id_template: "!@>"
+end_ai_header_id_template: ": "
+end_ai_message_id_template: ""
+
+system_message_template: "system"
+
 seed: -1
 ctx_size: 4084
 max_n_predict: 4096
-min_n_predict: 512
+min_n_predict: 1024
 temperature: 0.9
 top_k: 50
 top_p: 0.95
@ -50,14 +68,14 @@ user_name: user
 user_description: ""
 use_user_name_in_discussions: false
 use_model_name_in_discussions: false
-user_avatar: default_user.svg
+user_avatar: null
 use_user_informations_in_discussion: false

 # UI parameters
 discussion_db_name: default

 # Automatic updates
-debug: False
+debug: false
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@ -77,23 +95,104 @@ auto_show_browser: true
 # copy to clipboard 
 copy_to_clipboard_add_all_details: false

+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: 0
+
+
+# STT service
+stt_listening_threshold: 1000
+stt_silence_duration: 2
+stt_sound_threshold_percentage: 10
+stt_gain: 1.0 
+stt_rate: 44100
+stt_channels: 1
+stt_buffer_size: 10
+
+stt_activate_word_detection: false
+stt_word_detection_file: null
+
+
+
+# ASR STT service 
+asr_enable: false
+asr_base_url: http://localhost:9000
+
+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+
+# ***************** TTS *****************
+tts_output_device: 0
+
 # Voice service
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true
+
+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
+use_negative_prompt: true
+use_ai_generated_negative_prompt: false
+negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
+default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))

 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Image generation service fooocus
+enable_fooocus_service: false
+fooocus_base_url: http://localhost:7860
+
+# diffuser
+diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
+diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
+
+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
+comfyui_model: v1-5-pruned-emaonly.ckpt

 # Motion control service
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@ -107,6 +206,11 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+lollms_access_keys : "" # set a list of keys separated by comma to restrict access
+activate_lollms_server: true
+activate_ollama_emulator: true
+activate_openai_emulator: true
+activate_mistralai_emulator: true

 # elastic search service
 elastic_search_service: false
@ -131,13 +235,22 @@ audio_auto_send_input: true
 audio_silenceTimer: 5000

 # Data vectorization
+rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
+rag_vectorizer: bert # possible values bert, tfidf, word2vec
+rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
+rag_vectorizer_parameters: null # Parameters of the model in json format
+rag_chunk_size: 512 # number of tokens per chunk
+rag_n_chunks: 4 #Number of chunks to recover from the database
+rag_clean_chunks: true #Removes all unnecessary spaces and line returns
+rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
+rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
+rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summarize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan

 # Activate internet search
 activate_internet_search: false
+activate_internet_pages_judgement: true
 internet_vectorization_chunk_size: 512 # chunk size
-internet_vectorization_overlap_size: 128 # overlap between chunks size
-internet_vectorization_nb_chunks: 2 # number of chunks to use
-internet_nb_search_pages: 3 # number of pages to select
-internet_quick_search: False # If active the search engine will not load and read the webpages
-internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
+internet_vectorization_overlap_size: 0 # overlap between chunks size
+internet_vectorization_nb_chunks: 4 # number of chunks to use
+internet_nb_search_pages: 8 # number of pages to select
+internet_quick_search: false # If active the search engine will not load and read the webpages
+internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
 # Helpers
 pdf_latex_path: null

 # boosting information
 positive_boost: null
 negative_boost: null
-current_language: null
-fun_mode: False
+current_language: english
+fun_mode: false


 # webui configurations
@ -175,5 +289,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_model: base
--- a/requirements.txt
+++ b/requirements.txt
@ -6,7 +6,7 @@ setuptools
 requests


-safe_store
+lollmsvectordb
 pipmaster
 ascii_colors>=0.1.3
 beautifulsoup4
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@ -5,8 +5,8 @@ wget
 setuptools
 requests

-safe_store
 ascii_colors>=0.1.3
+lollmsvectordb

 autopep8