Added smart model routing

Saifeddine ALOUI 2024-08-06 11:46:42 +02:00
parent 501ad14f66
commit 53edc1e8ce
12 changed files with 626 additions and 50 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 129
version: 131
binding_name: null
model_name: null
model_variant: null
@ -163,7 +163,7 @@ openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
@ -218,13 +218,23 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_lollms_rag_server: true
activate_lollms_tts_server: true
activate_lollms_stt_server: true
activate_lollms_tti_server: true
activate_lollms_itt_server: true
activate_lollms_ttm_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
use_smart_routing: false
smart_routing_router_model : ""
smart_routing_models_by_power : []
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200
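The three smart-routing keys above are the heart of this commit: a small "router" model inspects each prompt and dispatches it to one of the models listed by increasing power. A minimal sketch of the idea in Python, with hypothetical helper names (the actual routing logic lives in the lollms application code, not in this config):

# Hypothetical sketch of smart routing, not the actual lollms implementation.
# A cheap "router" model scores prompt difficulty, then the request goes to
# the weakest model in smart_routing_models_by_power that can handle it.
def route(prompt: str, config: dict, estimate_complexity) -> str:
    models = config["smart_routing_models_by_power"]  # assumed ordered weakest -> strongest
    if not config["use_smart_routing"] or not models:
        return config["model_name"]  # fall back to the statically selected model
    score = estimate_complexity(prompt)  # assumed to use smart_routing_router_model, returning 0..1
    index = min(int(score * len(models)), len(models) - 1)
    return models[index]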

View File

@ -1,35 +1,54 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 81
version: 130
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
show_news_panel: true
# Security measures
turn_on_setting_update_validation: True
turn_on_code_execution: True
turn_on_code_validation: True
turn_on_open_file_validation: False
turn_on_send_file_validation: False
turn_on_setting_update_validation: true
turn_on_code_execution: true
turn_on_code_validation: true
turn_on_open_file_validation: true
turn_on_send_file_validation: true
turn_on_language_validation: true
force_accept_remote_access: false
# Server information
headless_server_mode: False
headless_server_mode: false
allowed_origins: []
# Host information
host: localhost
port: 9600
app_custom_logo: ""
# Generation parameters
discussion_prompt_separator: "!@>"
start_header_id_template: "!@>"
end_header_id_template: ": "
separator_template: "\n"
start_user_header_id_template: "!@>"
end_user_header_id_template: ": "
end_user_message_id_template: ""
start_ai_header_id_template: "!@>"
end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
max_n_predict: 4096
min_n_predict: 512
min_n_predict: 1024
temperature: 0.9
top_k: 50
top_p: 0.95
@ -50,14 +69,17 @@ user_name: user
user_description: ""
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: default_user.svg
user_avatar: null
use_user_informations_in_discussion: false
# UI parameters
discussion_db_name: default
# Automatic updates
debug: False
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -77,23 +99,113 @@ auto_show_browser: true
# copy to clipboard
copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
stt_input_device: 0
# STT service
stt_listening_threshold: 1000
stt_silence_duration: 2
stt_sound_threshold_percentage: 10
stt_gain: 1.0
stt_rate: 44100
stt_channels: 1
stt_buffer_size: 10
stt_activate_word_detection: false
stt_word_detection_file: null
# ASR STT service
asr_enable: false
asr_base_url: http://localhost:9000
# openai_whisper configuration
openai_whisper_key: ""
openai_whisper_model: "whisper-1"
# whisper configuration
whisper_activate: false
whisper_model: base
# ***************** TTS *****************
tts_output_device: 0
# Voice service
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
use_ai_generated_negative_prompt: false
negative_prompt_generation_prompt: Generate a negative prompt for the following prompt. A negative prompt is a set of words that describe things we do not want in the generated image.
default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
# Image generation service
enable_sd_service: false
sd_base_url: http://localhost:7860
# Image generation service
enable_fooocus_service: false
fooocus_base_url: http://localhost:7860
# diffuser
diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
# Dall e service key
dall_e_key: ""
dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
comfyui_base_url: http://127.0.0.1:8188/
comfyui_model: v1-5-pruned-emaonly.ckpt
# Motion control service
enable_motion_ctrl_service: false
motion_ctrl_base_url: http://localhost:7861
# ***************** TTT *****************
# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434
@ -106,7 +218,11 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
# elastic search service
elastic_search_service: false
@ -131,13 +247,29 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
data_vectorization_activate: true # To activate/deactivate data vectorization
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
data_visualization_method: "PCA" #"PCA" or "TSNE"
@ -154,20 +286,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan
# Activate internet search
activate_internet_search: false
activate_internet_pages_judgement: true
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
internet_quick_search: False # If active, the search engine will not load and read the webpages
internet_activate_search_decision: False # If active, the AI decides by itself if it needs to do a search
internet_vectorization_overlap_size: 0 # overlap between chunks size
internet_vectorization_nb_chunks: 4 # number of chunks to use
internet_nb_search_pages: 8 # number of pages to select
internet_quick_search: false # If active, the search engine will not load and read the webpages
internet_activate_search_decision: false # If active, the AI decides by itself if it needs to do a search
# Helpers
pdf_latex_path: null
# boosting information
positive_boost: null
negative_boost: null
current_language: null
fun_mode: False
current_language: english
fun_mode: false
# webui configurations
@ -175,5 +308,3 @@ show_code_of_conduct: true
activate_audio_infos: true
# whisper configuration
whisper_model: base

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -176,7 +176,15 @@ class LollmsApplication(LoLLMsCom):
def restore_trust_store(self):
if self.bk_store is not None:
os.environ['REQUESTS_CA_BUNDLE'] = self.bk_store
def model_path_to_binding_model(self, model_path:str):
parts = model_path.strip().split("::")
if len(parts)<2:
raise Exception("Model path is not in the format binding:model_name!")
binding = parts[0]
model_name = parts[1]
return binding, model_name
def select_model(self, binding_name, model_name):
self.config["binding_name"] = binding_name
self.config["model_name"] = model_name

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 129
version: 131
binding_name: null
model_name: null
model_variant: null
@ -163,7 +163,7 @@ openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
@ -218,13 +218,23 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_lollms_rag_server: true
activate_lollms_tts_server: true
activate_lollms_stt_server: true
activate_lollms_tti_server: true
activate_lollms_itt_server: true
activate_lollms_ttm_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
use_smart_routing: false
smart_routing_router_model : ""
smart_routing_models_by_power : []
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200

View File

@ -211,6 +211,16 @@ class DiscussionsDB:
self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False)
return Discussion(self.lollms, last_discussion_id, self)
def load_discussion_by_id(self, discussion_id):
# Fetch the discussion by the provided discussion_id
discussion_data = self.select("SELECT * FROM discussion WHERE id=?", (discussion_id,), fetch_all=False)
if discussion_data is None:
raise ValueError("Discussion not found with the provided ID.")
# Assuming discussion_data returns a tuple or list with the necessary data
self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (discussion_id,), fetch_all=False)
return Discussion(self.lollms, discussion_id, self)
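A brief usage sketch of the new loader, assuming `db` is a DiscussionsDB instance and the id exists:

# Hypothetical usage: reload a specific discussion by its database id.
discussion = db.load_discussion_by_id(42)  # raises ValueError for an unknown id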
def create_discussion(self, title="untitled"):
"""Creates a new discussion

View File

@ -3404,6 +3404,23 @@ class APScript(StateMachine):
return paths
def update_section(self, content, section_name, new_code):
# Define patterns for HTML, JavaScript, and CSS sections
html_pattern = re.compile(f"<!-- section_start: {section_name} -->.*?<!-- section_end: {section_name} -->", re.DOTALL)
js_css_pattern = re.compile(f"// section_start: {section_name}.*?// section_end: {section_name}", re.DOTALL)
# Try to replace HTML section
updated_content, html_replacements = re.subn(html_pattern, f"<!-- section_start: {section_name} -->\n{new_code}\n<!-- section_end: {section_name} -->", content)
# If HTML replacement didn't occur, try JavaScript/CSS section
if html_replacements == 0:
updated_content, js_css_replacements = re.subn(js_css_pattern, f"// section_start: {section_name}\n{new_code}\n// section_end: {section_name}", content)
if js_css_replacements == 0:
return content, False # Section not found
return updated_content, True # Section updated successfully
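A hedged usage sketch of update_section; the marker syntax follows the regex patterns above, and `script` stands for any APScript instance:

# Hypothetical usage: replace a named HTML section in generated content.
html = (
    "<html><body>\n"
    "<!-- section_start: header -->old header<!-- section_end: header -->\n"
    "</body></html>"
)
updated, ok = script.update_section(html, "header", "<h1>New header</h1>")
# ok is False when no matching section markers were found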
def extract_code_blocks(self, text: str) -> List[dict]:
"""
This function extracts code blocks from a given text.
@ -3445,6 +3462,7 @@ class APScript(StateMachine):
block_infos = {
'index': index,
'file_name': "",
'section': "",
'content': "",
'type': ""
}
@ -3452,10 +3470,13 @@ class APScript(StateMachine):
# Check the preceding line for file name
preceding_text = text[:code_delimiter_position].strip().splitlines()
if preceding_text:
last_line = preceding_text[-1]
last_line = preceding_text[-1].strip()
if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
block_infos['file_name'] = file_name
if last_line.startswith("<section>") and last_line.endswith("</section>"):
section = last_line[len("<section>"):-len("</section>")].strip()
block_infos['section'] = section
sub_text = text[code_delimiter_position + 3:]
if len(sub_text) > 0:
@ -3477,7 +3498,7 @@ class APScript(StateMachine):
block_infos["type"] = sub_text[:next_index]
next_pos = indices[index + 1] - code_delimiter_position
if sub_text[next_pos - 3] == "`":
if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
else:
block_infos["content"] = sub_text[start_pos:next_pos].strip()

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -0,0 +1,157 @@
"""
project: lollms_webui
file: lollms_authentication.py
author: ParisNeo
description:
This module contains a set of FastAPI routes that manage user authentication.
"""
from fastapi import APIRouter, Request, HTTPException, Depends, Header
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from lollms.security import sanitize_path, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List, Optional, Union
from pathlib import Path
from fastapi.security import APIKeyHeader
import sqlite3
import secrets
import time
import shutil
import os
from datetime import datetime, timedelta
import asyncio
from contextlib import asynccontextmanager
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer: LOLLMSWebUI = LOLLMSWebUI.get_instance()
# ----------------------- User Authentication and Management ------------------------------
class User(BaseModel):
id: int
username: str
email: str
password: str
last_activity: float
database_name: str # Added field for database name
class UserAuth(BaseModel):
username: str
password: str
email: str
class UserToken(BaseModel):
    token: str
    expiry: float
    username: str  # stored so /current_user can resolve the user behind a token
users_db_path = lollmsElfServer.lollms_paths.personal_configuration_path / "users.sqlite"
user_tokens = {}
def init_users_db():
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS users
(id INTEGER PRIMARY KEY, username TEXT UNIQUE, email TEXT, password TEXT, last_activity REAL, database_name TEXT)''')
conn.commit()
conn.close()
def get_user(username: str) -> Optional[User]:
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute("SELECT * FROM users WHERE username = ?", (username,))
user_data = cursor.fetchone()
conn.close()
if user_data:
return User(id=user_data[0], username=user_data[1], email=user_data[2], password=user_data[3], last_activity=user_data[4], database_name=user_data[5])
return None
def create_user(username: str, email: str, password: str, database_name: str):
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
try:
cursor.execute("INSERT INTO users (username, email, password, last_activity, database_name) VALUES (?, ?, ?, ?, ?)",
(username, email, password, time.time(), database_name))
conn.commit()
except sqlite3.IntegrityError:
conn.close()
raise HTTPException(status_code=400, detail="Username already exists")
conn.close()
def update_user_activity(username: str):
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute("UPDATE users SET last_activity = ? WHERE username = ?", (time.time(), username))
conn.commit()
conn.close()
def authenticate_user(username: str, password: str) -> Optional[str]:
user = get_user(username)
if user and user.password == password:
token = secrets.token_urlsafe(32)
expiry = time.time() + 3600 # Token valid for 1 hour
user_tokens[token] = UserToken(token=token, expiry=expiry, username=username)
update_user_activity(username)
return token
return None
async def get_current_user(token: str = Header(...)):
if token not in user_tokens or user_tokens[token].expiry < time.time():
raise HTTPException(status_code=401, detail="Invalid or expired token")
return token
# ----------------------- Lifespan Event Handler ------------------------------
@asynccontextmanager
async def lifespan(app):
# Startup
init_users_db()
yield
# Add this lifespan event handler to your FastAPI app
# app.router.lifespan_context = lifespan
# ----------------------- Endpoints ------------------------------
@router.post("/register", response_model=User)
async def register(user: UserAuth):
# Generate a unique database name for the user
database_name = f"{user.username}_db.sqlite"
create_user(user.username, user.email, user.password, database_name)
return get_user(user.username)
@router.post("/login", response_model=UserToken)
async def login(user: UserAuth):
token = authenticate_user(user.username, user.password)
if not token:
raise HTTPException(status_code=401, detail="Invalid username or password")
user_data = get_user(user.username)
if not user_data:
raise HTTPException(status_code=404, detail="User not found")
# Load the user's discussion database
user_discussion_db = DiscussionsDB(lollmsElfServer.lollms_paths, user_data.database_name)
discussion = user_discussion_db.load_discussion_by_id(user_data.id) # Assuming ID is used to load the discussion
lollmsElfServer.session.add_client(token, 0, discussion, user_discussion_db)
return user_tokens[token]
@router.get("/current_user", response_model=User)
async def current_user(token: str = Depends(get_current_user)):
user_token = user_tokens.get(token)
if user_token:
    user = get_user(user_token.username)  # resolve the user that owns this token
    if user:
        return user
raise HTTPException(status_code=404, detail="User not found")
# Add the router to your FastAPI app
# app.include_router(router)
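A hedged client-side sketch of the flow these endpoints enable, assuming the router is mounted at the application root and using the default host/port (credentials are illustrative):

import requests

BASE = "http://localhost:9600"  # assumed from the default config

# Register, then log in to obtain a session token.
creds = {"username": "alice", "email": "alice@example.com", "password": "secret"}
requests.post(f"{BASE}/register", json=creds)
token = requests.post(f"{BASE}/login", json=creds).json()["token"]

# The token travels in the custom "token" header that get_current_user expects.
me = requests.get(f"{BASE}/current_user", headers={"token": token})
print(me.json()["username"])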

View File

@ -0,0 +1,153 @@
"""
project: lollms_webui
file: lollms_rag.py
author: ParisNeo
description:
This module contains a set of FastAPI routes that allow users to interact with the RAG (Retrieval-Augmented Generation) library.
Usage:
1. Initialize the RAG system by adding documents using the /add_document endpoint.
2. Build the index using the /index_database endpoint.
3. Perform searches using the /search endpoint.
4. Remove documents using the /remove_document/{document_id} endpoint.
5. Wipe the entire database using the /wipe_database endpoint.
Authentication:
- If lollms_access_keys are specified in the configuration, API key authentication is required.
- If no keys are specified, authentication is bypassed, and all users are treated as user ID 1.
User Management:
- Each user gets a unique vectorizer based on their API key.
- If no API keys are specified, all requests are treated as coming from user ID 1.
Note: Ensure proper security measures are in place when deploying this API in a production environment.
"""
from fastapi import APIRouter, Request, HTTPException, Depends, Header
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from lollms.security import sanitize_path, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List, Optional, Union
from pathlib import Path
from fastapi.security import APIKeyHeader
from lollmsvectordb.database_elements.chunk import Chunk
from lollmsvectordb.vector_database import VectorDatabase
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
import sqlite3
import secrets
import time
import shutil
import os
from datetime import datetime, timedelta
import asyncio
from contextlib import asynccontextmanager
import hashlib
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer: LOLLMSWebUI = LOLLMSWebUI.get_instance()
api_key_header = APIKeyHeader(name="Authorization")
# ----------------------- RAG System ------------------------------
class RAGQuery(BaseModel):
query: str = Field(..., description="The query to process using RAG")
class RAGResponse(BaseModel):
answer: str = Field(..., description="The generated answer")
sources: List[str] = Field(..., description="List of sources used for the answer")
class IndexDocument(BaseModel):
title: str = Field(..., description="The title of the document")
content: str = Field(..., description="The content to be indexed")
path: str = Field(default="unknown", description="The path of the document")
class IndexResponse(BaseModel):
success: bool = Field(..., description="Indicates if the indexing was successful")
message: str = Field(..., description="Additional information about the indexing process")
class DocumentResponse(BaseModel):
success: bool
message: str
class RAGChunk(BaseModel):
id : int
chunk_id : int
doc_title : str
doc_path : str
text : str
nb_tokens : int
distance : float
def get_user_id(bearer_key: str) -> int:
"""
Determine the user ID based on the bearer key.
If no keys are specified in the configuration, always return 1.
"""
if not lollmsElfServer.config.lollms_access_keys:
return 1
# Use the index of the key in the list as the user ID
try:
return lollmsElfServer.config.lollms_access_keys.index(bearer_key) + 1
except ValueError:
raise HTTPException(status_code=403, detail="Invalid API Key")
def get_user_vectorizer(user_id: int, bearer_key: str):
small_key = hashlib.md5(bearer_key.encode()).hexdigest()[:8]
user_folder = lollmsElfServer.lollms_paths / str(user_id)
user_folder.mkdir(parents=True, exist_ok=True)
return VectorDatabase(
str(user_folder / f"rag_db_{small_key}.sqlite"),
BERTVectorizer(lollmsElfServer.config.rag_vectorizer_model) if lollmsElfServer.config.rag_vectorizer == "bert" else TFIDFVectorizer(),
lollmsElfServer.model,
chunk_size=lollmsElfServer.config.rag_chunk_size,
overlap=lollmsElfServer.config.rag_overlap
)
async def get_current_user(bearer_token: str = Depends(api_key_header)):
if lollmsElfServer.config.lollms_access_keys:
if bearer_token not in lollmsElfServer.config.lollms_access_keys:
raise HTTPException(status_code=403, detail="Invalid API Key")
return bearer_token
@router.post("/add_document", response_model=DocumentResponse)
async def add_document(doc: IndexDocument, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
vectorizer.add_document(title=doc.title, text=doc.content, path=doc.path)
return DocumentResponse(success=True, message="Document added successfully.")
@router.post("/remove_document/{document_id}", response_model=DocumentResponse)
async def remove_document(document_id: int, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
# Logic to remove the document by ID
return DocumentResponse(success=True, message="Document removed successfully.")
@router.post("/index_database", response_model=DocumentResponse)
async def index_database(user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
vectorizer.build_index()
return DocumentResponse(success=True, message="Database indexed successfully.")
@router.post("/search", response_model=List[RAGChunk])
async def search(query: RAGQuery, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
chunks = vectorizer.search(query.query)
return [RAGChunk(id=c.id, chunk_id=c.chunk_id, doc_title=c.doc.title, doc_path=c.doc.path, text=c.text, nb_tokens=c.nb_tokens, distance=c.distance) for c in chunks]
@router.delete("/wipe_database", response_model=DocumentResponse)
async def wipe_database(user: str = Depends(get_current_user)):
user_id = get_user_id(user)
user_folder = lollmsElfServer.lollms_paths / str(user_id)
shutil.rmtree(user_folder, ignore_errors=True)
return DocumentResponse(success=True, message="Database wiped successfully.")

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database