From 53edc1e8ceb20b565267feefb6f626bd6748ade7 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 6 Aug 2024 11:46:42 +0200 Subject: [PATCH] Added smart model routing --- configs/config.yaml | 18 +- .../personal/configs/lollms_elf_config.yaml | 177 +++++++++++++++--- .../personal/configs/lollms_elf_config.yaml | 27 ++- .../configs/lollms_elf_local_config.yaml | 27 ++- lollms/app.py | 10 +- lollms/configs/config.yaml | 18 +- lollms/databases/discussions_database.py | 10 + lollms/personality.py | 25 ++- lollms/server/configs/config.yaml | 27 ++- .../server/endpoints/lollms_authentication.py | 157 ++++++++++++++++ lollms/server/endpoints/lollms_rag.py | 153 +++++++++++++++ .../configs/lollms_discord_local_config.yaml | 27 ++- 12 files changed, 626 insertions(+), 50 deletions(-) create mode 100644 lollms/server/endpoints/lollms_authentication.py create mode 100644 lollms/server/endpoints/lollms_rag.py diff --git a/configs/config.yaml b/configs/config.yaml index 49a783a..5994723 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 129 +version: 131 binding_name: null model_name: null model_variant: null @@ -163,7 +163,7 @@ openai_tts_voice: "alloy" elevenlabs_tts_key: "" -elevenlabs_tts_model_id: "eleven_monolingual_v1" +elevenlabs_tts_model_id: "eleven_monolingual_v2" elevenlabs_tts_voice_stability: 0.5 elevenlabs_tts_voice_boost: 0.5 elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL @@ -218,13 +218,23 @@ petals_device: cuda # lollms service enable_lollms_service: false -lollms_base_url: http://localhost:1234 -lollms_access_keys : "" # set a list of keys separated by coma to restrict access +lollms_access_keys : [] # set a list of keys separated by coma to restrict access activate_lollms_server: true +activate_lollms_rag_server: true +activate_lollms_tts_server: true +activate_lollms_stt_server: true 
+activate_lollms_tti_server: true +activate_lollms_itt_server: true +activate_lollms_ttm_server: true activate_ollama_emulator: true activate_openai_emulator: true activate_mistralai_emulator: true +use_smart_routing: false +smart_routing_router_model : "" +smart_routing_models_by_power : [] + + # elastic search service elastic_search_service: false elastic_search_url: http://localhost:9200 diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml index b9e36f5..90094fb 100644 --- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml @@ -1,35 +1,54 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 81 +version: 130 binding_name: null model_name: null model_variant: null model_type: null -show_news_panel: True +show_news_panel: true # Security measures -turn_on_setting_update_validation: True -turn_on_code_execution: True -turn_on_code_validation: True -turn_on_open_file_validation: False -turn_on_send_file_validation: False +turn_on_setting_update_validation: true +turn_on_code_execution: true +turn_on_code_validation: true +turn_on_open_file_validation: true +turn_on_send_file_validation: true +turn_on_language_validation: true force_accept_remote_access: false # Server information -headless_server_mode: False +headless_server_mode: false allowed_origins: [] # Host information host: localhost port: 9600 +app_custom_logo: "" + # Genreration parameters discussion_prompt_separator: "!@>" +start_header_id_template: "!@>" +end_header_id_template: ": " + +separator_template: "\n" + +start_user_header_id_template: "!@>" +end_user_header_id_template: ": " +end_user_message_id_template: "" + +start_ai_header_id_template: "!@>" +end_ai_header_id_template: ": " +end_ai_message_id_template: "" + +system_message_template: "system" +use_continue_message: true + seed: -1 
ctx_size: 4084 max_n_predict: 4096 -min_n_predict: 512 +min_n_predict: 1024 temperature: 0.9 top_k: 50 top_p: 0.95 @@ -50,14 +69,17 @@ user_name: user user_description: "" use_user_name_in_discussions: false use_model_name_in_discussions: false -user_avatar: default_user.svg +user_avatar: null use_user_informations_in_discussion: false # UI parameters discussion_db_name: default # Automatic updates -debug: False +debug: false +debug_show_final_full_prompt: false +debug_show_chunks: false + debug_log_file_path: "" auto_update: true auto_sync_personalities: true @@ -77,23 +99,113 @@ auto_show_browser: true # copy to clipboard copy_to_clipboard_add_all_details: false +# -------------------- Services global configurations -------------------------- +# Select the active test to speach, text to image and speach to text services +active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required) +active_tti_service: "None" # autosd (offline), dall-e (online) +active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required) +active_ttm_service: "None" # musicgen (offline) +# -------------------- Services -------------------------- + +# ***************** STT ***************** +stt_input_device: 0 + + +# STT service +stt_listening_threshold: 1000 +stt_silence_duration: 2 +stt_sound_threshold_percentage: 10 +stt_gain: 1.0 +stt_rate: 44100 +stt_channels: 1 +stt_buffer_size: 10 + +stt_activate_word_detection: false +stt_word_detection_file: null + + + +# ASR STT service +asr_enable: false +asr_base_url: http://localhost:9000 + +# openai_whisper configuration +openai_whisper_key: "" +openai_whisper_model: "whisper-1" + + +# whisper configuration +whisper_activate: false +whisper_model: base + + +# ***************** TTS ***************** +tts_output_device: 0 + # Voice service auto_read: false xtts_current_voice: null xtts_current_language: en +xtts_stream_chunk_size: 100 +xtts_temperature: 0.75 
+xtts_length_penalty: 1.0 +xtts_repetition_penalty: 5.0 +xtts_top_k: 50 +xtts_top_p: 0.85 +xtts_speed: 1 +xtts_enable_text_splitting: true +xtts_freq: 22050 + +# openai_whisper configuration +openai_tts_key: "" +openai_tts_model: "tts-1" +openai_tts_voice: "alloy" + + +elevenlabs_tts_key: "" +elevenlabs_tts_model_id: "eleven_monolingual_v2" +elevenlabs_tts_voice_stability: 0.5 +elevenlabs_tts_voice_boost: 0.5 +elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL +# ***************** TTI ***************** + +use_negative_prompt: true +use_ai_generated_negative_prompt: false +negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image. +default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes)) # Image generation service enable_sd_service: false sd_base_url: http://localhost:7860 +# Image generation service +enable_fooocus_service: false +fooocus_base_url: http://localhost:7860 + +# diffuser +diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload +diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS + +# Dall e service key +dall_e_key: "" +dall_e_generation_engine: "dall-e-3" + +# Midjourney service key +midjourney_key: "" +midjourney_timeout: 300 +midjourney_retries: 1 + # Image generation service comfyui enable_comfyui_service: false comfyui_base_url: http://127.0.0.1:8188/ +comfyui_model: v1-5-pruned-emaonly.ckpt # Motion control service enable_motion_ctrl_service: false motion_ctrl_base_url: http://localhost:7861 +# ***************** TTT 
***************** + # ollama service enable_ollama_service: false ollama_base_url: http://localhost:11434 @@ -106,7 +218,11 @@ petals_device: cuda # lollms service enable_lollms_service: false -lollms_base_url: http://localhost:1234 +lollms_access_keys : [] # set a list of keys separated by coma to restrict access +activate_lollms_server: true +activate_ollama_emulator: true +activate_openai_emulator: true +activate_mistralai_emulator: true # elastic search service elastic_search_service: false @@ -131,13 +247,29 @@ audio_auto_send_input: true audio_silenceTimer: 5000 # Data vectorization +rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data +rag_vectorizer: bert # possible values bert, tfidf, word2vec +rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable +rag_vectorizer_parameters: null # Parameters of the model in json format +rag_chunk_size: 512 # number of tokens per chunk +rag_overlap: 0 # number of tokens of overlap + +rag_n_chunks: 4 #Number of chunks to recover from the database +rag_clean_chunks: true #Removed all uinecessary spaces and line returns +rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too +rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database +rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format +rag_activate_multi_hops: false #if true, we use multi hops algorithm to do multiple researches until the AI has enough data +rag_min_nb_tokens_in_chunk: 10 #this removed any useless junk ith less than x tokens +rag_max_n_hops: 3 #We set the maximum number of hop in multi hops rag + +contextual_summary: false #If activated this will completely replace the rag and instead will use contextual summary + activate_skills_lib: false # Activate vectorizing previous 
conversations skills_lib_database_name: "default" # Default skills database -summarize_discussion: false # activate discussion summary (better but adds computation time) max_summary_size: 512 # in tokens data_vectorization_visualize_on_vectorization: false -use_files: true # Activate using files data_vectorization_activate: true # To activate/deactivate data vectorization data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer" data_visualization_method: "PCA" #"PCA" or "TSNE" @@ -154,20 +286,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan # Activate internet search activate_internet_search: false +activate_internet_pages_judgement: true internet_vectorization_chunk_size: 512 # chunk size -internet_vectorization_overlap_size: 128 # overlap between chunks size -internet_vectorization_nb_chunks: 2 # number of chunks to use -internet_nb_search_pages: 3 # number of pages to select -internet_quick_search: False # If active the search engine will not load and read the webpages -internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search +internet_vectorization_overlap_size: 0 # overlap between chunks size +internet_vectorization_nb_chunks: 4 # number of chunks to use +internet_nb_search_pages: 8 # number of pages to select +internet_quick_search: false # If active the search engine will not load and read the webpages +internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search # Helpers pdf_latex_path: null # boosting information positive_boost: null negative_boost: null -current_language: null -fun_mode: False +current_language: english +fun_mode: false # webui configurations @@ -175,5 +308,3 @@ show_code_of_conduct: true activate_audio_infos: true -# whisper configuration -whisper_model: base \ No newline at end of file diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml 
b/elf_test_cfg/personal/configs/lollms_elf_config.yaml index a7c26d1..90094fb 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 118 +version: 130 binding_name: null model_name: null model_variant: null @@ -43,6 +43,7 @@ end_ai_header_id_template: ": " end_ai_message_id_template: "" system_message_template: "system" +use_continue_message: true seed: -1 ctx_size: 4084 @@ -76,6 +77,9 @@ discussion_db_name: default # Automatic updates debug: false +debug_show_final_full_prompt: false +debug_show_chunks: false + debug_log_file_path: "" auto_update: true auto_sync_personalities: true @@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false # -------------------- Services global configurations -------------------------- # Select the active test to speach, text to image and speach to text services -active_tts_service: "None" # xtts (offline), openai_tts (API key required) +active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required) active_tti_service: "None" # autosd (offline), dall-e (online) active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required) active_ttm_service: "None" # musicgen (offline) @@ -150,12 +154,19 @@ xtts_top_k: 50 xtts_top_p: 0.85 xtts_speed: 1 xtts_enable_text_splitting: true +xtts_freq: 22050 # openai_whisper configuration openai_tts_key: "" openai_tts_model: "tts-1" openai_tts_voice: "alloy" + +elevenlabs_tts_key: "" +elevenlabs_tts_model_id: "eleven_monolingual_v2" +elevenlabs_tts_voice_stability: 0.5 +elevenlabs_tts_voice_boost: 0.5 +elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL # ***************** TTI ***************** use_negative_prompt: true @@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3" # Midjourney service key 
midjourney_key: "" +midjourney_timeout: 300 +midjourney_retries: 1 # Image generation service comfyui enable_comfyui_service: false @@ -205,8 +218,7 @@ petals_device: cuda # lollms service enable_lollms_service: false -lollms_base_url: http://localhost:1234 -lollms_access_keys : "" # set a list of keys separated by coma to restrict access +lollms_access_keys : [] # set a list of keys separated by coma to restrict access activate_lollms_server: true activate_ollama_emulator: true activate_openai_emulator: true @@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk +rag_overlap: 0 # number of tokens of overlap + rag_n_chunks: 4 #Number of chunks to recover from the database rag_clean_chunks: true #Removed all uinecessary spaces and line returns rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format +rag_activate_multi_hops: false #if true, we use multi hops algorithm to do multiple researches until the AI has enough data +rag_min_nb_tokens_in_chunk: 10 #this removed any useless junk ith less than x tokens +rag_max_n_hops: 3 #We set the maximum number of hop in multi hops rag + +contextual_summary: false #If activated this will completely replace the rag and instead will use contextual summary activate_skills_lib: false # Activate vectorizing previous conversations skills_lib_database_name: "default" # Default skills database diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml 
b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml index a7c26d1..90094fb 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 118 +version: 130 binding_name: null model_name: null model_variant: null @@ -43,6 +43,7 @@ end_ai_header_id_template: ": " end_ai_message_id_template: "" system_message_template: "system" +use_continue_message: true seed: -1 ctx_size: 4084 @@ -76,6 +77,9 @@ discussion_db_name: default # Automatic updates debug: false +debug_show_final_full_prompt: false +debug_show_chunks: false + debug_log_file_path: "" auto_update: true auto_sync_personalities: true @@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false # -------------------- Services global configurations -------------------------- # Select the active test to speach, text to image and speach to text services -active_tts_service: "None" # xtts (offline), openai_tts (API key required) +active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required) active_tti_service: "None" # autosd (offline), dall-e (online) active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required) active_ttm_service: "None" # musicgen (offline) @@ -150,12 +154,19 @@ xtts_top_k: 50 xtts_top_p: 0.85 xtts_speed: 1 xtts_enable_text_splitting: true +xtts_freq: 22050 # openai_whisper configuration openai_tts_key: "" openai_tts_model: "tts-1" openai_tts_voice: "alloy" + +elevenlabs_tts_key: "" +elevenlabs_tts_model_id: "eleven_monolingual_v2" +elevenlabs_tts_voice_stability: 0.5 +elevenlabs_tts_voice_boost: 0.5 +elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL # ***************** TTI ***************** use_negative_prompt: true @@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3" # Midjourney service 
key midjourney_key: "" +midjourney_timeout: 300 +midjourney_retries: 1 # Image generation service comfyui enable_comfyui_service: false @@ -205,8 +218,7 @@ petals_device: cuda # lollms service enable_lollms_service: false -lollms_base_url: http://localhost:1234 -lollms_access_keys : "" # set a list of keys separated by coma to restrict access +lollms_access_keys : [] # set a list of keys separated by coma to restrict access activate_lollms_server: true activate_ollama_emulator: true activate_openai_emulator: true @@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk +rag_overlap: 0 # number of tokens of overlap + rag_n_chunks: 4 #Number of chunks to recover from the database rag_clean_chunks: true #Removed all uinecessary spaces and line returns rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format +rag_activate_multi_hops: false #if true, we use multi hops algorithm to do multiple researches until the AI has enough data +rag_min_nb_tokens_in_chunk: 10 #this removed any useless junk ith less than x tokens +rag_max_n_hops: 3 #We set the maximum number of hop in multi hops rag + +contextual_summary: false #If activated this will completely replace the rag and instead will use contextual summary activate_skills_lib: false # Activate vectorizing previous conversations skills_lib_database_name: "default" # Default skills database diff --git a/lollms/app.py b/lollms/app.py index 30e08ed..95987f3 100644 --- a/lollms/app.py +++ 
b/lollms/app.py @@ -176,7 +176,15 @@ class LollmsApplication(LoLLMsCom): def restore_trust_store(self): if self.bk_store is not None: os.environ['REQUESTS_CA_BUNDLE'] = self.bk_store - + + def model_path_to_binding_model(self, model_path:str): + parts = model_path.strip().split("::") + if len(parts)<2: + raise Exception("Model path is not in the format binding:model_name!") + binding = parts[0] + model_name = parts[1] + return binding, model_name + def select_model(self, binding_name, model_name): self.config["binding_name"] = binding_name self.config["model_name"] = model_name diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index 49a783a..5994723 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 129 +version: 131 binding_name: null model_name: null model_variant: null @@ -163,7 +163,7 @@ openai_tts_voice: "alloy" elevenlabs_tts_key: "" -elevenlabs_tts_model_id: "eleven_monolingual_v1" +elevenlabs_tts_model_id: "eleven_monolingual_v2" elevenlabs_tts_voice_stability: 0.5 elevenlabs_tts_voice_boost: 0.5 elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL @@ -218,13 +218,23 @@ petals_device: cuda # lollms service enable_lollms_service: false -lollms_base_url: http://localhost:1234 -lollms_access_keys : "" # set a list of keys separated by coma to restrict access +lollms_access_keys : [] # set a list of keys separated by coma to restrict access activate_lollms_server: true +activate_lollms_rag_server: true +activate_lollms_tts_server: true +activate_lollms_stt_server: true +activate_lollms_tti_server: true +activate_lollms_itt_server: true +activate_lollms_ttm_server: true activate_ollama_emulator: true activate_openai_emulator: true activate_mistralai_emulator: true +use_smart_routing: false +smart_routing_router_model : "" +smart_routing_models_by_power : [] + + # elastic search service 
elastic_search_service: false elastic_search_url: http://localhost:9200 diff --git a/lollms/databases/discussions_database.py b/lollms/databases/discussions_database.py index a9e52e4..f7a78f1 100644 --- a/lollms/databases/discussions_database.py +++ b/lollms/databases/discussions_database.py @@ -211,6 +211,16 @@ class DiscussionsDB: self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False) return Discussion(self.lollms, last_discussion_id, self) + def load_discussion_by_id(self, discussion_id): + # Fetch the discussion by the provided discussion_id + discussion_data = self.select("SELECT * FROM discussion WHERE id=?", (discussion_id,), fetch_all=False) + if discussion_data is None: + raise ValueError("Discussion not found with the provided ID.") + + # Assuming discussion_data returns a tuple or list with the necessary data + self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? 
ORDER BY id DESC LIMIT 1", (discussion_id,), fetch_all=False) + return Discussion(self.lollms, discussion_id, self) + def create_discussion(self, title="untitled"): """Creates a new discussion diff --git a/lollms/personality.py b/lollms/personality.py index 26595ce..fe4ef87 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -3404,6 +3404,23 @@ class APScript(StateMachine): return paths + def update_section(self, content, section_name, new_code): + # Define patterns for HTML, JavaScript, and CSS sections + html_pattern = re.compile(f".*?", re.DOTALL) + js_css_pattern = re.compile(f"// section_start: {section_name}.*?// section_end: {section_name}", re.DOTALL) + + # Try to replace HTML section + updated_content, html_replacements = re.subn(html_pattern, f"\n{new_code}\n", content) + + # If HTML replacement didn't occur, try JavaScript/CSS section + if html_replacements == 0: + updated_content, js_css_replacements = re.subn(js_css_pattern, f"// section_start: {section_name}\n{new_code}\n// section_end: {section_name}", content) + + if js_css_replacements == 0: + return content, False # Section not found + + return updated_content, True # Section updated successfully + def extract_code_blocks(self, text: str) -> List[dict]: """ This function extracts code blocks from a given text. @@ -3445,6 +3462,7 @@ class APScript(StateMachine): block_infos = { 'index': index, 'file_name': "", + 'section': "", 'content': "", 'type': "" } @@ -3452,10 +3470,13 @@ class APScript(StateMachine): # Check the preceding line for file name preceding_text = text[:code_delimiter_position].strip().splitlines() if preceding_text: - last_line = preceding_text[-1] + last_line = preceding_text[-1].strip() if last_line.startswith("") and last_line.endswith(""): file_name = last_line[len(""):-len("")].strip() block_infos['file_name'] = file_name + if last_line.startswith("
<section>") and last_line.endswith("</section>"):
+                    section = last_line[len("<section>"):-len("</section>
")].strip() + block_infos['section'] = section sub_text = text[code_delimiter_position + 3:] if len(sub_text) > 0: @@ -3477,7 +3498,7 @@ class APScript(StateMachine): block_infos["type"] = sub_text[:next_index] next_pos = indices[index + 1] - code_delimiter_position - if sub_text[next_pos - 3] == "`": + if next_pos - 3 Optional[User]: + conn = sqlite3.connect(str(users_db_path)) + cursor = conn.cursor() + cursor.execute("SELECT * FROM users WHERE username = ?", (username,)) + user_data = cursor.fetchone() + conn.close() + if user_data: + return User(id=user_data[0], username=user_data[1], email=user_data[2], password=user_data[3], last_activity=user_data[4], database_name=user_data[5]) + return None + +def create_user(username: str, email: str, password: str, database_name: str): + conn = sqlite3.connect(str(users_db_path)) + cursor = conn.cursor() + try: + cursor.execute("INSERT INTO users (username, email, password, last_activity, database_name) VALUES (?, ?, ?, ?, ?)", + (username, email, password, time.time(), database_name)) + conn.commit() + except sqlite3.IntegrityError: + conn.close() + raise HTTPException(status_code=400, detail="Username already exists") + conn.close() + +def update_user_activity(username: str): + conn = sqlite3.connect(str(users_db_path)) + cursor = conn.cursor() + cursor.execute("UPDATE users SET last_activity = ? 
WHERE username = ?", (time.time(), username)) + conn.commit() + conn.close() + +def authenticate_user(username: str, password: str) -> Optional[str]: + user = get_user(username) + if user and user.password == password: + token = secrets.token_urlsafe(32) + expiry = time.time() + 3600 # Token valid for 1 hour + user_tokens[token] = UserToken(token=token, expiry=expiry) + update_user_activity(username) + return token + return None + +async def get_current_user(token: str = Header(...)): + if token not in user_tokens or user_tokens[token].expiry < time.time(): + raise HTTPException(status_code=401, detail="Invalid or expired token") + return token + +# ----------------------- Lifespan Event Handler ------------------------------ + +@asynccontextmanager +async def lifespan(app): + # Startup + init_users_db() + yield + +# Add this lifespan event handler to your FastAPI app +# app.router.lifespan_context = lifespan + +# ----------------------- Endpoints ------------------------------ + +@router.post("/register", response_model=User) +async def register(user: UserAuth): + # Generate a unique database name for the user + database_name = f"{user.username}_db.sqlite" + create_user(user.username, user.email, user.password, database_name) + return get_user(user.username) + +@router.post("/login", response_model=UserToken) +async def login(user: UserAuth): + token = authenticate_user(user.username, user.password) + if not token: + raise HTTPException(status_code=401, detail="Invalid username or password") + + user_data = get_user(user.username) + if not user_data: + raise HTTPException(status_code=404, detail="User not found") + + # Load the user's discussion database + user_discussion_db = DiscussionsDB(lollmsElfServer.lollms_paths, user_data.database_name) + discussion = user_discussion_db.load_discussion_by_id(user_data.id) # Assuming ID is used to load the discussion + lollmsElfServer.session.add_client(token, 0, discussion, user_discussion_db) + + return 
UserToken(token=token, expiry=user_tokens[token].expiry) + +@router.get("/current_user", response_model=User) +async def current_user(token: str = Depends(get_current_user)): + for user_token in user_tokens.values(): + if user_token.token == token: + user = get_user(user_token.token) # Assuming token is the username for simplicity + if user: + return user + raise HTTPException(status_code=404, detail="User not found") + +# Add the router to your FastAPI app +# app.include_router(router) diff --git a/lollms/server/endpoints/lollms_rag.py b/lollms/server/endpoints/lollms_rag.py new file mode 100644 index 0000000..4d32342 --- /dev/null +++ b/lollms/server/endpoints/lollms_rag.py @@ -0,0 +1,153 @@ +""" +project: lollms_webui +file: lollms_rag.py +author: ParisNeo +description: + This module contains a set of FastAPI routes that allow users to interact with the RAG (Retrieval-Augmented Generation) library. + +Usage: + 1. Initialize the RAG system by adding documents using the /add_document endpoint. + 2. Build the index using the /index_database endpoint. + 3. Perform searches using the /search endpoint. + 4. Remove documents using the /remove_document/{document_id} endpoint. + 5. Wipe the entire database using the /wipe_database endpoint. + +Authentication: + - If lollms_access_keys are specified in the configuration, API key authentication is required. + - If no keys are specified, authentication is bypassed, and all users are treated as user ID 1. + +User Management: + - Each user gets a unique vectorizer based on their API key. + - If no API keys are specified, all requests are treated as coming from user ID 1. + +Note: Ensure proper security measures are in place when deploying this API in a production environment. 
+""" + +from fastapi import APIRouter, Request, HTTPException, Depends, Header +from lollms_webui import LOLLMSWebUI +from pydantic import BaseModel, Field +from starlette.responses import StreamingResponse +from lollms.types import MSG_TYPE +from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception +from lollms.security import sanitize_path, check_access +from ascii_colors import ASCIIColors +from lollms.databases.discussions_database import DiscussionsDB, Discussion +from typing import List, Optional, Union +from pathlib import Path +from fastapi.security import APIKeyHeader +from lollmsvectordb.database_elements.chunk import Chunk +from lollmsvectordb.vector_database import VectorDatabase +from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer +from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer +import sqlite3 +import secrets +import time +import shutil +import os +from datetime import datetime, timedelta +import asyncio +from contextlib import asynccontextmanager +import hashlib + +# ----------------------- Defining router and main class ------------------------------ + +router = APIRouter() +lollmsElfServer: LOLLMSWebUI = LOLLMSWebUI.get_instance() +api_key_header = APIKeyHeader(name="Authorization") + +# ----------------------- RAG System ------------------------------ + +class RAGQuery(BaseModel): + query: str = Field(..., description="The query to process using RAG") + +class RAGResponse(BaseModel): + answer: str = Field(..., description="The generated answer") + sources: List[str] = Field(..., description="List of sources used for the answer") + +class IndexDocument(BaseModel): + title: str = Field(..., description="The title of the document") + content: str = Field(..., description="The content to be indexed") + path: str = Field(default="unknown", description="The path of the document") + +class IndexResponse(BaseModel): + success: bool = Field(..., description="Indicates if 
the indexing was successful") + message: str = Field(..., description="Additional information about the indexing process") + +class DocumentResponse(BaseModel): + success: bool + message: str + +class RAGChunk(BaseModel): + id : int + chunk_id : int + doc_title : str + doc_path : str + text : str + nb_tokens : int + distance : float + +def get_user_id(bearer_key: str) -> int: + """ + Determine the user ID based on the bearer key. + If no keys are specified in the configuration, always return 1. + """ + if not lollmsElfServer.config.lollms_access_keys: + return 1 + # Use the index of the key in the list as the user ID + try: + return lollmsElfServer.config.lollms_access_keys.index(bearer_key) + 1 + except ValueError: + raise HTTPException(status_code=403, detail="Invalid API Key") + +def get_user_vectorizer(user_id: int, bearer_key: str): + small_key = hashlib.md5(bearer_key.encode()).hexdigest()[:8] + user_folder = lollmsElfServer.lollms_paths / str(user_id) + user_folder.mkdir(parents=True, exist_ok=True) + return VectorDatabase( + str(user_folder / f"rag_db_{small_key}.sqlite"), + BERTVectorizer(lollmsElfServer.config.rag_vectorizer_model) if lollmsElfServer.config.rag_vectorizer == "bert" else TFIDFVectorizer(), + lollmsElfServer.model, + chunk_size=lollmsElfServer.config.rag_chunk_size, + overlap=lollmsElfServer.config.rag_overlap + ) + +async def get_current_user(bearer_token: str = Depends(api_key_header)): + if lollmsElfServer.config.lollms_access_keys: + if bearer_token not in lollmsElfServer.config.lollms_access_keys: + raise HTTPException(status_code=403, detail="Invalid API Key") + return bearer_token + +@router.post("/add_document", response_model=DocumentResponse) +async def add_document(doc: IndexDocument, user: str = Depends(get_current_user)): + user_id = get_user_id(user) + vectorizer = get_user_vectorizer(user_id, user) + vectorizer.add_document(title=doc.title, text=doc.content, path=doc.path) + return DocumentResponse(success=True, 
message="Document added successfully.") + +@router.post("/remove_document/{document_id}", response_model=DocumentResponse) +async def remove_document(document_id: int, user: str = Depends(get_current_user)): + user_id = get_user_id(user) + vectorizer = get_user_vectorizer(user_id, user) + # Logic to remove the document by ID + return DocumentResponse(success=True, message="Document removed successfully.") + +@router.post("/index_database", response_model=DocumentResponse) +async def index_database(user: str = Depends(get_current_user)): + user_id = get_user_id(user) + vectorizer = get_user_vectorizer(user_id, user) + vectorizer.build_index() + return DocumentResponse(success=True, message="Database indexed successfully.") + +@router.post("/search", response_model=List[RAGChunk]) +async def search(query: RAGQuery, user: str = Depends(get_current_user)): + user_id = get_user_id(user) + vectorizer = get_user_vectorizer(user_id, user) + chunks = vectorizer.search(query.query) + return [RAGChunk(c.id,c.chunk_id, c.doc.title, c.doc.path, c.text, c.nb_tokens, c.distance) for c in chunks] + +@router.delete("/wipe_database", response_model=DocumentResponse) +async def wipe_database(user: str = Depends(get_current_user)): + user_id = get_user_id(user) + user_folder = lollmsElfServer.lollms_paths / str(user_id) + shutil.rmtree(user_folder, ignore_errors=True) + return DocumentResponse(success=True, message="Database wiped successfully.") diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml index a7c26d1..90094fb 100644 --- a/personal_data/configs/lollms_discord_local_config.yaml +++ b/personal_data/configs/lollms_discord_local_config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 118 +version: 130 binding_name: null model_name: null model_variant: null @@ -43,6 +43,7 @@ end_ai_header_id_template: ": " 
 end_ai_message_id_template: ""
 
 system_message_template: "system"
+use_continue_message: true
 
 seed: -1
 ctx_size: 4084
@@ -76,6 +77,9 @@ discussion_db_name: default
 
 # Automatic updates
 debug: false
+debug_show_final_full_prompt: false
+debug_show_chunks: false
+
 debug_log_file_path: ""
 auto_update: true
 auto_sync_personalities: true
@@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
 
 # -------------------- Services global configurations --------------------------
 # Select the active test to speach, text to image and speach to text services
-active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
 active_tti_service: "None" # autosd (offline), dall-e (online)
 active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
 active_ttm_service: "None" # musicgen (offline)
@@ -150,12 +154,19 @@ xtts_top_k: 50
 xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true
+xtts_freq: 22050
 
 
 # openai_whisper configuration
 openai_tts_key: ""
 openai_tts_model: "tts-1"
 openai_tts_voice: "alloy"
+
+elevenlabs_tts_key: ""
+elevenlabs_tts_model_id: "eleven_monolingual_v2" # NOTE(review): ElevenLabs lists eleven_monolingual_v1 / eleven_multilingual_v2 — "eleven_monolingual_v2" does not appear to exist; confirm
+elevenlabs_tts_voice_stability: 0.5
+elevenlabs_tts_voice_boost: 0.5
+elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
 
 # ***************** TTI *****************
 use_negative_prompt: true
@@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
 
 # Midjourney service key
 midjourney_key: ""
+midjourney_timeout: 300
+midjourney_retries: 1
 
 # Image generation service comfyui
 enable_comfyui_service: false
@@ -205,8 +218,7 @@ petals_device: cuda
 
 # lollms service
 enable_lollms_service: false
-lollms_base_url: http://localhost:1234
-lollms_access_keys : "" # set a list of keys separated by coma to restrict access
+lollms_access_keys : [] # set a list of keys separated by commas to restrict access
 activate_lollms_server: true
 activate_ollama_emulator: true
 activate_openai_emulator: true
@@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
 rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
 rag_vectorizer_parameters: null # Parameters of the model in json format
 rag_chunk_size: 512 # number of tokens per chunk
+rag_overlap: 0 # number of tokens of overlap
+
 rag_n_chunks: 4 #Number of chunks to recover from the database
 rag_clean_chunks: true #Removed all uinecessary spaces and line returns
 rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
 rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
 rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
+rag_activate_multi_hops: false #if true, we use the multi-hops algorithm to run multiple searches until the AI has enough data
+rag_min_nb_tokens_in_chunk: 10 #this removes any useless junk with less than x tokens
+rag_max_n_hops: 3 #We set the maximum number of hops in multi-hops rag
+
+contextual_summary: false #If activated this will completely replace the rag and instead will use contextual summary
 
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database