Added smart model routing

Saifeddine ALOUI 2024-08-06 11:46:42 +02:00
parent 501ad14f66
commit 53edc1e8ce
12 changed files with 626 additions and 50 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 129
version: 131
binding_name: null
model_name: null
model_variant: null
@ -163,7 +163,7 @@ openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
@ -218,13 +218,23 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_lollms_rag_server: true
activate_lollms_tts_server: true
activate_lollms_stt_server: true
activate_lollms_tti_server: true
activate_lollms_itt_server: true
activate_lollms_ttm_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
use_smart_routing: false
smart_routing_router_model : ""
smart_routing_models_by_power : []
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200
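The three smart-routing keys above are the heart of this commit: a small "router" model inspects each prompt and dispatches it to one of the models listed by increasing power. A minimal sketch of the idea in Python, with hypothetical helper names (the actual routing logic lives in the lollms application code, not in this config):

# Hypothetical sketch of smart routing, not the actual lollms implementation.
# A cheap "router" model scores prompt difficulty, then the request goes to
# the weakest model in smart_routing_models_by_power that can handle it.
def route(prompt: str, config: dict, estimate_complexity) -> str:
    models = config["smart_routing_models_by_power"]  # assumed ordered weakest -> strongest
    if not config["use_smart_routing"] or not models:
        return config["model_name"]  # fall back to the statically selected model
    score = estimate_complexity(prompt)  # assumed to use smart_routing_router_model, returning 0..1
    index = min(int(score * len(models)), len(models) - 1)
    return models[index]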

View File

@ -1,35 +1,54 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 81
version: 130
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
show_news_panel: true
# Security measures
turn_on_setting_update_validation: True
turn_on_code_execution: True
turn_on_code_validation: True
turn_on_open_file_validation: False
turn_on_send_file_validation: False
turn_on_setting_update_validation: true
turn_on_code_execution: true
turn_on_code_validation: true
turn_on_open_file_validation: true
turn_on_send_file_validation: true
turn_on_language_validation: true
force_accept_remote_access: false
# Server information
headless_server_mode: False
headless_server_mode: false
allowed_origins: []
# Host information
host: localhost
port: 9600
app_custom_logo: ""
# Generation parameters
discussion_prompt_separator: "!@>"
start_header_id_template: "!@>"
end_header_id_template: ": "
separator_template: "\n"
start_user_header_id_template: "!@>"
end_user_header_id_template: ": "
end_user_message_id_template: ""
start_ai_header_id_template: "!@>"
end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
max_n_predict: 4096
min_n_predict: 512
min_n_predict: 1024
temperature: 0.9
top_k: 50
top_p: 0.95
@ -50,14 +69,17 @@ user_name: user
user_description: ""
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: default_user.svg
user_avatar: null
use_user_informations_in_discussion: false
# UI parameters
discussion_db_name: default
# Automatic updates
debug: False
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -77,23 +99,113 @@ auto_show_browser: true
# copy to clipboard
copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
# -------------------- Services --------------------------
# ***************** STT *****************
stt_input_device: 0
# STT service
stt_listening_threshold: 1000
stt_silence_duration: 2
stt_sound_threshold_percentage: 10
stt_gain: 1.0
stt_rate: 44100
stt_channels: 1
stt_buffer_size: 10
stt_activate_word_detection: false
stt_word_detection_file: null
# ASR STT service
asr_enable: false
asr_base_url: http://localhost:9000
# openai_whisper configuration
openai_whisper_key: ""
openai_whisper_model: "whisper-1"
# whisper configuration
whisper_activate: false
whisper_model: base
# ***************** TTS *****************
tts_output_device: 0
# Voice service
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
use_ai_generated_negative_prompt: false
negative_prompt_generation_prompt: Generate a negative prompt for the following prompt. A negative prompt is a set of words that describe things we do not want in the generated image.
default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
# Image generation service
enable_sd_service: false
sd_base_url: http://localhost:7860
# Image generation service
enable_fooocus_service: false
fooocus_base_url: http://localhost:7860
# diffuser
diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
# Dall e service key
dall_e_key: ""
dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
comfyui_base_url: http://127.0.0.1:8188/
comfyui_model: v1-5-pruned-emaonly.ckpt
# Motion control service
enable_motion_ctrl_service: false
motion_ctrl_base_url: http://localhost:7861
# ***************** TTT *****************
# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434
@ -106,7 +218,11 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
# elastic search service
elastic_search_service: false
@ -131,13 +247,29 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summarize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
data_vectorization_activate: true # To activate/deactivate data vectorization
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
data_visualization_method: "PCA" #"PCA" or "TSNE"
@ -154,20 +286,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan
# Activate internet search
activate_internet_search: false
activate_internet_pages_judgement: true
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
internet_quick_search: False # If active, the search engine will not load and read the webpages
internet_activate_search_decision: False # If active, the AI decides by itself if it needs to do a search
internet_vectorization_overlap_size: 0 # overlap between chunks size
internet_vectorization_nb_chunks: 4 # number of chunks to use
internet_nb_search_pages: 8 # number of pages to select
internet_quick_search: false # If active, the search engine will not load and read the webpages
internet_activate_search_decision: false # If active, the AI decides by itself if it needs to do a search
# Helpers
pdf_latex_path: null
# boosting information
positive_boost: null
negative_boost: null
current_language: null
fun_mode: False
current_language: english
fun_mode: false
# webui configurations
@ -175,5 +308,3 @@ show_code_of_conduct: true
activate_audio_infos: true
# whisper configuration
whisper_model: base

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -176,7 +176,15 @@ class LollmsApplication(LoLLMsCom):
def restore_trust_store(self):
if self.bk_store is not None:
os.environ['REQUESTS_CA_BUNDLE'] = self.bk_store
def model_path_to_binding_model(self, model_path:str):
parts = model_path.strip().split("::")
if len(parts)<2:
raise Exception("Model path is not in the format binding:model_name!")
binding = parts[0]
model_name = parts[1]
return binding, model_name
def select_model(self, binding_name, model_name):
self.config["binding_name"] = binding_name
self.config["model_name"] = model_name

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 129
version: 131
binding_name: null
model_name: null
model_variant: null
@ -163,7 +163,7 @@ openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v1"
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
@ -218,13 +218,23 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_lollms_rag_server: true
activate_lollms_tts_server: true
activate_lollms_stt_server: true
activate_lollms_tti_server: true
activate_lollms_itt_server: true
activate_lollms_ttm_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true
use_smart_routing: false
smart_routing_router_model : ""
smart_routing_models_by_power : []
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200

View File

@ -211,6 +211,16 @@ class DiscussionsDB:
self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (last_discussion_id,), fetch_all=False)
return Discussion(self.lollms, last_discussion_id, self)
def load_discussion_by_id(self, discussion_id):
# Fetch the discussion by the provided discussion_id
discussion_data = self.select("SELECT * FROM discussion WHERE id=?", (discussion_id,), fetch_all=False)
if discussion_data is None:
raise ValueError("Discussion not found with the provided ID.")
# Assuming discussion_data returns a tuple or list with the necessary data
self.current_message_id = self.select("SELECT id FROM message WHERE discussion_id=? ORDER BY id DESC LIMIT 1", (discussion_id,), fetch_all=False)
return Discussion(self.lollms, discussion_id, self)
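A brief usage sketch of the new loader, assuming `db` is a DiscussionsDB instance and the id exists:

# Hypothetical usage: reload a specific discussion by its database id.
discussion = db.load_discussion_by_id(42)  # raises ValueError for an unknown id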
def create_discussion(self, title="untitled"):
"""Creates a new discussion

View File

@ -3404,6 +3404,23 @@ class APScript(StateMachine):
return paths
def update_section(self, content, section_name, new_code):
# Define patterns for HTML, JavaScript, and CSS sections
html_pattern = re.compile(f"<!-- section_start: {section_name} -->.*?<!-- section_end: {section_name} -->", re.DOTALL)
js_css_pattern = re.compile(f"// section_start: {section_name}.*?// section_end: {section_name}", re.DOTALL)
# Try to replace HTML section
updated_content, html_replacements = re.subn(html_pattern, f"<!-- section_start: {section_name} -->\n{new_code}\n<!-- section_end: {section_name} -->", content)
# If HTML replacement didn't occur, try JavaScript/CSS section
if html_replacements == 0:
updated_content, js_css_replacements = re.subn(js_css_pattern, f"// section_start: {section_name}\n{new_code}\n// section_end: {section_name}", content)
if js_css_replacements == 0:
return content, False # Section not found
return updated_content, True # Section updated successfully
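A hedged usage sketch of update_section; the marker syntax follows the regex patterns above, and `script` stands for any APScript instance:

# Hypothetical usage: replace a named HTML section in generated content.
html = (
    "<html><body>\n"
    "<!-- section_start: header -->old header<!-- section_end: header -->\n"
    "</body></html>"
)
updated, ok = script.update_section(html, "header", "<h1>New header</h1>")
# ok is False when no matching section markers were found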
def extract_code_blocks(self, text: str) -> List[dict]:
"""
This function extracts code blocks from a given text.
@ -3445,6 +3462,7 @@ class APScript(StateMachine):
block_infos = {
'index': index,
'file_name': "",
'section': "",
'content': "",
'type': ""
}
@ -3452,10 +3470,13 @@ class APScript(StateMachine):
# Check the preceding line for file name
preceding_text = text[:code_delimiter_position].strip().splitlines()
if preceding_text:
last_line = preceding_text[-1]
last_line = preceding_text[-1].strip()
if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
block_infos['file_name'] = file_name
if last_line.startswith("<section>") and last_line.endswith("</section>"):
section = last_line[len("<section>"):-len("</section>")].strip()
block_infos['section'] = section
sub_text = text[code_delimiter_position + 3:]
if len(sub_text) > 0:
@ -3477,7 +3498,7 @@ class APScript(StateMachine):
block_infos["type"] = sub_text[:next_index]
next_pos = indices[index + 1] - code_delimiter_position
if sub_text[next_pos - 3] == "`":
if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
else:
block_infos["content"] = sub_text[start_pos:next_pos].strip()

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -0,0 +1,157 @@
"""
project: lollms_webui
file: lollms_authentication.py
author: ParisNeo
description:
This module contains a set of FastAPI routes that manage user authentication.
"""
from fastapi import APIRouter, Request, HTTPException, Depends, Header
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from lollms.security import sanitize_path, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List, Optional, Union
from pathlib import Path
from fastapi.security import APIKeyHeader
import sqlite3
import secrets
import time
import shutil
import os
from datetime import datetime, timedelta
import asyncio
from contextlib import asynccontextmanager
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer: LOLLMSWebUI = LOLLMSWebUI.get_instance()
# ----------------------- User Authentication and Management ------------------------------
class User(BaseModel):
id: int
username: str
email: str
password: str
last_activity: float
database_name: str # Added field for database name
class UserAuth(BaseModel):
username: str
password: str
email: str
class UserToken(BaseModel):
    token: str
    expiry: float
    username: str  # stored so /current_user can resolve the user behind a token
users_db_path = lollmsElfServer.lollms_paths.personal_configuration_path / "users.sqlite"
user_tokens = {}
def init_users_db():
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute('''CREATE TABLE IF NOT EXISTS users
(id INTEGER PRIMARY KEY, username TEXT UNIQUE, email TEXT, password TEXT, last_activity REAL, database_name TEXT)''')
conn.commit()
conn.close()
def get_user(username: str) -> Optional[User]:
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute("SELECT * FROM users WHERE username = ?", (username,))
user_data = cursor.fetchone()
conn.close()
if user_data:
return User(id=user_data[0], username=user_data[1], email=user_data[2], password=user_data[3], last_activity=user_data[4], database_name=user_data[5])
return None
def create_user(username: str, email: str, password: str, database_name: str):
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
try:
cursor.execute("INSERT INTO users (username, email, password, last_activity, database_name) VALUES (?, ?, ?, ?, ?)",
(username, email, password, time.time(), database_name))
conn.commit()
except sqlite3.IntegrityError:
conn.close()
raise HTTPException(status_code=400, detail="Username already exists")
conn.close()
def update_user_activity(username: str):
conn = sqlite3.connect(str(users_db_path))
cursor = conn.cursor()
cursor.execute("UPDATE users SET last_activity = ? WHERE username = ?", (time.time(), username))
conn.commit()
conn.close()
def authenticate_user(username: str, password: str) -> Optional[str]:
user = get_user(username)
if user and user.password == password:
token = secrets.token_urlsafe(32)
expiry = time.time() + 3600 # Token valid for 1 hour
user_tokens[token] = UserToken(token=token, expiry=expiry, username=username)
update_user_activity(username)
return token
return None
async def get_current_user(token: str = Header(...)):
if token not in user_tokens or user_tokens[token].expiry < time.time():
raise HTTPException(status_code=401, detail="Invalid or expired token")
return token
# ----------------------- Lifespan Event Handler ------------------------------
@asynccontextmanager
async def lifespan(app):
# Startup
init_users_db()
yield
# Add this lifespan event handler to your FastAPI app
# app.router.lifespan_context = lifespan
# ----------------------- Endpoints ------------------------------
@router.post("/register", response_model=User)
async def register(user: UserAuth):
# Generate a unique database name for the user
database_name = f"{user.username}_db.sqlite"
create_user(user.username, user.email, user.password, database_name)
return get_user(user.username)
@router.post("/login", response_model=UserToken)
async def login(user: UserAuth):
token = authenticate_user(user.username, user.password)
if not token:
raise HTTPException(status_code=401, detail="Invalid username or password")
user_data = get_user(user.username)
if not user_data:
raise HTTPException(status_code=404, detail="User not found")
# Load the user's discussion database
user_discussion_db = DiscussionsDB(lollmsElfServer.lollms_paths, user_data.database_name)
discussion = user_discussion_db.load_discussion_by_id(user_data.id) # Assuming ID is used to load the discussion
lollmsElfServer.session.add_client(token, 0, discussion, user_discussion_db)
return user_tokens[token]
@router.get("/current_user", response_model=User)
async def current_user(token: str = Depends(get_current_user)):
user_token = user_tokens.get(token)
if user_token:
    user = get_user(user_token.username)  # resolve the user that owns this token
    if user:
        return user
raise HTTPException(status_code=404, detail="User not found")
# Add the router to your FastAPI app
# app.include_router(router)
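A hedged client-side sketch of the flow these endpoints enable, assuming the router is mounted at the application root and using the default host/port (credentials are illustrative):

import requests

BASE = "http://localhost:9600"  # assumed from the default config

# Register, then log in to obtain a session token.
creds = {"username": "alice", "email": "alice@example.com", "password": "secret"}
requests.post(f"{BASE}/register", json=creds)
token = requests.post(f"{BASE}/login", json=creds).json()["token"]

# The token travels in the custom "token" header that get_current_user expects.
me = requests.get(f"{BASE}/current_user", headers={"token": token})
print(me.json()["username"])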

View File

@ -0,0 +1,153 @@
"""
project: lollms_webui
file: lollms_rag.py
author: ParisNeo
description:
This module contains a set of FastAPI routes that allow users to interact with the RAG (Retrieval-Augmented Generation) library.
Usage:
1. Initialize the RAG system by adding documents using the /add_document endpoint.
2. Build the index using the /index_database endpoint.
3. Perform searches using the /search endpoint.
4. Remove documents using the /remove_document/{document_id} endpoint.
5. Wipe the entire database using the /wipe_database endpoint.
Authentication:
- If lollms_access_keys are specified in the configuration, API key authentication is required.
- If no keys are specified, authentication is bypassed, and all users are treated as user ID 1.
User Management:
- Each user gets a unique vectorizer based on their API key.
- If no API keys are specified, all requests are treated as coming from user ID 1.
Note: Ensure proper security measures are in place when deploying this API in a production environment.
"""
from fastapi import APIRouter, Request, HTTPException, Depends, Header
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from lollms.security import sanitize_path, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List, Optional, Union
from pathlib import Path
from fastapi.security import APIKeyHeader
from lollmsvectordb.database_elements.chunk import Chunk
from lollmsvectordb.vector_database import VectorDatabase
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
import sqlite3
import secrets
import time
import shutil
import os
from datetime import datetime, timedelta
import asyncio
from contextlib import asynccontextmanager
import hashlib
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer: LOLLMSWebUI = LOLLMSWebUI.get_instance()
api_key_header = APIKeyHeader(name="Authorization")
# ----------------------- RAG System ------------------------------
class RAGQuery(BaseModel):
query: str = Field(..., description="The query to process using RAG")
class RAGResponse(BaseModel):
answer: str = Field(..., description="The generated answer")
sources: List[str] = Field(..., description="List of sources used for the answer")
class IndexDocument(BaseModel):
title: str = Field(..., description="The title of the document")
content: str = Field(..., description="The content to be indexed")
path: str = Field(default="unknown", description="The path of the document")
class IndexResponse(BaseModel):
success: bool = Field(..., description="Indicates if the indexing was successful")
message: str = Field(..., description="Additional information about the indexing process")
class DocumentResponse(BaseModel):
success: bool
message: str
class RAGChunk(BaseModel):
id : int
chunk_id : int
doc_title : str
doc_path : str
text : str
nb_tokens : int
distance : float
def get_user_id(bearer_key: str) -> int:
"""
Determine the user ID based on the bearer key.
If no keys are specified in the configuration, always return 1.
"""
if not lollmsElfServer.config.lollms_access_keys:
return 1
# Use the index of the key in the list as the user ID
try:
return lollmsElfServer.config.lollms_access_keys.index(bearer_key) + 1
except ValueError:
raise HTTPException(status_code=403, detail="Invalid API Key")
def get_user_vectorizer(user_id: int, bearer_key: str):
small_key = hashlib.md5(bearer_key.encode()).hexdigest()[:8]
user_folder = lollmsElfServer.lollms_paths / str(user_id)
user_folder.mkdir(parents=True, exist_ok=True)
return VectorDatabase(
str(user_folder / f"rag_db_{small_key}.sqlite"),
BERTVectorizer(lollmsElfServer.config.rag_vectorizer_model) if lollmsElfServer.config.rag_vectorizer == "bert" else TFIDFVectorizer(),
lollmsElfServer.model,
chunk_size=lollmsElfServer.config.rag_chunk_size,
overlap=lollmsElfServer.config.rag_overlap
)
async def get_current_user(bearer_token: str = Depends(api_key_header)):
if lollmsElfServer.config.lollms_access_keys:
if bearer_token not in lollmsElfServer.config.lollms_access_keys:
raise HTTPException(status_code=403, detail="Invalid API Key")
return bearer_token
@router.post("/add_document", response_model=DocumentResponse)
async def add_document(doc: IndexDocument, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
vectorizer.add_document(title=doc.title, text=doc.content, path=doc.path)
return DocumentResponse(success=True, message="Document added successfully.")
@router.post("/remove_document/{document_id}", response_model=DocumentResponse)
async def remove_document(document_id: int, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
# Logic to remove the document by ID
return DocumentResponse(success=True, message="Document removed successfully.")
@router.post("/index_database", response_model=DocumentResponse)
async def index_database(user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
vectorizer.build_index()
return DocumentResponse(success=True, message="Database indexed successfully.")
@router.post("/search", response_model=List[RAGChunk])
async def search(query: RAGQuery, user: str = Depends(get_current_user)):
user_id = get_user_id(user)
vectorizer = get_user_vectorizer(user_id, user)
chunks = vectorizer.search(query.query)
return [RAGChunk(id=c.id, chunk_id=c.chunk_id, doc_title=c.doc.title, doc_path=c.doc.path, text=c.text, nb_tokens=c.nb_tokens, distance=c.distance) for c in chunks]
@router.delete("/wipe_database", response_model=DocumentResponse)
async def wipe_database(user: str = Depends(get_current_user)):
user_id = get_user_id(user)
user_folder = lollmsElfServer.lollms_paths / str(user_id)
shutil.rmtree(user_folder, ignore_errors=True)
return DocumentResponse(success=True, message="Database wiped successfully.")

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 130
binding_name: null
model_name: null
model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
end_ai_message_id_template: ""
system_message_template: "system"
use_continue_message: true
seed: -1
ctx_size: 4084
@ -76,6 +77,9 @@ discussion_db_name: default
# Automatic updates
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
@ -97,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tti_service: "None" # autosd (offline), dall-e (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -150,12 +154,19 @@ xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050
# openai_tts configuration
openai_tts_key: ""
openai_tts_model: "tts-1"
openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
# ***************** TTI *****************
use_negative_prompt: true
@ -181,6 +192,8 @@ dall_e_generation_engine: "dall-e-3"
# Midjourney service key
midjourney_key: ""
midjourney_timeout: 300
midjourney_retries: 1
# Image generation service comfyui
enable_comfyui_service: false
@ -205,8 +218,7 @@ petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
lollms_access_keys : "" # set a list of keys separated by coma to restrict access
lollms_access_keys : [] # set a list of keys separated by coma to restrict access
activate_lollms_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
@ -240,11 +252,18 @@ rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap
rag_n_chunks: 4 #number of chunks to retrieve from the database
rag_clean_chunks: true #removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will vectorize the content of subfolders too
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk before writing it in a simple format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to perform multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #removes any chunk with fewer than this many tokens
rag_max_n_hops: 3 #maximum number of hops in multi-hop RAG
contextual_summary: false #if activated, this completely replaces RAG with a contextual summary
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database