# lollms/configs/config.yaml

# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
# Configuration schema version number.
version: 154

# Video viewing and news tracking (last video / changelog version seen; unset by default)
last_viewed_video: null
last_viewed_changelog_version: null

# Binding and model selection (all unset by default)
binding_name: null
model_name: null
model_variant: null
model_type: null

# News panel flag (off by default)
show_news_panel: false

# Security measures
# All turn_on_* flags below are enabled by default.
# NOTE(review): judging by the names, the *_validation flags gate a confirmation
# step for the corresponding action — confirm in the consuming code.
turn_on_setting_update_validation: true
turn_on_code_execution: true
turn_on_code_validation: true
turn_on_open_file_validation: true
turn_on_send_file_validation: true
turn_on_language_validation: true

# Remote access override (disabled by default)
force_accept_remote_access: false

# Server information
headless_server_mode: false
allowed_origins: []

# Host information (address and port)
host: localhost
port: 9600

# Custom application logo (empty string by default)
app_custom_logo: ''

# Generation parameters
# Header / message template strings (presumably used to assemble the prompt
# text — confirm in the consumer).
discussion_prompt_separator: '!@>'
start_header_id_template: '!@>'
end_header_id_template: ': '

# Kept double-quoted so that the \n escape is parsed as a newline character.
separator_template: "\n"

# User header / message templates
start_user_header_id_template: '!@>'
end_user_header_id_template: ': '
end_user_message_id_template: ''

# AI header / message templates
start_ai_header_id_template: '!@>'
end_ai_header_id_template: ': '
end_ai_message_id_template: ''

system_message_template: 'system'
use_continue_message: true

# Default sampling and context-size parameters
seed: -1
ctx_size: 4084
max_n_predict: 4084
min_n_predict: 1024
temperature: 0.9
top_k: 50
top_p: 0.95
repeat_last_n: 40
repeat_penalty: 1.2
num_experts_per_token: 2

# Thread count
n_threads: 8

# Personality parameters
personalities:
  - 'generic/lollms'
active_personality_id: 0
# If true, the personality parameters are overridden by those of the
# configuration (may affect personality behaviour).
override_personality_model_parameters: false

extensions: []

# User information and name-display flags
user_name: user
user_description: ''
use_assistant_name_in_discussion: false
use_user_name_in_discussions: false
use_model_name_in_discussions: false
user_avatar: null
use_user_informations_in_discussion: false

# UI parameters
discussion_db_name: default

# Debugging
debug: false
debug_show_final_full_prompt: false
debug_show_chunks: false
debug_log_file_path: ''

# Automatic updates and synchronization
auto_update: true
auto_sync_personalities: true
auto_sync_extensions: true
auto_sync_bindings: true
auto_sync_models: true

# Auto-save / auto-title flags
auto_save: true
auto_title: false

# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
hardware_mode: nvidia-tensorcores

# Automatically open the browser
auto_show_browser: true

# Copy to clipboard
copy_to_clipboard_add_all_details: false

# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
# (the ttm and ttv entries follow the same pattern).
# The quoted string 'None' is the default for every entry.

# xtts (offline), openai_tts (API key required), elevenlabs_tts, fish_tts (API key required)
active_tts_service: 'None'
# autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_tti_service: 'None'
# whisper (offline), asr (offline or online), openai_whisper (API key required)
active_stt_service: 'None'
# musicgen (offline)
active_ttm_service: 'None'
# cog_video_x, diffusers, lumalab (offline)
active_ttv_service: 'None'
# -------------------- Services --------------------------

# ***************** STT *****************
stt_input_device: 0

# STT service (audio capture parameters)
stt_listening_threshold: 1000
stt_silence_duration: 2
stt_sound_threshold_percentage: 10
stt_gain: 1.0
stt_rate: 44100
stt_channels: 1
stt_buffer_size: 10

# Word detection (disabled by default, no detection file set)
stt_activate_word_detection: false
stt_word_detection_file: null

# ASR STT service
asr_enable: false
asr_base_url: http://localhost:9000

# openai_whisper configuration
openai_whisper_key: ''
openai_whisper_model: 'whisper-1'

# whisper configuration
whisper_activate: false
whisper_model: base


# ***************** TTS *****************
tts_output_device: 0

# Voice service (xtts)
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
xtts_freq: 22050

# openai_tts configuration
openai_tts_key: ''
openai_tts_model: 'tts-1'
openai_tts_voice: 'alloy'

# elevenlabs_tts configuration
elevenlabs_tts_key: ''
elevenlabs_tts_model_id: 'eleven_turbo_v2_5'
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL

# fish_tts configuration
fish_tts_key: ''
fish_tts_voice: 'default'

# ***************** TTI *****************

# Negative prompt handling
use_negative_prompt: true
use_ai_generated_negative_prompt: false
negative_prompt_generation_prompt: 'Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.'
default_negative_prompt: '(((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))'

# Image generation service (sd)
enable_sd_service: false
sd_base_url: http://localhost:7860

# Image generation service (fooocus)
enable_fooocus_service: false
fooocus_base_url: http://localhost:7860

# diffusers
# Offloading mode; the default is sequential_cpu_offload
diffusers_offloading_mode: sequential_cpu_offload
diffusers_model: v2ray/stable-diffusion-3-medium-diffusers

# diffusers client
diffusers_client_base_url: http://localhost:8593

# Dall e service key
dall_e_key: ''
dall_e_generation_engine: 'dall-e-3'

# Midjourney service key
midjourney_key: ''
midjourney_timeout: 300
midjourney_retries: 1

# Image generation service comfyui
enable_comfyui_service: false
comfyui_base_url: http://127.0.0.1:8188/
comfyui_model: v1-5-pruned-emaonly.ckpt

# Motion control service
enable_motion_ctrl_service: false
motion_ctrl_base_url: http://localhost:7861


# ***************** TTV *****************
# cog_video_x model identifier
cog_video_x_model: 'THUDM/CogVideoX-5b'

# lumalabs configuration (key is empty by default)
lumalabs_key: ''

# ***************** TTT *****************

# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434

# petals service
enable_petals_service: false
petals_base_url: http://localhost:8064
petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
petals_device: cuda

# lollms service
enable_lollms_service: false
# Set a list of keys separated by commas to restrict access
lollms_access_keys: []
activate_lollms_server: true
activate_lollms_rag_server: true
activate_lollms_tts_server: true
activate_lollms_stt_server: true
activate_lollms_tti_server: true
activate_lollms_itt_server: true
activate_lollms_ttm_server: true
activate_ollama_emulator: true
activate_openai_emulator: true
activate_mistralai_emulator: true

# Smart routing
use_smart_routing: false
smart_routing_router_model: ''
smart_routing_models_description: {}
restore_model_after_smart_routing: false

# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200

# vLLM service
enable_vllm_service: false
vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256


# Audio
# Audio input/output defaults.
media_on: false
audio_in_language: 'en-US'
auto_speak: false
audio_out_voice: null
audio_pitch: 1
audio_auto_send_input: true
# NOTE(review): unit is not stated here; presumably milliseconds — confirm in the consumer
audio_silenceTimer: 5000

# Remote databases
# This is the list of datalakes to be used for RAG.
# (The fields of a datalake entry are not documented in this file.)
datalakes: []

# Data vectorization
rag_local_services: []  # list of rag services served locally

rag_vectorizer: semantic  # possible values: semantic, tfidf, openai, ollama
rag_service_url: 'http://localhost:11434'  # rag service url for ollama
rag_vectorizer_model: 'BAAI/bge-m3'  # the model name, if applicable
rag_vectorizer_execute_remote_code: false  # whether or not to execute remote code
rag_vectorizer_parameters: null  # parameters of the model in json format
rag_chunk_size: 512  # number of tokens per chunk
rag_overlap: 0  # number of tokens of overlap
rag_min_correspondance: 0  # minimum correspondence between the query and the content

rag_n_chunks: 4  # number of chunks to recover from the database
rag_clean_chunks: true  # removes all unnecessary spaces and line returns
rag_follow_subfolders: true  # if true, the vectorizer will vectorize the content of subfolders too
# If true, the vectorizer will automatically check for any new files in the
# folder and add them to the database.
rag_check_new_files_at_startup: false
# If true, an LLM will preprocess the content of the chunk before writing it
# in a simple format.
rag_preprocess_chunks: false
# If true, a multi hops algorithm is used to do multiple searches until the AI
# has enough data.
rag_activate_multi_hops: false
rag_min_nb_tokens_in_chunk: 10  # removes any useless chunk with fewer than x tokens
rag_max_n_hops: 3  # maximum number of hops in multi hops rag

# If you have a large context model, you can activate this to use your
# document as a whole.
rag_deactivate: false
# The OpenAI key (if not provided, the environment variable OPENAI_API_KEY is used).
rag_vectorizer_openai_key: ''

# If activated, this completely replaces the rag and uses contextual summary instead.
contextual_summary: false

activate_skills_lib: false  # activate vectorizing previous conversations
skills_lib_database_name: 'default'  # default skills database

max_summary_size: 512  # in tokens

# If true, each chunk is preceded by its information, which may waste some
# context space but allows the AI to point to where it found the information.
rag_put_chunk_informations_into_context: false
# If true, when querying the database, keywords generated from the user prompt
# are used instead of the prompt itself.
rag_build_keys_words: true

# Internet search settings
activate_internet_search: false
activate_internet_pages_judgement: true
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 0 # size of the overlap between chunks
internet_vectorization_nb_chunks: 4 # number of chunks to use
internet_nb_search_pages: 8 # number of pages to select
internet_quick_search: false # if active, the search engine will not load and read the webpages
internet_activate_search_decision: false # if active, the AI decides by itself whether it needs to search
# Helpers
# Path to pdflatex, judging by the key name (unset by default) — TODO confirm
pdf_latex_path: null

# Boosting information (both boosts unset by default)
positive_boost: null
negative_boost: null
# Language and mode flags
current_language: english
fun_mode: false
think_first_mode: false
# Thinking prompt.
# NOTE(review): presumably injected when think_first_mode is enabled — confirm
# in the consumer.
# Written as a literal block scalar (|) so that every instruction stays on its
# own line in the parsed value. A multi-line double-quoted scalar folds each
# line break into a single space, which collapses the whole prompt into run-on
# paragraphs. The value ends with one newline, which keeps a separator between
# this prompt and any text appended after it.
thinking_prompt: |
  <thinking>
  Begin by thoroughly analyzing the user's request.
  If the user is seeking general information that does not require an internet search and you are confident in your knowledge, prepare a direct response.
  If the user is asking for general information that requires an internet search, use any relevant information from the provided context to formulate your answer.
  If there is no relevant information in the context, politely ask the user to enable internet search.

  When faced with riddles or math problems, employ rigorous hypothesis testing and analysis.
  For tasks or requests that require planning, break them down into logical steps and prepare your response accordingly.
  If the conversation is casual, refrain from using the think-first process.

  Continue thinking until you arrive at a satisfactory answer.
  Evaluate potential errors and strive to avoid them.
  Consider multiple expert perspectives and approaches to ensure a well-rounded response.
  If necessary, revisit your initial assumptions and revise them based on new insights.
  Ensure that your answer is clear, concise, and free of ambiguity.
  If the question is ambiguous, ask for clarification before providing a response.
  If you are unsure about any aspect of the question, admit your uncertainty and offer to seek additional information.
  </thinking>


function_calls: []
# Shape of a function call entry (the list is empty by default):
# { name: the function name,
#   value: the function name without spaces,
#   selected: selected or not,
#   icon: the icon, in the form feather:icon name, img:url or b64:base64,
#   help: the help text
# }

# Web UI configuration
show_code_of_conduct: true
activate_audio_infos: true