Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-24 06:46:40 +00:00)

Commit e77c97f238: Upgraded core code
Parent: 52532df832
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
 
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
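Note that the enable_gpu boolean above is replaced by the richer hardware_mode setting. Below is a minimal migration sketch, assuming the file is plain YAML read with PyYAML; the enable_gpu-to-hardware_mode mapping is an illustrative guess, not part of this commit:

import yaml

def migrate_config(path: str) -> dict:
    # Load an older config and rename the GPU flag to the new key.
    with open(path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}
    if cfg.get("version", 0) < 40:
        # enable_gpu was a boolean; hardware_mode names an install target.
        # The two defaults chosen here are hypothetical, not from the commit.
        cfg["hardware_mode"] = "nvidia" if cfg.pop("enable_gpu", False) else "cpu"
        cfg["version"] = 40
    return cfg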
@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 
 binding_name: null
 model_name: null
 
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
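The rewrite above replaces the old alphabetized key dump with commented sections (host, generation, personality, services, data vectorization, helpers, boosting). A short sketch of reading a few of the new v40 keys, assuming PyYAML; the file path is illustrative:

import yaml

# Load the reorganized v40 configuration (path is a guess, not from the commit).
with open("configs/config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

print(cfg["version"])                        # 40
print(cfg["hardware_mode"])                  # e.g. nvidia-tensorcores
print(cfg["enable_voice_service"])           # from the voice service section
print(cfg["data_vectorization_nb_chunks"])   # from the data vectorization section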
@@ -1,38 +1,96 @@
-active_personality_id: 0
-auto_save: true
-auto_update: false
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 binding_name: null
-ctx_size: 4096
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: ftidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
-port: 9601
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Host information
+host: localhost
+port: 9600
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_files: true
-use_user_name_in_discussions: false
-user_avatar: default_user
-user_description: ''
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
 user_name: user
-version: 26
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
@@ -452,9 +452,15 @@ Date: {{date}}
         return string
 
     def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
+        if callback is None:
+            callback = self.callback
         if text is None:
             return True
-        bot_says = self.bot_says + text
+        if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
+            bot_says = self.bot_says + text
+        elif message_type==MSG_TYPE.MSG_TYPE_FULL:
+            bot_says = text
+
         if show_progress:
             if self.nb_received_tokens==0:
                 self.start_time = datetime.now()
@@ -475,7 +481,7 @@ Date: {{date}}
                 return False
         else:
             if callback:
-                callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
+                callback(text,message_type)
         self.bot_says = bot_says
         return True
 
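The updated process() now distinguishes incremental chunks from full messages and forwards the original message_type to the callback instead of hardcoding MSG_TYPE_CHUNK. A hedged sketch of a consumer-side callback matching that contract, assuming MSG_TYPE as imported elsewhere in this commit; the stop-on-False convention mirrors the other callbacks in the diff:

from lollms.types import MSG_TYPE

def my_callback(text, message_type):
    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(text, end="", flush=True)   # streamed fragment: append to output
    elif message_type == MSG_TYPE.MSG_TYPE_FULL:
        print("\n" + text)                # complete message: replace output
    return True                           # returning False would stop generation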
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
 
@@ -7,7 +7,7 @@ model_name: null
 
 # Host information
 host: localhost
-port: 9601
+port: 9600
 
 # Genreration parameters
 discussion_prompt_separator: "!@>"
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
@@ -6,15 +6,85 @@ from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors
 class GenerateRequest(BaseModel):
+    """
+    Data model for the Generate Request.
+
+    Attributes:
+    - text: str representing the input text prompt for text generation.
+    - n_predict: int representing the number of predictions to generate.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - top_k: int representing the top_k parameter for text generation.
+    - top_p: float representing the top_p parameter for text generation.
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
+    - seed: int representing the seed for text generation.
+    - n_threads: int representing the number of threads for text generation.
+    """
     text: str
     n_predict: int = 1024
     stream: bool = False
+    temperature: float = 0.4
+    top_k: int = 50
+    top_p: float = 0.6
+    repeat_penalty: float = 1.3
+    repeat_last_n: int = 40
+    seed: int = -1
+    n_threads: int = 1
+
+class V1ChatGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    messages: list
+    stream: bool
+    temperature: float
+    max_tokens: float
+
+
+class V1InstructGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    prompt: str
+    stream: bool
+    temperature: float
+    max_tokens: float
+
 
 router = APIRouter()
 elf_server = LOLLMSElfServer.get_instance()
 
 @router.post("/generate")
-def generate(request_data: GenerateRequest):
+def lollms_generate(request_data: GenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server.
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
     text = request_data.text
     n_predict = request_data.n_predict
     stream = request_data.stream
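With the extra sampling fields on GenerateRequest, a client can now control generation per request. A minimal client sketch for the /generate route above, using the requests library; host and port are the defaults from the config files in this commit:

import requests

# Query the /generate endpoint; stream=False asks for the full text at once.
response = requests.post(
    "http://localhost:9600/generate",
    json={
        "text": "Once upon a time",
        "n_predict": 128,
        "stream": False,
        "temperature": 0.4,
    },
)
print(response.text)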
@@ -34,7 +104,18 @@ def generate(request_data: GenerateRequest):
                 else:
                     yield chunk
                     return True
-            return iter(elf_server.binding.generate(text, n_predict, callback=callback))
+            return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature,
+                                            top_k=request_data.top_k,
+                                            top_p=request_data.top_p,
+                                            repeat_penalty=request_data.repeat_penalty,
+                                            repeat_last_n=request_data.repeat_last_n,
+                                            seed=request_data.seed,
+                                            n_threads=request_data.n_threads
+                                        ))
 
         return StreamingResponse(generate_chunks())
     else:
@@ -49,7 +130,154 @@ def generate(request_data: GenerateRequest):
                     return False
                 else:
                     return True
-            elf_server.binding.generate(text, n_predict, callback=callback)
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature,
+                                top_k=request_data.top_k,
+                                top_p=request_data.top_p,
+                                repeat_penalty=request_data.repeat_penalty,
+                                repeat_last_n=request_data.repeat_last_n,
+                                seed=request_data.seed,
+                                n_threads=request_data.n_threads
+                            )
+            return output["text"]
+    else:
+        return None
+
+
+# openai compatible generation
+@router.post("/v1/chat/completions")
+def v1_chat_generate(request_data: V1ChatGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+    messages = request_data.messages
+    text = ""
+    for message in messages:
+        text += f"{message['role']}: {message['content']}\n"
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature
+                            )
+            return output["text"]
+    else:
+        return None
+
+
+
+
+# openai compatible generation
+@router.post("/v1/completions")
+def v1_instruct_generate(request_data: V1InstructGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+
+    text = request_data.prompt
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature
+                            )
             return output["text"]
     else:
         return None
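Since /v1/chat/completions and /v1/completions accept OpenAI-style payloads, any plain HTTP client can drive them. A hedged sketch using requests; the model value is illustrative, since the server generates with whatever binding is currently loaded:

import requests

# OpenAI-style chat payload against the new lollms route; field names
# mirror V1ChatGenerateRequest above.
response = requests.post(
    "http://localhost:9600/v1/chat/completions",
    json={
        "model": "current",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
        "temperature": 0.7,
        "max_tokens": 256,
    },
)
print(response.text)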
@@ -1,48 +1,96 @@
-active_personality_id: -1
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: null
-audio_pitch: 1
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 binding_name: null
-ctx_size: 4084
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_vectorization_visualize_on_vectorization: false
-data_visualization_method: PCA
-db_path: database.db
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities: []
+
+# Host information
+host: localhost
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: false
-use_files: true
-use_user_informations_in_discussion: false
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
 use_user_name_in_discussions: false
 user_avatar: default_user.svg
-user_description: ''
-user_name: user
-version: 27
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null