Upgraded core code

2025-04-13 22:02:56 +00:00 · 2024-01-04 03:56:42 +01:00 · 2024-01-04 03:56:42 +01:00 · e77c97f238
commit e77c97f238
parent 52532df832
9 changed files with 599 additions and 169 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file =========================== 
-version: 39
+version: 40
 binding_name: null
 model_name: null

@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true

--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
-
+# =================== Lord Of Large Language Models Configuration file =========================== 
+version: 40
 binding_name: null
 model_name: null

-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+
+
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description: 
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
-
+# =================== Lord Of Large Language Models Configuration file =========================== 
+version: 40
 binding_name: null
 model_name: null

-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+
+
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description: 
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -1,38 +1,96 @@
-active_personality_id: 0
-auto_save: true
-auto_update: false
+# =================== Lord Of Large Language Models Configuration file =========================== 
+version: 40
 binding_name: null
-ctx_size: 4096
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: ftidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
- generic/lollms
-port: 9601
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+
+
+# Host information
+host: localhost
+port: 9600
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_files: true
-use_user_name_in_discussions: false
-user_avatar: default_user
-user_description: ''
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
 user_name: user
-version: 26
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file =========================== 
-version: 39
+version: 40
 binding_name: null
 model_name: null

@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true

--- a/lollms/personality.py
+++ b/lollms/personality.py
@ -452,9 +452,15 @@ Date: {{date}}
        return string

    def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
+        if callback is None:
+            callback = self.callback
        if text is None:
            return True
-        bot_says = self.bot_says + text
+        if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
+            bot_says = self.bot_says + text
+        elif  message_type==MSG_TYPE.MSG_TYPE_FULL:
+            bot_says = text
+
        if show_progress:
            if self.nb_received_tokens==0:
                self.start_time = datetime.now()
@ -475,7 +481,7 @@ Date: {{date}}
            return False
        else:
            if callback:
-                callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
+                callback(text,message_type)
            self.bot_says = bot_says
            return True

--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file =========================== 
-version: 39
+version: 40
 binding_name: null
 model_name: null

@ -7,7 +7,7 @@ model_name: null

 # Host information
 host: localhost
-port: 9601
+port: 9600

 # Genreration parameters 
 discussion_prompt_separator: "!@>"
@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true

--- a/lollms/server/endpoints/lollms_generator.py
+++ b/lollms/server/endpoints/lollms_generator.py
@ -6,15 +6,85 @@ from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors
 class GenerateRequest(BaseModel):
+    """
+    Request body model for the /generate endpoint.
+
+    Attributes:
+    - text: str representing the input text prompt for text generation (required).
+    - n_predict: int representing the number of predictions to generate (default 1024).
+    - stream: bool indicating whether to stream the generated text or not (default False).
+    - temperature: float representing the temperature parameter for text generation (default 0.4).
+    - top_k: int representing the top_k parameter for text generation (default 50).
+    - top_p: float representing the top_p parameter for text generation (default 0.6).
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation (default 1.3).
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation (default 40).
+    - seed: int representing the seed for text generation (default -1).
+    - n_threads: int representing the number of threads for text generation (default 1).
+    """    
    text: str
    n_predict: int = 1024
    stream: bool = False
+    temperature: float = 0.4
+    top_k: int = 50
+    top_p: float = 0.6
+    repeat_penalty: float = 1.3
+    repeat_last_n: int = 40
+    seed: int = -1
+    n_threads: int = 1
+
+class V1ChatGenerateRequest(BaseModel):
+    """
+    Request body model for the /v1/chat/completions endpoint.
+
+    Attributes (all required: no field declares a default):
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages; the endpoint reads each item's 'role' and 'content' keys to build the prompt.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float forwarded to the binding as n_predict (NOTE(review): token budgets are usually ints, confirm float is intended).
+    """    
+    model: str
+    messages: list
+    stream: bool
+    temperature: float
+    max_tokens: float
+
+
+class V1InstructGenerateRequest(BaseModel):
+    """
+    Request body model for the /v1/completions (instruct) endpoint.
+
+    Attributes (all required: no field declares a default):
+    - model: str representing the model to be used for text generation.
+    - prompt: str holding the raw prompt text; the endpoint passes it unchanged to the binding.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float forwarded to the binding as n_predict (NOTE(review): token budgets are usually ints, confirm float is intended).
+    """    
+    model: str
+    prompt: str
+    stream: bool
+    temperature: float
+    max_tokens: float
+

 router = APIRouter()
 elf_server = LOLLMSElfServer.get_instance()

@router.post("/generate")
-def generate(request_data: GenerateRequest):
+def lollms_generate(request_data: GenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server.
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """    
    text = request_data.text
    n_predict = request_data.n_predict
    stream = request_data.stream
@ -34,7 +104,18 @@ def generate(request_data: GenerateRequest):
                    else:
                        yield chunk
                        return True
-                return iter(elf_server.binding.generate(text, n_predict, callback=callback))
+                return iter(elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback, 
+                                            temperature=request_data.temperature,
+                                            top_k=request_data.top_k, 
+                                            top_p=request_data.top_p,
+                                            repeat_penalty=request_data.repeat_penalty,
+                                            repeat_last_n=request_data.repeat_last_n,
+                                            seed=request_data.seed,
+                                            n_threads=request_data.n_threads
+                                        ))
            
            return StreamingResponse(generate_chunks())
        else:
@ -49,7 +130,154 @@ def generate(request_data: GenerateRequest):
                    return False
                else:
                    return True
-            elf_server.binding.generate(text, n_predict, callback=callback)
+            elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback,
+                                            temperature=request_data.temperature,
+                                            top_k=request_data.top_k, 
+                                            top_p=request_data.top_p,
+                                            repeat_penalty=request_data.repeat_penalty,
+                                            repeat_last_n=request_data.repeat_last_n,
+                                            seed=request_data.seed,
+                                            n_threads=request_data.n_threads
+                                        )
+            return output["text"]
+    else:
+        return None
+    
+
+# openai compatible generation
+@router.post("/v1/chat/completions")
+def v1_chat_generate(request_data: V1ChatGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
+    Intended to be compatible with the OpenAI and MistralAI chat completion APIs.
+    Args:
+    - request_data: V1ChatGenerateRequest; its messages are flattened into "role: content" lines to form the prompt.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """    
+    messages = request_data.messages
+    text = ""
+    for message in messages:
+        text += f"{message['role']}: {message['content']}\n"
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+    
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # NOTE(review): the `yield` below makes this callback a generator function, so calling it only creates a generator and runs none of this body - confirm streaming works
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback, 
+                                            temperature=request_data.temperature
+                                        ))
+            
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Accumulate each chunk; returning False asks the binding to stop once an antiprompt is detected
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        )
+            return output["text"]
+    else:
+        return None
+
+
+
+
+# openai compatible generation
+@router.post("/v1/completions")
+def v1_instruct_generate(request_data: V1InstructGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
+    Intended to be compatible with the OpenAI and MistralAI completion APIs.
+    Args:
+    - request_data: V1InstructGenerateRequest; its prompt is used as-is and max_tokens is passed as n_predict.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """    
+   
+    text = request_data.prompt
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+    
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # NOTE(review): the `yield` below makes this callback a generator function, so calling it only creates a generator and runs none of this body - confirm streaming works
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback, 
+                                            temperature=request_data.temperature
+                                        ))
+            
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Accumulate each chunk; returning False asks the binding to stop once an antiprompt is detected
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                            text, 
+                                            n_predict, 
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        )
            return output["text"]
    else:
        return None
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -1,48 +1,96 @@
-active_personality_id: -1
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: null
-audio_pitch: 1
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file =========================== 
+version: 40
 binding_name: null
-ctx_size: 4084
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_vectorization_visualize_on_vectorization: false
-data_visualization_method: PCA
-db_path: database.db
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities: []
+
+
+
+# Host information
+host: localhost
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: false
-use_files: true
-use_user_informations_in_discussion: false
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
 use_user_name_in_discussions: false
 user_avatar: default_user.svg
-user_description: ''
-user_name: user
-version: 27
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null