diff --git a/configs/config.yaml b/configs/config.yaml
index e357897..fd5fc3c 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 
 binding_name: null
 model_name: null
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
index 3d0ed0e..fd5fc3c 100644
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
-
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
 model_name: null
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+
+
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+# Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false # if true, the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # Activate discussion summarization (better results but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # If false, the vectorization database is rebuilt for each new session
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# Boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
index 3d0ed0e..fd5fc3c 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
-
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
 model_name: null
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+
+
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+# Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false # if true, the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # Activate discussion summarization (better results but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # If false, the vectorization database is rebuilt for each new session
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# Boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
index 92aa6ff..fd5fc3c 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@@ -1,38 +1,96 @@
-active_personality_id: 0
-auto_save: true
-auto_update: false
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
-ctx_size: 4096
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: ftidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
-port: 9601
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+
+
+# Host information
+host: localhost
+port: 9600
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_files: true
-use_user_name_in_discussions: false
-user_avatar: default_user
-user_description: ''
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+# Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false # if true, the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
 user_name: user
-version: 26
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # Activate discussion summarization (better results but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # If false, the vectorization database is rebuilt for each new session
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# Boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml
index e357897..fd5fc3c 100644
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 
 binding_name: null
 model_name: null
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
diff --git a/lollms/personality.py b/lollms/personality.py
index 1e01ebf..6909862 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -452,9 +452,15 @@ Date: {{date}}
         return string
 
     def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
+        if callback is None:
+            callback = self.callback
         if text is None:
             return True
-        bot_says = self.bot_says + text
+        if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
+            bot_says = self.bot_says + text
+        elif message_type==MSG_TYPE.MSG_TYPE_FULL:
+            bot_says = text
+
         if show_progress:
             if self.nb_received_tokens==0:
                 self.start_time = datetime.now()
@@ -475,7 +481,7 @@
             return False
         else:
             if callback:
-                callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
+                callback(text,message_type)
             self.bot_says = bot_says
             return True
diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml
index 25d0ec5..fd5fc3c 100644
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 
 binding_name: null
 model_name: null
@@ -7,7 +7,7 @@ model_name: null
 
 # Host information
 host: localhost
-port: 9601
+port: 9600
 
 # Genreration parameters
 discussion_prompt_separator: "!@>"
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
diff --git a/lollms/server/endpoints/lollms_generator.py b/lollms/server/endpoints/lollms_generator.py
index 09487cb..49a4865 100644
--- a/lollms/server/endpoints/lollms_generator.py
+++ b/lollms/server/endpoints/lollms_generator.py
@@ -6,15 +6,85 @@
 from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors
 
 class GenerateRequest(BaseModel):
+    """
+    Data model for the Generate Request.
+
+    Attributes:
+    - text: str representing the input text prompt for text generation.
+    - n_predict: int representing the number of predictions to generate.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - top_k: int representing the top_k parameter for text generation.
+    - top_p: float representing the top_p parameter for text generation.
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
+    - seed: int representing the seed for text generation.
+    - n_threads: int representing the number of threads for text generation.
+    """
     text: str
     n_predict: int = 1024
     stream: bool = False
+    temperature: float = 0.4
+    top_k: int = 50
+    top_p: float = 0.6
+    repeat_penalty: float = 1.3
+    repeat_last_n: int = 40
+    seed: int = -1
+    n_threads: int = 1
+
+class V1ChatGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    messages: list
+    stream: bool
+    temperature: float
+    max_tokens: float
+
+
+class V1InstructGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Instruct Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - prompt: str representing the input prompt for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    prompt: str
+    stream: bool
+    temperature: float
+    max_tokens: float
+
 
 router = APIRouter()
 elf_server = LOLLMSElfServer.get_instance()
 
 @router.post("/generate")
-def generate(request_data: GenerateRequest):
+def lollms_generate(request_data: GenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server.
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, generation parameters, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
     text = request_data.text
     n_predict = request_data.n_predict
     stream = request_data.stream
@@ -34,7 +104,18 @@
             else:
                 yield chunk
                 return True
-            return iter(elf_server.binding.generate(text, n_predict, callback=callback))
+            return iter(elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature,
+                top_k=request_data.top_k,
+                top_p=request_data.top_p,
+                repeat_penalty=request_data.repeat_penalty,
+                repeat_last_n=request_data.repeat_last_n,
+                seed=request_data.seed,
+                n_threads=request_data.n_threads
+            ))
 
         return StreamingResponse(generate_chunks())
     else:
@@ -49,7 +130,154 @@
                     return False
                 else:
                     return True
-            elf_server.binding.generate(text, n_predict, callback=callback)
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature,
+                top_k=request_data.top_k,
+                top_p=request_data.top_p,
+                repeat_penalty=request_data.repeat_penalty,
+                repeat_last_n=request_data.repeat_last_n,
+                seed=request_data.seed,
+                n_threads=request_data.n_threads
+            )
+            return output["text"]
+        else:
+            return None
+
+
+# openai compatible generation
+@router.post("/v1/chat/completions")
+def v1_chat_generate(request_data: V1ChatGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
+    This endpoint is compatible with the OpenAI API and the MistralAI API.
+    Args:
+    - request_data: V1ChatGenerateRequest object containing the messages, generation parameters, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+    messages = request_data.messages
+    text = ""
+    for message in messages:
+        text += f"{message['role']}: {message['content']}\n"
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=request_data.temperature
+                ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature
+            )
+            return output["text"]
+    else:
+        return None
+
+
+
+
+# openai compatible generation
+@router.post("/v1/completions")
+def v1_instruct_generate(request_data: V1InstructGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
+    This endpoint is compatible with the OpenAI API and the MistralAI API.
+    Args:
+    - request_data: V1InstructGenerateRequest object containing the prompt, generation parameters, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+
+    text = request_data.prompt
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=request_data.temperature
+                ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature
+            )
             return output["text"]
     else:
         return None
diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml
index b702d67..fd5fc3c 100644
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@@ -1,48 +1,96 @@
-active_personality_id: -1
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: null
-audio_pitch: 1
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
-ctx_size: 4084
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_vectorization_visualize_on_vectorization: false
-data_visualization_method: PCA
-db_path: database.db
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities: []
+
+
+
+# Host information
+host: localhost
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Generation parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: false
-use_files: true
-use_user_informations_in_discussion: false
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+# Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false # if true, the personality parameters are overridden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
 use_user_name_in_discussions: false
 user_avatar: default_user.svg
-user_description: ''
-user_name: user
-version: 27
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # Activate discussion summarization (better results but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # If false, the vectorization database is rebuilt for each new session
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistent between runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# Boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
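
Usage note (appendix; not part of the patch): the /generate endpoint now accepts the sampling fields added to GenerateRequest above. Below is a minimal sketch of a client call, assuming a lollms server listening on localhost:9600 (the port set by these configs) with a binding and model already loaded; the requests library and the example prompt are illustrative choices, not part of the patch.

    # Minimal sketch: calling /generate with the new sampling parameters.
    # Field names mirror the GenerateRequest model defined in the patch.
    import requests

    payload = {
        "text": "Once upon a time",
        "n_predict": 128,        # number of tokens to generate
        "stream": False,         # False -> the full text comes back in one response
        "temperature": 0.4,
        "top_k": 50,
        "top_p": 0.6,
        "repeat_penalty": 1.3,
        "repeat_last_n": 40,
        "seed": -1,              # -1 lets the binding choose a seed
        "n_threads": 8,
    }
    response = requests.post("http://localhost:9600/generate", json=payload)
    print(response.json())       # the generated text, JSON-encoded as a string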
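
A similar sketch for the new OpenAI-compatible chat endpoint. The request shape follows the V1ChatGenerateRequest model (all five fields are required, since none declare defaults); note that the handler in this patch reads but does not use the model field, generating with the server's configured binding instead, and that the non-streaming branch returns the generated text as a bare string rather than an OpenAI-style chat.completion object.

    # Minimal sketch: calling the OpenAI-compatible /v1/chat/completions endpoint.
    import requests

    payload = {
        "model": "local-model",  # required by the schema; ignored by this patch's handler
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello."},
        ],
        "stream": False,
        "temperature": 0.7,
        "max_tokens": 256,
    }
    response = requests.post("http://localhost:9600/v1/chat/completions", json=payload)
    print(response.json())       # a plain string with the reply, not a chat.completion object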