Upgraded core code

Saifeddine ALOUI 2024-01-04 03:56:42 +01:00
parent 52532df832
commit e77c97f238
9 changed files with 599 additions and 169 deletions

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
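
The boolean `enable_gpu` is replaced by a `hardware_mode` string across the bundled configs. A minimal migration sketch, not part of the commit (the helper name and the nvidia/cpu fallback choice are assumptions):

def migrate_hardware_mode(config: dict) -> dict:
    # Hypothetical helper: map the old boolean onto the closest new mode.
    if "enable_gpu" in config and "hardware_mode" not in config:
        config["hardware_mode"] = "nvidia" if config.pop("enable_gpu") else "cpu"
    return config

assert migrate_hardware_mode({"enable_gpu": True})["hardware_mode"] == "nvidia"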

View File

@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
 model_name: null
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+n_threads: 8
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+extensions: []
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+# UI parameters
+db_path: database.db
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+# Helpers
+pdf_latex_path: null
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null

View File

@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
 model_name: null
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+n_threads: 8
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+extensions: []
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+# UI parameters
+db_path: database.db
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+# Helpers
+pdf_latex_path: null
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null

View File

@@ -1,38 +1,96 @@
-active_personality_id: 0
-auto_save: true
-auto_update: false
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
-ctx_size: 4096
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: ftidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
-port: 9601
-repeat_last_n: 40
-repeat_penalty: 1.2
+# Host information
+host: localhost
+port: 9600
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_files: true
-use_user_name_in_discussions: false
-user_avatar: default_user
-user_description: ''
+repeat_last_n: 40
+repeat_penalty: 1.2
+n_threads: 8
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+extensions: []
 user_name: user
-version: 26
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
+use_user_informations_in_discussion: false
+# UI parameters
+db_path: database.db
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+# Helpers
+pdf_latex_path: null
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true

View File

@@ -452,9 +452,15 @@ Date: {{date}}
         return string

     def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
+        if callback is None:
+            callback = self.callback
         if text is None:
             return True
-        bot_says = self.bot_says + text
+        if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
+            bot_says = self.bot_says + text
+        elif message_type==MSG_TYPE.MSG_TYPE_FULL:
+            bot_says = text
         if show_progress:
             if self.nb_received_tokens==0:
                 self.start_time = datetime.now()
@@ -475,7 +481,7 @@ Date: {{date}}
                 return False
         else:
             if callback:
-                callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
+                callback(text,message_type)
         self.bot_says = bot_says
         return True
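
With this change, `process` honors the incoming message type: MSG_TYPE_CHUNK appends to the running answer, MSG_TYPE_FULL replaces it, and the original type is forwarded to the callback instead of being forced to MSG_TYPE_CHUNK. A minimal sketch of a consumer callback under the new contract (illustrative only; the printing behavior is an assumption):

from lollms.types import MSG_TYPE

def my_callback(text: str, message_type: MSG_TYPE) -> bool:
    # Chunks arrive incrementally; a FULL message carries the whole answer so far.
    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(text, end="", flush=True)           # append to the live view
    elif message_type == MSG_TYPE.MSG_TYPE_FULL:
        print("\r" + text, end="", flush=True)    # redraw with the full text
    return True  # returning False would stop generation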

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
@@ -7,7 +7,7 @@ model_name: null
 # Host information
 host: localhost
-port: 9601
+port: 9600
 # Genreration parameters
 discussion_prompt_separator: "!@>"
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true

View File

@@ -6,15 +6,85 @@ from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors

 class GenerateRequest(BaseModel):
+    """
+    Data model for the Generate Request.
+
+    Attributes:
+    - text: str representing the input text prompt for text generation.
+    - n_predict: int representing the number of predictions to generate.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - top_k: int representing the top_k parameter for text generation.
+    - top_p: float representing the top_p parameter for text generation.
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
+    - seed: int representing the seed for text generation.
+    - n_threads: int representing the number of threads for text generation.
+    """
     text: str
     n_predict: int = 1024
     stream: bool = False
+    temperature: float = 0.4
+    top_k: int = 50
+    top_p: float = 0.6
+    repeat_penalty: float = 1.3
+    repeat_last_n: int = 40
+    seed: int = -1
+    n_threads: int = 1
+
+class V1ChatGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    messages: list
+    stream: bool
+    temperature: float
+    max_tokens: float
+
+class V1InstructGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    prompt: str
+    stream: bool
+    temperature: float
+    max_tokens: float
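
The generate request now carries full sampling controls with server-side defaults. A quick validation sketch (not part of the commit; it mirrors the fields added above and runs anywhere pydantic is installed):

from pydantic import BaseModel

class GenerateRequest(BaseModel):  # mirror of the model defined above
    text: str
    n_predict: int = 1024
    stream: bool = False
    temperature: float = 0.4
    top_k: int = 50
    top_p: float = 0.6
    repeat_penalty: float = 1.3
    repeat_last_n: int = 40
    seed: int = -1
    n_threads: int = 1

req = GenerateRequest(text="Hello", temperature=0.7)
print(req.top_k)  # 50: unspecified fields fall back to the defaults above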
 router = APIRouter()
 elf_server = LOLLMSElfServer.get_instance()

 @router.post("/generate")
-def generate(request_data: GenerateRequest):
+def lollms_generate(request_data: GenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server.
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
     text = request_data.text
     n_predict = request_data.n_predict
     stream = request_data.stream
@@ -34,7 +104,18 @@ def generate(request_data: GenerateRequest):
                     else:
                         yield chunk
                     return True
-                return iter(elf_server.binding.generate(text, n_predict, callback=callback))
+                return iter(elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=request_data.temperature,
+                    top_k=request_data.top_k,
+                    top_p=request_data.top_p,
+                    repeat_penalty=request_data.repeat_penalty,
+                    repeat_last_n=request_data.repeat_last_n,
+                    seed=request_data.seed,
+                    n_threads=request_data.n_threads
+                ))
             return StreamingResponse(generate_chunks())
         else:
@@ -49,7 +130,154 @@ def generate(request_data: GenerateRequest):
                     return False
                 else:
                     return True
-            elf_server.binding.generate(text, n_predict, callback=callback)
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature,
+                top_k=request_data.top_k,
+                top_p=request_data.top_p,
+                repeat_penalty=request_data.repeat_penalty,
+                repeat_last_n=request_data.repeat_last_n,
+                seed=request_data.seed,
+                n_threads=request_data.n_threads
+            )
+            return output["text"]
+    else:
+        return None
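
A minimal client sketch for the renamed /generate endpoint (not part of the commit; assumes a server listening on localhost:9600 and the `requests` package):

import requests

payload = {
    "text": "Once upon a time",
    "n_predict": 64,
    "stream": False,
    "temperature": 0.7,  # the new sampling fields are optional
}
# In non-stream mode the full generated text comes back as the response body.
response = requests.post("http://localhost:9600/generate", json=payload)
print(response.text)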
+
+# openai compatible generation
+@router.post("/v1/chat/completions")
+def v1_chat_generate(request_data: V1ChatGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+    messages = request_data.messages
+    text = ""
+    for message in messages:
+        text += f"{message['role']}: {message['content']}\n"
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                    return True
+                return iter(elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=request_data.temperature
+                ))
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature
+            )
+            return output["text"]
+    else:
+        return None
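
Since the endpoint mimics the OpenAI chat API request shape, a plain HTTP call works as a smoke test. A sketch (not part of the commit; assumes localhost:9600 and the `requests` package; note the body returned here is raw text, not the full OpenAI JSON envelope):

import requests

payload = {
    "model": "current",  # accepted for compatibility; the loaded binding is used
    "messages": [{"role": "user", "content": "Say hi"}],
    "stream": False,
    "temperature": 0.7,
    "max_tokens": 64,
}
response = requests.post("http://localhost:9600/v1/chat/completions", json=payload)
print(response.text)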
+
+# openai compatible generation
+@router.post("/v1/completions")
+def v1_instruct_generate(request_data: V1InstructGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+        - If stream is True, returns a StreamingResponse of generated text chunks.
+        - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+    text = request_data.prompt
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                    return True
+                return iter(elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=request_data.temperature
+                ))
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                text,
+                n_predict,
+                callback=callback,
+                temperature=request_data.temperature
+            )
             return output["text"]
     else:
         return None
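
The instruct variant takes a single prompt string instead of a message list; otherwise the flow is identical. A matching sketch (same assumptions as the chat example above):

import requests

payload = {
    "model": "current",
    "prompt": "List three prime numbers.",
    "stream": False,
    "temperature": 0.7,
    "max_tokens": 32,
}
response = requests.post("http://localhost:9600/v1/completions", json=payload)
print(response.text)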

View File

@@ -1,48 +1,96 @@
-active_personality_id: -1
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: null
-audio_pitch: 1
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
 binding_name: null
-ctx_size: 4084
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_vectorization_visualize_on_vectorization: false
-data_visualization_method: PCA
-db_path: database.db
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities: []
+# Host information
+host: localhost
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: false
-use_files: true
-use_user_informations_in_discussion: false
+repeat_last_n: 40
+repeat_penalty: 1.2
+n_threads: 8
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+extensions: []
+user_name: user
+user_description: ""
 use_user_name_in_discussions: false
 user_avatar: default_user.svg
-user_description: ''
-user_name: user
-version: 27
+use_user_informations_in_discussion: false
+# UI parameters
+db_path: database.db
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+# Helpers
+pdf_latex_path: null
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null