Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-24 06:46:40 +00:00)

Commit e77c97f238: Upgraded core code
Parent: 52532df832
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
 
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
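Note that the enable_gpu boolean above is replaced by the richer hardware_mode setting. Below is a minimal migration sketch, assuming the file is plain YAML read with PyYAML; the enable_gpu-to-hardware_mode mapping is an illustrative guess, not part of this commit:

import yaml

def migrate_config(path: str) -> dict:
    # Load an older config and rename the GPU flag to the new key.
    with open(path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}
    if cfg.get("version", 0) < 40:
        # enable_gpu was a boolean; hardware_mode names an install target.
        # The two defaults chosen here are hypothetical, not from the commit.
        cfg["hardware_mode"] = "nvidia" if cfg.pop("enable_gpu", False) else "cpu"
        cfg["version"] = 40
    return cfg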
@@ -1,51 +1,96 @@
-active_personality_id: 15
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: Google UK English Female
-audio_pitch: '1'
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 
 binding_name: null
 model_name: null
 
-config: local_config
-ctx_size: 4090
-data_vectorization_activate: true
-data_vectorization_build_keys_words: true
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 3
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-db_path: lollms.db
-debug: true
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
+# Host information
 host: localhost
-min_n_predict: 256
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
-temperature: '0.3'
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
+temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: true
-use_files: true
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
 use_user_informations_in_discussion: false
-use_user_name_in_discussions: true
-user_avatar: default_user
-user_description:
-user_name: User
-version: 26
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
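The rewrite above replaces the old alphabetized key dump with commented sections (host, generation, personality, services, data vectorization, helpers, boosting). A short sketch of reading a few of the new v40 keys, assuming PyYAML; the file path is illustrative:

import yaml

# Load the reorganized v40 configuration (path is a guess, not from the commit).
with open("configs/config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

print(cfg["version"])                        # 40
print(cfg["hardware_mode"])                  # e.g. nvidia-tensorcores
print(cfg["enable_voice_service"])           # from the voice service section
print(cfg["data_vectorization_nb_chunks"])   # from the data vectorization section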
@@ -1,38 +1,96 @@
-active_personality_id: 0
-auto_save: true
-auto_update: false
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 binding_name: null
-ctx_size: 4096
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: ftidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_visualization_method: PCA
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities:
-- generic/lollms
-port: 9601
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Host information
+host: localhost
+port: 9600
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_files: true
-use_user_name_in_discussions: false
-user_avatar: default_user
-user_description: ''
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
 user_name: user
-version: 26
+user_description: ""
+use_user_name_in_discussions: false
+user_avatar: default_user.svg
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null
@@ -452,9 +452,15 @@ Date: {{date}}
         return string
 
     def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
+        if callback is None:
+            callback = self.callback
         if text is None:
             return True
-        bot_says = self.bot_says + text
+        if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
+            bot_says = self.bot_says + text
+        elif message_type==MSG_TYPE.MSG_TYPE_FULL:
+            bot_says = text
+
         if show_progress:
             if self.nb_received_tokens==0:
                 self.start_time = datetime.now()
@@ -475,7 +481,7 @@ Date: {{date}}
                 return False
         else:
             if callback:
-                callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
+                callback(text,message_type)
         self.bot_says = bot_says
         return True
 
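The updated process() now distinguishes incremental chunks from full messages and forwards the original message_type to the callback instead of hardcoding MSG_TYPE_CHUNK. A hedged sketch of a consumer-side callback matching that contract, assuming MSG_TYPE as imported elsewhere in this commit; the stop-on-False convention mirrors the other callbacks in the diff:

from lollms.types import MSG_TYPE

def my_callback(text, message_type):
    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
        print(text, end="", flush=True)   # streamed fragment: append to output
    elif message_type == MSG_TYPE.MSG_TYPE_FULL:
        print("\n" + text)                # complete message: replace output
    return True                           # returning False would stop generation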
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file ===========================
-version: 39
+version: 40
 binding_name: null
 model_name: null
 
@@ -7,7 +7,7 @@ model_name: null
 
 # Host information
 host: localhost
-port: 9601
+port: 9600
 
 # Genreration parameters
 discussion_prompt_separator: "!@>"
@@ -44,8 +44,8 @@ debug: False
 auto_update: true
 auto_save: true
 auto_title: false
-# Enables gpu usage
-enable_gpu: true
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
 # Automatically open the browser
 auto_show_browser: true
 
@@ -6,15 +6,85 @@ from lollms.types import MSG_TYPE
 from lollms.utilities import detect_antiprompt, remove_text_from_string
 from ascii_colors import ASCIIColors
 class GenerateRequest(BaseModel):
+    """
+    Data model for the Generate Request.
+
+    Attributes:
+    - text: str representing the input text prompt for text generation.
+    - n_predict: int representing the number of predictions to generate.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - top_k: int representing the top_k parameter for text generation.
+    - top_p: float representing the top_p parameter for text generation.
+    - repeat_penalty: float representing the repeat_penalty parameter for text generation.
+    - repeat_last_n: int representing the repeat_last_n parameter for text generation.
+    - seed: int representing the seed for text generation.
+    - n_threads: int representing the number of threads for text generation.
+    """
     text: str
     n_predict: int = 1024
     stream: bool = False
+    temperature: float = 0.4
+    top_k: int = 50
+    top_p: float = 0.6
+    repeat_penalty: float = 1.3
+    repeat_last_n: int = 40
+    seed: int = -1
+    n_threads: int = 1
+
+class V1ChatGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    messages: list
+    stream: bool
+    temperature: float
+    max_tokens: float
+
+
+class V1InstructGenerateRequest(BaseModel):
+    """
+    Data model for the V1 Chat Generate Request.
+
+    Attributes:
+    - model: str representing the model to be used for text generation.
+    - messages: list of messages to be used as prompts for text generation.
+    - stream: bool indicating whether to stream the generated text or not.
+    - temperature: float representing the temperature parameter for text generation.
+    - max_tokens: float representing the maximum number of tokens to generate.
+    """
+    model: str
+    prompt: str
+    stream: bool
+    temperature: float
+    max_tokens: float
+
 
 router = APIRouter()
 elf_server = LOLLMSElfServer.get_instance()
 
 @router.post("/generate")
-def generate(request_data: GenerateRequest):
+def lollms_generate(request_data: GenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server.
+
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
     text = request_data.text
     n_predict = request_data.n_predict
     stream = request_data.stream
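With the extra sampling fields on GenerateRequest, a client can now control generation per request. A minimal client sketch for the /generate route above, using the requests library; host and port are the defaults from the config files in this commit:

import requests

# Query the /generate endpoint; stream=False asks for the full text at once.
response = requests.post(
    "http://localhost:9600/generate",
    json={
        "text": "Once upon a time",
        "n_predict": 128,
        "stream": False,
        "temperature": 0.4,
    },
)
print(response.text)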
@@ -34,7 +104,18 @@ def generate(request_data: GenerateRequest):
                 else:
                     yield chunk
                     return True
-            return iter(elf_server.binding.generate(text, n_predict, callback=callback))
+            return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature,
+                                            top_k=request_data.top_k,
+                                            top_p=request_data.top_p,
+                                            repeat_penalty=request_data.repeat_penalty,
+                                            repeat_last_n=request_data.repeat_last_n,
+                                            seed=request_data.seed,
+                                            n_threads=request_data.n_threads
+                                        ))
 
         return StreamingResponse(generate_chunks())
     else:
@@ -49,7 +130,154 @@ def generate(request_data: GenerateRequest):
                     return False
                 else:
                     return True
-            elf_server.binding.generate(text, n_predict, callback=callback)
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature,
+                                top_k=request_data.top_k,
+                                top_p=request_data.top_p,
+                                repeat_penalty=request_data.repeat_penalty,
+                                repeat_last_n=request_data.repeat_last_n,
+                                seed=request_data.seed,
+                                n_threads=request_data.n_threads
+                            )
+            return output["text"]
+    else:
+        return None
+
+
+# openai compatible generation
+@router.post("/v1/chat/completions")
+def v1_chat_generate(request_data: V1ChatGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+    messages = request_data.messages
+    text = ""
+    for message in messages:
+        text += f"{message['role']}: {message['content']}\n"
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature
+                            )
+            return output["text"]
+    else:
+        return None
+
+
+
+
+# openai compatible generation
+@router.post("/v1/completions")
+def v1_instruct_generate(request_data: V1InstructGenerateRequest):
+    """
+    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
+    This endpoint is compatible with open ai API and mistralAI API
+    Args:
+    - request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
+
+    Returns:
+    - If the elf_server binding is not None:
+    - If stream is True, returns a StreamingResponse of generated text chunks.
+    - If stream is False, returns the generated text as a string.
+    - If the elf_server binding is None, returns None.
+    """
+
+    text = request_data.prompt
+    n_predict = request_data.max_tokens
+    stream = request_data.stream
+
+    if elf_server.binding is not None:
+        if stream:
+            output = {"text":""}
+            def generate_chunks():
+                def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Yield each chunk of data
+                    output["text"] += chunk
+                    antiprompt = detect_antiprompt(output["text"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                        output["text"] = remove_text_from_string(output["text"],antiprompt)
+                        return False
+                    else:
+                        yield chunk
+                        return True
+                return iter(elf_server.binding.generate(
+                                            text,
+                                            n_predict,
+                                            callback=callback,
+                                            temperature=request_data.temperature
+                                        ))
+
+            return StreamingResponse(generate_chunks())
+        else:
+            output = {"text":""}
+            def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
+                # Yield each chunk of data
+                output["text"] += chunk
+                antiprompt = detect_antiprompt(output["text"])
+                if antiprompt:
+                    ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
+                    output["text"] = remove_text_from_string(output["text"],antiprompt)
+                    return False
+                else:
+                    return True
+            elf_server.binding.generate(
+                                text,
+                                n_predict,
+                                callback=callback,
+                                temperature=request_data.temperature
+                            )
             return output["text"]
     else:
         return None
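Since /v1/chat/completions and /v1/completions accept OpenAI-style payloads, any plain HTTP client can drive them. A hedged sketch using requests; the model value is illustrative, since the server generates with whatever binding is currently loaded:

import requests

# OpenAI-style chat payload against the new lollms route; field names
# mirror V1ChatGenerateRequest above.
response = requests.post(
    "http://localhost:9600/v1/chat/completions",
    json={
        "model": "current",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
        "temperature": 0.7,
        "max_tokens": 256,
    },
)
print(response.text)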
@@ -1,48 +1,96 @@
-active_personality_id: -1
-audio_auto_send_input: true
-audio_in_language: en-US
-audio_out_voice: null
-audio_pitch: 1
-audio_silenceTimer: 5000
-auto_save: true
-auto_show_browser: true
-auto_speak: false
-auto_update: true
+# =================== Lord Of Large Language Models Configuration file ===========================
+version: 40
+
 binding_name: null
-ctx_size: 4084
-data_vectorization_activate: true
-data_vectorization_build_keys_words: false
-data_vectorization_chunk_size: 512
-data_vectorization_method: tfidf_vectorizer
-data_vectorization_nb_chunks: 2
-data_vectorization_overlap_size: 128
-data_vectorization_save_db: false
-data_vectorization_visualize_on_vectorization: false
-data_visualization_method: PCA
-db_path: database.db
-debug: false
-discussion_prompt_separator: '!@>'
-enable_gpu: true
-extensions: []
-host: localhost
-min_n_predict: 256
 model_name: null
-n_predict: 1024
-n_threads: 8
-override_personality_model_parameters: false
-personalities: []
+
+# Host information
+host: localhost
 port: 9600
-repeat_last_n: 40
-repeat_penalty: 1.2
+
+# Genreration parameters
+discussion_prompt_separator: "!@>"
 seed: -1
+n_predict: 1024
+ctx_size: 4084
+min_n_predict: 512
 temperature: 0.9
 top_k: 50
 top_p: 0.95
-use_discussions_history: false
-use_files: true
-use_user_informations_in_discussion: false
+repeat_last_n: 40
+repeat_penalty: 1.2
+
+n_threads: 8
+
+#Personality parameters
+personalities: ["generic/lollms"]
+active_personality_id: 0
+override_personality_model_parameters: false #if true the personality parameters are overriden by those of the configuration (may affect personality behaviour)
+
+extensions: []
+
+user_name: user
+user_description: ""
 use_user_name_in_discussions: false
 user_avatar: default_user.svg
-user_description: ''
-user_name: user
-version: 27
+use_user_informations_in_discussion: false
+
+# UI parameters
+db_path: database.db
+
+# Automatic updates
+debug: False
+auto_update: true
+auto_save: true
+auto_title: false
+# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
+hardware_mode: nvidia-tensorcores
+# Automatically open the browser
+auto_show_browser: true
+
+# Voice service
+enable_voice_service: false
+xtts_base_url: http://127.0.0.1:8020
+auto_read: false
+current_voice: null
+current_language: en
+
+# Image generation service
+enable_sd_service: false
+sd_base_url: http://127.0.0.1:7860
+
+# Audio
+media_on: false
+audio_in_language: 'en-US'
+auto_speak: false
+audio_out_voice: null
+audio_pitch: 1
+audio_auto_send_input: true
+audio_silenceTimer: 5000
+
+# Data vectorization
+use_discussions_history: false # Activate vectorizing previous conversations
+summerize_discussion: false # activate discussion summary (better but adds computation time)
+max_summary_size: 512 # in tokens
+data_vectorization_visualize_on_vectorization: false
+use_files: true # Activate using files
+data_vectorization_activate: true # To activate/deactivate data vectorization
+data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
+data_visualization_method: "PCA" #"PCA" or "TSNE"
+data_vectorization_save_db: False # For each new session, new files
+data_vectorization_chunk_size: 512 # chunk size
+data_vectorization_overlap_size: 128 # overlap between chunks size
+data_vectorization_nb_chunks: 2 # number of chunks to use
+data_vectorization_build_keys_words: false # If true, when querrying the database, we use keywords generated from the user prompt instead of the prompt itself.
+data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
+data_vectorization_make_persistance: false # If true, the data will be persistant webween runs
+
+
+# Helpers
+pdf_latex_path: null
+
+# boosting information
+positive_boost: null
+negative_boost: null
+force_output_language_to_be: null