mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 20:57:58 +00:00
Upgraded core code
This commit is contained in:
parent
52532df832
commit
e77c97f238
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 39
|
||||
version: 40
|
||||
binding_name: null
|
||||
model_name: null
|
||||
|
||||
@ -44,8 +44,8 @@ debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Enables gpu usage
|
||||
enable_gpu: true
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
|
@ -1,51 +1,96 @@
|
||||
active_personality_id: 15
|
||||
audio_auto_send_input: true
|
||||
audio_in_language: en-US
|
||||
audio_out_voice: Google UK English Female
|
||||
audio_pitch: '1'
|
||||
audio_silenceTimer: 5000
|
||||
auto_save: true
|
||||
auto_show_browser: true
|
||||
auto_speak: false
|
||||
auto_update: true
|
||||
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 40
|
||||
binding_name: null
|
||||
model_name: null
|
||||
|
||||
config: local_config
|
||||
ctx_size: 4090
|
||||
data_vectorization_activate: true
|
||||
data_vectorization_build_keys_words: true
|
||||
data_vectorization_chunk_size: 512
|
||||
data_vectorization_method: tfidf_vectorizer
|
||||
data_vectorization_nb_chunks: 3
|
||||
data_vectorization_overlap_size: 128
|
||||
data_vectorization_save_db: false
|
||||
data_visualization_method: PCA
|
||||
db_path: lollms.db
|
||||
debug: true
|
||||
discussion_prompt_separator: '!@>'
|
||||
enable_gpu: true
|
||||
extensions: []
|
||||
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
min_n_predict: 256
|
||||
n_predict: 1024
|
||||
n_threads: 8
|
||||
override_personality_model_parameters: false
|
||||
personalities:
|
||||
- generic/lollms
|
||||
port: 9600
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
seed: -1
|
||||
temperature: '0.3'
|
||||
n_predict: 1024
|
||||
ctx_size: 4084
|
||||
min_n_predict: 512
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
use_discussions_history: true
|
||||
use_files: true
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
n_threads: 8
|
||||
|
||||
#Personality parameters
|
||||
personalities: ["generic/lollms"]
|
||||
active_personality_id: 0
|
||||
override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
|
||||
|
||||
extensions: []
|
||||
|
||||
user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
use_user_informations_in_discussion: false
|
||||
use_user_name_in_discussions: true
|
||||
user_avatar: default_user
|
||||
user_description:
|
||||
user_name: User
|
||||
version: 26
|
||||
|
||||
# UI parameters
|
||||
db_path: database.db
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
# Voice service
|
||||
enable_voice_service: false
|
||||
xtts_base_url: http://127.0.0.1:8020
|
||||
auto_read: false
|
||||
current_voice: null
|
||||
current_language: en
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://127.0.0.1:7860
|
||||
|
||||
# Audio
|
||||
media_on: false
|
||||
audio_in_language: 'en-US'
|
||||
auto_speak: false
|
||||
audio_out_voice: null
|
||||
audio_pitch: 1
|
||||
audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
use_discussions_history: false # Activate vectorizing previous conversations
|
||||
summerize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
data_vectorization_save_db: False # For each new session, new files
|
||||
data_vectorization_chunk_size: 512 # chunk size
|
||||
data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
|
||||
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
|
||||
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
|
||||
|
||||
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
force_output_language_to_be: null
|
||||
|
@ -1,51 +1,96 @@
|
||||
active_personality_id: 15
|
||||
audio_auto_send_input: true
|
||||
audio_in_language: en-US
|
||||
audio_out_voice: Google UK English Female
|
||||
audio_pitch: '1'
|
||||
audio_silenceTimer: 5000
|
||||
auto_save: true
|
||||
auto_show_browser: true
|
||||
auto_speak: false
|
||||
auto_update: true
|
||||
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 40
|
||||
binding_name: null
|
||||
model_name: null
|
||||
|
||||
config: local_config
|
||||
ctx_size: 4090
|
||||
data_vectorization_activate: true
|
||||
data_vectorization_build_keys_words: true
|
||||
data_vectorization_chunk_size: 512
|
||||
data_vectorization_method: tfidf_vectorizer
|
||||
data_vectorization_nb_chunks: 3
|
||||
data_vectorization_overlap_size: 128
|
||||
data_vectorization_save_db: false
|
||||
data_visualization_method: PCA
|
||||
db_path: lollms.db
|
||||
debug: true
|
||||
discussion_prompt_separator: '!@>'
|
||||
enable_gpu: true
|
||||
extensions: []
|
||||
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
min_n_predict: 256
|
||||
n_predict: 1024
|
||||
n_threads: 8
|
||||
override_personality_model_parameters: false
|
||||
personalities:
|
||||
- generic/lollms
|
||||
port: 9600
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
seed: -1
|
||||
temperature: '0.3'
|
||||
n_predict: 1024
|
||||
ctx_size: 4084
|
||||
min_n_predict: 512
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
use_discussions_history: true
|
||||
use_files: true
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
n_threads: 8
|
||||
|
||||
#Personality parameters
|
||||
personalities: ["generic/lollms"]
|
||||
active_personality_id: 0
|
||||
override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
|
||||
|
||||
extensions: []
|
||||
|
||||
user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
use_user_informations_in_discussion: false
|
||||
use_user_name_in_discussions: true
|
||||
user_avatar: default_user
|
||||
user_description:
|
||||
user_name: User
|
||||
version: 26
|
||||
|
||||
# UI parameters
|
||||
db_path: database.db
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
# Voice service
|
||||
enable_voice_service: false
|
||||
xtts_base_url: http://127.0.0.1:8020
|
||||
auto_read: false
|
||||
current_voice: null
|
||||
current_language: en
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://127.0.0.1:7860
|
||||
|
||||
# Audio
|
||||
media_on: false
|
||||
audio_in_language: 'en-US'
|
||||
auto_speak: false
|
||||
audio_out_voice: null
|
||||
audio_pitch: 1
|
||||
audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
use_discussions_history: false # Activate vectorizing previous conversations
|
||||
summerize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
data_vectorization_save_db: False # For each new session, new files
|
||||
data_vectorization_chunk_size: 512 # chunk size
|
||||
data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
|
||||
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
|
||||
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
|
||||
|
||||
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
force_output_language_to_be: null
|
||||
|
@ -1,38 +1,96 @@
|
||||
active_personality_id: 0
|
||||
auto_save: true
|
||||
auto_update: false
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 40
|
||||
binding_name: null
|
||||
ctx_size: 4096
|
||||
data_vectorization_activate: true
|
||||
data_vectorization_build_keys_words: false
|
||||
data_vectorization_chunk_size: 512
|
||||
data_vectorization_method: ftidf_vectorizer
|
||||
data_vectorization_nb_chunks: 2
|
||||
data_vectorization_overlap_size: 128
|
||||
data_vectorization_save_db: false
|
||||
data_visualization_method: PCA
|
||||
debug: false
|
||||
discussion_prompt_separator: '!@>'
|
||||
enable_gpu: true
|
||||
extensions: []
|
||||
host: localhost
|
||||
min_n_predict: 256
|
||||
model_name: null
|
||||
n_predict: 1024
|
||||
n_threads: 8
|
||||
override_personality_model_parameters: false
|
||||
personalities:
|
||||
- generic/lollms
|
||||
port: 9601
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9600
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
seed: -1
|
||||
n_predict: 1024
|
||||
ctx_size: 4084
|
||||
min_n_predict: 512
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
use_files: true
|
||||
use_user_name_in_discussions: false
|
||||
user_avatar: default_user
|
||||
user_description: ''
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
n_threads: 8
|
||||
|
||||
#Personality parameters
|
||||
personalities: ["generic/lollms"]
|
||||
active_personality_id: 0
|
||||
override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
|
||||
|
||||
extensions: []
|
||||
|
||||
user_name: user
|
||||
version: 26
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
use_user_informations_in_discussion: false
|
||||
|
||||
# UI parameters
|
||||
db_path: database.db
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
# Voice service
|
||||
enable_voice_service: false
|
||||
xtts_base_url: http://127.0.0.1:8020
|
||||
auto_read: false
|
||||
current_voice: null
|
||||
current_language: en
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://127.0.0.1:7860
|
||||
|
||||
# Audio
|
||||
media_on: false
|
||||
audio_in_language: 'en-US'
|
||||
auto_speak: false
|
||||
audio_out_voice: null
|
||||
audio_pitch: 1
|
||||
audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
use_discussions_history: false # Activate vectorizing previous conversations
|
||||
summerize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
data_vectorization_save_db: False # For each new session, new files
|
||||
data_vectorization_chunk_size: 512 # chunk size
|
||||
data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
|
||||
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
|
||||
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
|
||||
|
||||
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
force_output_language_to_be: null
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 39
|
||||
version: 40
|
||||
binding_name: null
|
||||
model_name: null
|
||||
|
||||
@ -44,8 +44,8 @@ debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Enables gpu usage
|
||||
enable_gpu: true
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
|
@ -452,9 +452,15 @@ Date: {{date}}
|
||||
return string
|
||||
|
||||
def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False):
|
||||
if callback is None:
|
||||
callback = self.callback
|
||||
if text is None:
|
||||
return True
|
||||
bot_says = self.bot_says + text
|
||||
if message_type==MSG_TYPE.MSG_TYPE_CHUNK:
|
||||
bot_says = self.bot_says + text
|
||||
elif message_type==MSG_TYPE.MSG_TYPE_FULL:
|
||||
bot_says = text
|
||||
|
||||
if show_progress:
|
||||
if self.nb_received_tokens==0:
|
||||
self.start_time = datetime.now()
|
||||
@ -475,7 +481,7 @@ Date: {{date}}
|
||||
return False
|
||||
else:
|
||||
if callback:
|
||||
callback(text,MSG_TYPE.MSG_TYPE_CHUNK)
|
||||
callback(text,message_type)
|
||||
self.bot_says = bot_says
|
||||
return True
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 39
|
||||
version: 40
|
||||
binding_name: null
|
||||
model_name: null
|
||||
|
||||
@ -7,7 +7,7 @@ model_name: null
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9601
|
||||
port: 9600
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
@ -44,8 +44,8 @@ debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Enables gpu usage
|
||||
enable_gpu: true
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
|
@ -6,15 +6,85 @@ from lollms.types import MSG_TYPE
|
||||
from lollms.utilities import detect_antiprompt, remove_text_from_string
|
||||
from ascii_colors import ASCIIColors
|
||||
class GenerateRequest(BaseModel):
    """
    Request body of the lollms `/generate` endpoint.

    Only `text` is mandatory; every other field falls back to the default
    declared below.

    Attributes:
    - text: str, the input prompt for text generation.
    - n_predict: int, the n_predict value handed to the binding (default 1024).
    - stream: bool, whether the generated text should be streamed (default False).
    - temperature: float, the temperature parameter for text generation (default 0.4).
    - top_k: int, the top_k parameter for text generation (default 50).
    - top_p: float, the top_p parameter for text generation (default 0.6).
    - repeat_penalty: float, the repeat_penalty parameter for text generation (default 1.3).
    - repeat_last_n: int, the repeat_last_n parameter for text generation (default 40).
    - seed: int, the seed for text generation (default -1).
    - n_threads: int, the number of threads for text generation (default 1).
    """

    # Prompt and output control
    text: str
    n_predict: int = 1024
    stream: bool = False

    # Sampling parameters
    temperature: float = 0.4
    top_k: int = 50
    top_p: float = 0.6
    repeat_penalty: float = 1.3
    repeat_last_n: int = 40
    seed: int = -1

    # Execution parameters
    n_threads: int = 1
|
||||
|
||||
class V1ChatGenerateRequest(BaseModel):
    """
    Request body of the `/v1/chat/completions` endpoint.

    All five fields are required: none of them declares a default.

    Attributes:
    - model: str, the model to be used for text generation.
    - messages: list, the messages used to build the prompt; the endpoint
      reads the 'role' and 'content' entries of each message.
    - stream: bool, whether the generated text should be streamed.
    - temperature: float, the temperature parameter for text generation.
    - max_tokens: float, the maximum number of tokens to generate.
    """

    # Target model and conversation
    model: str
    messages: list

    # Generation options
    stream: bool
    temperature: float
    max_tokens: float
|
||||
|
||||
|
||||
class V1InstructGenerateRequest(BaseModel):
    """
    Data model for the V1 Instruct Generate Request (`/v1/completions`).

    All five fields are required: none of them declares a default.

    Attributes:
    - model: str representing the model to be used for text generation.
    - prompt: str holding the raw prompt used for text generation.
    - stream: bool indicating whether to stream the generated text or not.
    - temperature: float representing the temperature parameter for text generation.
    - max_tokens: float representing the maximum number of tokens to generate.
    """
    model: str
    prompt: str
    stream: bool
    temperature: float
    max_tokens: float
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
elf_server = LOLLMSElfServer.get_instance()
|
||||
|
||||
@router.post("/generate")
|
||||
def generate(request_data: GenerateRequest):
|
||||
def lollms_generate(request_data: GenerateRequest):
|
||||
"""
|
||||
Endpoint for generating text from prompts using the lollms fastapi server.
|
||||
|
||||
Args:
|
||||
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
|
||||
|
||||
Returns:
|
||||
- If the elf_server binding is not None:
|
||||
- If stream is True, returns a StreamingResponse of generated text chunks.
|
||||
- If stream is False, returns the generated text as a string.
|
||||
- If the elf_server binding is None, returns None.
|
||||
"""
|
||||
text = request_data.text
|
||||
n_predict = request_data.n_predict
|
||||
stream = request_data.stream
|
||||
@ -34,7 +104,18 @@ def generate(request_data: GenerateRequest):
|
||||
else:
|
||||
yield chunk
|
||||
return True
|
||||
return iter(elf_server.binding.generate(text, n_predict, callback=callback))
|
||||
return iter(elf_server.binding.generate(
|
||||
text,
|
||||
n_predict,
|
||||
callback=callback,
|
||||
temperature=request_data.temperature,
|
||||
top_k=request_data.top_k,
|
||||
top_p=request_data.top_p,
|
||||
repeat_penalty=request_data.repeat_penalty,
|
||||
repeat_last_n=request_data.repeat_last_n,
|
||||
seed=request_data.seed,
|
||||
n_threads=request_data.n_threads
|
||||
))
|
||||
|
||||
return StreamingResponse(generate_chunks())
|
||||
else:
|
||||
@ -49,7 +130,154 @@ def generate(request_data: GenerateRequest):
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
elf_server.binding.generate(text, n_predict, callback=callback)
|
||||
elf_server.binding.generate(
|
||||
text,
|
||||
n_predict,
|
||||
callback=callback,
|
||||
temperature=request_data.temperature,
|
||||
top_k=request_data.top_k,
|
||||
top_p=request_data.top_p,
|
||||
repeat_penalty=request_data.repeat_penalty,
|
||||
repeat_last_n=request_data.repeat_last_n,
|
||||
seed=request_data.seed,
|
||||
n_threads=request_data.n_threads
|
||||
)
|
||||
return output["text"]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
# openai compatible generation
|
||||
@router.post("/v1/chat/completions")
def v1_chat_generate(request_data: V1ChatGenerateRequest):
    """
    Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
    This endpoint is meant to be compatible with the OpenAI API and the MistralAI API.

    The messages are flattened into a single prompt, one "role: content" line
    per message, and handed to the active binding.

    Args:
    - request_data: V1ChatGenerateRequest object containing the model, the messages,
      the stream flag, the temperature and the maximum number of tokens.

    Returns:
    - If the elf_server binding is not None:
        - If stream is True, returns a StreamingResponse that emits each chunk
          as soon as the binding reports it through the callback.
        - If stream is False, returns the generated text as a string.
    - If the elf_server binding is None, returns None.
    """
    # Local imports: only the streaming branch needs them.
    import queue
    import threading

    text = "".join(
        f"{message['role']}: {message['content']}\n"
        for message in request_data.messages
    )
    # max_tokens is declared as a float on the request model; pass a whole
    # number, like the int n_predict that GenerateRequest feeds to the binding.
    n_predict = int(request_data.max_tokens)

    if elf_server.binding is None:
        return None

    output = {"text": ""}

    def accumulate(chunk):
        """Append chunk to the running text; return False once an antiprompt shows up."""
        output["text"] += chunk
        antiprompt = detect_antiprompt(output["text"])
        if antiprompt:
            ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
            output["text"] = remove_text_from_string(output["text"],antiprompt)
            return False
        return True

    if request_data.stream:
        # The callback must be a plain function: a `yield` inside it would turn
        # it into a generator function whose body never runs when the binding
        # calls it. Chunks are therefore handed over through a queue.
        chunks = queue.Queue()
        end_of_stream = object()  # sentinel marking the end of generation

        def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
            # False presumably asks the binding to stop generating — TODO confirm
            if not accumulate(chunk):
                return False
            chunks.put(chunk)
            return True

        def run_generation():
            # NOTE(review): assumes binding.generate may run in a worker thread — confirm
            try:
                elf_server.binding.generate(
                    text,
                    n_predict,
                    callback=callback,
                    temperature=request_data.temperature
                )
            finally:
                # Always unblock the consumer, even if generation raised.
                chunks.put(end_of_stream)

        def generate_chunks():
            threading.Thread(target=run_generation, daemon=True).start()
            while True:
                item = chunks.get()
                if item is end_of_stream:
                    return
                yield item

        return StreamingResponse(generate_chunks())

    def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
        return accumulate(chunk)

    elf_server.binding.generate(
        text,
        n_predict,
        callback=callback,
        temperature=request_data.temperature
    )
    return output["text"]
|
||||
|
||||
|
||||
|
||||
|
||||
# openai compatible generation
|
||||
@router.post("/v1/completions")
def v1_instruct_generate(request_data: V1InstructGenerateRequest):
    """
    Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
    This endpoint is meant to be compatible with the OpenAI API and the MistralAI API.

    The prompt is handed to the active binding unchanged.

    Args:
    - request_data: V1InstructGenerateRequest object containing the model, the prompt,
      the stream flag, the temperature and the maximum number of tokens.

    Returns:
    - If the elf_server binding is not None:
        - If stream is True, returns a StreamingResponse that emits each chunk
          as soon as the binding reports it through the callback.
        - If stream is False, returns the generated text as a string.
    - If the elf_server binding is None, returns None.
    """
    # Local imports: only the streaming branch needs them.
    import queue
    import threading

    text = request_data.prompt
    # max_tokens is declared as a float on the request model; pass a whole
    # number, like the int n_predict that GenerateRequest feeds to the binding.
    n_predict = int(request_data.max_tokens)

    if elf_server.binding is None:
        return None

    output = {"text": ""}

    def accumulate(chunk):
        """Append chunk to the running text; return False once an antiprompt shows up."""
        output["text"] += chunk
        antiprompt = detect_antiprompt(output["text"])
        if antiprompt:
            ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
            output["text"] = remove_text_from_string(output["text"],antiprompt)
            return False
        return True

    if request_data.stream:
        # The callback must be a plain function: a `yield` inside it would turn
        # it into a generator function whose body never runs when the binding
        # calls it. Chunks are therefore handed over through a queue.
        chunks = queue.Queue()
        end_of_stream = object()  # sentinel marking the end of generation

        def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
            # False presumably asks the binding to stop generating — TODO confirm
            if not accumulate(chunk):
                return False
            chunks.put(chunk)
            return True

        def run_generation():
            # NOTE(review): assumes binding.generate may run in a worker thread — confirm
            try:
                elf_server.binding.generate(
                    text,
                    n_predict,
                    callback=callback,
                    temperature=request_data.temperature
                )
            finally:
                # Always unblock the consumer, even if generation raised.
                chunks.put(end_of_stream)

        def generate_chunks():
            threading.Thread(target=run_generation, daemon=True).start()
            while True:
                item = chunks.get()
                if item is end_of_stream:
                    return
                yield item

        return StreamingResponse(generate_chunks())

    def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
        return accumulate(chunk)

    elf_server.binding.generate(
        text,
        n_predict,
        callback=callback,
        temperature=request_data.temperature
    )
    return output["text"]
|
||||
|
@ -1,48 +1,96 @@
|
||||
active_personality_id: -1
|
||||
audio_auto_send_input: true
|
||||
audio_in_language: en-US
|
||||
audio_out_voice: null
|
||||
audio_pitch: 1
|
||||
audio_silenceTimer: 5000
|
||||
auto_save: true
|
||||
auto_show_browser: true
|
||||
auto_speak: false
|
||||
auto_update: true
|
||||
# =================== Lord Of Large Language Models Configuration file ===========================
|
||||
version: 40
|
||||
binding_name: null
|
||||
ctx_size: 4084
|
||||
data_vectorization_activate: true
|
||||
data_vectorization_build_keys_words: false
|
||||
data_vectorization_chunk_size: 512
|
||||
data_vectorization_method: tfidf_vectorizer
|
||||
data_vectorization_nb_chunks: 2
|
||||
data_vectorization_overlap_size: 128
|
||||
data_vectorization_save_db: false
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
data_visualization_method: PCA
|
||||
db_path: database.db
|
||||
debug: false
|
||||
discussion_prompt_separator: '!@>'
|
||||
enable_gpu: true
|
||||
extensions: []
|
||||
host: localhost
|
||||
min_n_predict: 256
|
||||
model_name: null
|
||||
n_predict: 1024
|
||||
n_threads: 8
|
||||
override_personality_model_parameters: false
|
||||
personalities: []
|
||||
|
||||
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9600
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
seed: -1
|
||||
n_predict: 1024
|
||||
ctx_size: 4084
|
||||
min_n_predict: 512
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
use_discussions_history: false
|
||||
use_files: true
|
||||
use_user_informations_in_discussion: false
|
||||
repeat_last_n: 40
|
||||
repeat_penalty: 1.2
|
||||
|
||||
n_threads: 8
|
||||
|
||||
#Personality parameters
|
||||
personalities: ["generic/lollms"]
|
||||
active_personality_id: 0
|
||||
override_personality_model_parameters: false #if true the personality parameters are overridden by those of the configuration (may affect personality behaviour)
|
||||
|
||||
extensions: []
|
||||
|
||||
user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
user_description: ''
|
||||
user_name: user
|
||||
version: 27
|
||||
use_user_informations_in_discussion: false
|
||||
|
||||
# UI parameters
|
||||
db_path: database.db
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
auto_update: true
|
||||
auto_save: true
|
||||
auto_title: false
|
||||
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
|
||||
hardware_mode: nvidia-tensorcores
|
||||
# Automatically open the browser
|
||||
auto_show_browser: true
|
||||
|
||||
# Voice service
|
||||
enable_voice_service: false
|
||||
xtts_base_url: http://127.0.0.1:8020
|
||||
auto_read: false
|
||||
current_voice: null
|
||||
current_language: en
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://127.0.0.1:7860
|
||||
|
||||
# Audio
|
||||
media_on: false
|
||||
audio_in_language: 'en-US'
|
||||
auto_speak: false
|
||||
audio_out_voice: null
|
||||
audio_pitch: 1
|
||||
audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
use_discussions_history: false # Activate vectorizing previous conversations
|
||||
summerize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
data_vectorization_save_db: False # For each new session, new files
|
||||
data_vectorization_chunk_size: 512 # chunk size
|
||||
data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
data_vectorization_build_keys_words: false # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
|
||||
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
|
||||
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
|
||||
|
||||
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
force_output_language_to_be: null
|
||||
|
Loading…
Reference in New Issue
Block a user