mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-01-18 18:56:28 +00:00
upgraded vector db
This commit is contained in:
parent
6f40981651
commit
1437b2c40d
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 125
|
||||
version: 127
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -153,6 +153,7 @@ xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
xtts_freq: 22050
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
|
@ -1,35 +1,53 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 81
|
||||
version: 118
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
model_type: null
|
||||
|
||||
show_news_panel: True
|
||||
show_news_panel: true
|
||||
|
||||
# Security measures
|
||||
turn_on_setting_update_validation: True
|
||||
turn_on_code_execution: True
|
||||
turn_on_code_validation: True
|
||||
turn_on_open_file_validation: False
|
||||
turn_on_send_file_validation: False
|
||||
turn_on_setting_update_validation: true
|
||||
turn_on_code_execution: true
|
||||
turn_on_code_validation: true
|
||||
turn_on_open_file_validation: true
|
||||
turn_on_send_file_validation: true
|
||||
turn_on_language_validation: true
|
||||
|
||||
force_accept_remote_access: false
|
||||
|
||||
# Server information
|
||||
headless_server_mode: False
|
||||
headless_server_mode: false
|
||||
allowed_origins: []
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9600
|
||||
|
||||
app_custom_logo: ""
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
start_header_id_template: "!@>"
|
||||
end_header_id_template: ": "
|
||||
|
||||
separator_template: "\n"
|
||||
|
||||
start_user_header_id_template: "!@>"
|
||||
end_user_header_id_template: ": "
|
||||
end_user_message_id_template: ""
|
||||
|
||||
start_ai_header_id_template: "!@>"
|
||||
end_ai_header_id_template: ": "
|
||||
end_ai_message_id_template: ""
|
||||
|
||||
system_message_template: "system"
|
||||
|
||||
seed: -1
|
||||
ctx_size: 4084
|
||||
max_n_predict: 4096
|
||||
min_n_predict: 512
|
||||
min_n_predict: 1024
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
@ -50,14 +68,14 @@ user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
use_model_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
user_avatar: null
|
||||
use_user_informations_in_discussion: false
|
||||
|
||||
# UI parameters
|
||||
discussion_db_name: default
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
debug: false
|
||||
debug_log_file_path: ""
|
||||
auto_update: true
|
||||
auto_sync_personalities: true
|
||||
@ -77,23 +95,104 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
stt_input_device: 0
|
||||
|
||||
|
||||
# STT service
|
||||
stt_listening_threshold: 1000
|
||||
stt_silence_duration: 2
|
||||
stt_sound_threshold_percentage: 10
|
||||
stt_gain: 1.0
|
||||
stt_rate: 44100
|
||||
stt_channels: 1
|
||||
stt_buffer_size: 10
|
||||
|
||||
stt_activate_word_detection: false
|
||||
stt_word_detection_file: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
use_ai_generated_negative_prompt: false
|
||||
negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
|
||||
default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
|
||||
# Image generation service
|
||||
enable_fooocus_service: false
|
||||
fooocus_base_url: http://localhost:7860
|
||||
|
||||
# diffuser
|
||||
diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
|
||||
diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
|
||||
|
||||
# Dall e service key
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
comfyui_base_url: http://127.0.0.1:8188/
|
||||
comfyui_model: v1-5-pruned-emaonly.ckpt
|
||||
|
||||
# Motion control service
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -107,6 +206,11 @@ petals_device: cuda
|
||||
# lollms service
|
||||
enable_lollms_service: false
|
||||
lollms_base_url: http://localhost:1234
|
||||
lollms_access_keys : "" # set a list of keys separated by comma to restrict access
|
||||
activate_lollms_server: true
|
||||
activate_ollama_emulator: true
|
||||
activate_openai_emulator: true
|
||||
activate_mistralai_emulator: true
|
||||
|
||||
# elastic search service
|
||||
elastic_search_service: false
|
||||
@ -131,13 +235,22 @@ audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
|
||||
rag_vectorizer: bert # possible values bert, tfidf, word2vec
|
||||
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
|
||||
rag_vectorizer_parameters: null # Parameters of the model in json format
|
||||
rag_chunk_size: 512 # number of tokens per chunk
|
||||
rag_n_chunks: 4 #Number of chunks to recover from the database
|
||||
rag_clean_chunks: true #Removes all unnecessary spaces and line returns
|
||||
rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
|
||||
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
|
||||
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
|
||||
|
||||
activate_skills_lib: false # Activate vectorizing previous conversations
|
||||
skills_lib_database_name: "default" # Default skills database
|
||||
summarize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
@ -154,12 +267,13 @@ data_vectorization_make_persistance: false # If true, the data will be persistan
|
||||
|
||||
# Activate internet search
|
||||
activate_internet_search: false
|
||||
activate_internet_pages_judgement: true
|
||||
internet_vectorization_chunk_size: 512 # chunk size
|
||||
internet_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
internet_nb_search_pages: 3 # number of pages to select
|
||||
internet_quick_search: False # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
|
||||
internet_vectorization_overlap_size: 0 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 4 # number of chunks to use
|
||||
internet_nb_search_pages: 8 # number of pages to select
|
||||
internet_quick_search: false # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
@ -167,7 +281,7 @@ pdf_latex_path: null
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
current_language: english
|
||||
fun_mode: False
|
||||
fun_mode: false
|
||||
|
||||
|
||||
# webui configurations
|
||||
@ -175,5 +289,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_model: base
|
@ -1,35 +1,53 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 81
|
||||
version: 118
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
model_type: null
|
||||
|
||||
show_news_panel: True
|
||||
show_news_panel: true
|
||||
|
||||
# Security measures
|
||||
turn_on_setting_update_validation: True
|
||||
turn_on_code_execution: True
|
||||
turn_on_code_validation: True
|
||||
turn_on_open_file_validation: False
|
||||
turn_on_send_file_validation: False
|
||||
turn_on_setting_update_validation: true
|
||||
turn_on_code_execution: true
|
||||
turn_on_code_validation: true
|
||||
turn_on_open_file_validation: true
|
||||
turn_on_send_file_validation: true
|
||||
turn_on_language_validation: true
|
||||
|
||||
force_accept_remote_access: false
|
||||
|
||||
# Server information
|
||||
headless_server_mode: False
|
||||
headless_server_mode: false
|
||||
allowed_origins: []
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9600
|
||||
|
||||
app_custom_logo: ""
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
start_header_id_template: "!@>"
|
||||
end_header_id_template: ": "
|
||||
|
||||
separator_template: "\n"
|
||||
|
||||
start_user_header_id_template: "!@>"
|
||||
end_user_header_id_template: ": "
|
||||
end_user_message_id_template: ""
|
||||
|
||||
start_ai_header_id_template: "!@>"
|
||||
end_ai_header_id_template: ": "
|
||||
end_ai_message_id_template: ""
|
||||
|
||||
system_message_template: "system"
|
||||
|
||||
seed: -1
|
||||
ctx_size: 4084
|
||||
max_n_predict: 4096
|
||||
min_n_predict: 512
|
||||
min_n_predict: 1024
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
@ -50,14 +68,14 @@ user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
use_model_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
user_avatar: null
|
||||
use_user_informations_in_discussion: false
|
||||
|
||||
# UI parameters
|
||||
discussion_db_name: default
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
debug: false
|
||||
debug_log_file_path: ""
|
||||
auto_update: true
|
||||
auto_sync_personalities: true
|
||||
@ -77,23 +95,104 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
stt_input_device: 0
|
||||
|
||||
|
||||
# STT service
|
||||
stt_listening_threshold: 1000
|
||||
stt_silence_duration: 2
|
||||
stt_sound_threshold_percentage: 10
|
||||
stt_gain: 1.0
|
||||
stt_rate: 44100
|
||||
stt_channels: 1
|
||||
stt_buffer_size: 10
|
||||
|
||||
stt_activate_word_detection: false
|
||||
stt_word_detection_file: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
use_ai_generated_negative_prompt: false
|
||||
negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
|
||||
default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
|
||||
# Image generation service
|
||||
enable_fooocus_service: false
|
||||
fooocus_base_url: http://localhost:7860
|
||||
|
||||
# diffuser
|
||||
diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
|
||||
diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
|
||||
|
||||
# Dall e service key
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
comfyui_base_url: http://127.0.0.1:8188/
|
||||
comfyui_model: v1-5-pruned-emaonly.ckpt
|
||||
|
||||
# Motion control service
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -107,6 +206,11 @@ petals_device: cuda
|
||||
# lollms service
|
||||
enable_lollms_service: false
|
||||
lollms_base_url: http://localhost:1234
|
||||
lollms_access_keys : "" # set a list of keys separated by comma to restrict access
|
||||
activate_lollms_server: true
|
||||
activate_ollama_emulator: true
|
||||
activate_openai_emulator: true
|
||||
activate_mistralai_emulator: true
|
||||
|
||||
# elastic search service
|
||||
elastic_search_service: false
|
||||
@ -131,13 +235,22 @@ audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
|
||||
rag_vectorizer: bert # possible values bert, tfidf, word2vec
|
||||
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
|
||||
rag_vectorizer_parameters: null # Parameters of the model in json format
|
||||
rag_chunk_size: 512 # number of tokens per chunk
|
||||
rag_n_chunks: 4 #Number of chunks to recover from the database
|
||||
rag_clean_chunks: true #Removes all unnecessary spaces and line returns
|
||||
rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
|
||||
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for any new files in the folder and adds it to the database
|
||||
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
|
||||
|
||||
activate_skills_lib: false # Activate vectorizing previous conversations
|
||||
skills_lib_database_name: "default" # Default skills database
|
||||
summarize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan
|
||||
|
||||
# Activate internet search
|
||||
activate_internet_search: false
|
||||
activate_internet_pages_judgement: true
|
||||
internet_vectorization_chunk_size: 512 # chunk size
|
||||
internet_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
internet_nb_search_pages: 3 # number of pages to select
|
||||
internet_quick_search: False # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
|
||||
internet_vectorization_overlap_size: 0 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 4 # number of chunks to use
|
||||
internet_nb_search_pages: 8 # number of pages to select
|
||||
internet_quick_search: false # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
current_language: null
|
||||
fun_mode: False
|
||||
current_language: english
|
||||
fun_mode: false
|
||||
|
||||
|
||||
# webui configurations
|
||||
@ -175,5 +289,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_model: base
|
@ -13,7 +13,6 @@ from lollms.utilities import PromptReshaper
|
||||
from lollms.client_session import Client, Session
|
||||
from lollms.databases.skills_database import SkillsLibrary
|
||||
from lollms.tasks import TasksLibrary
|
||||
from safe_store import TextVectorizer, VectorizationMethod, VisualizationMethod
|
||||
|
||||
from lollmsvectordb.database_elements.chunk import Chunk
|
||||
from lollmsvectordb.vector_database import VectorDatabase
|
||||
@ -335,7 +334,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
trace_exception(ex)
|
||||
|
||||
ASCIIColors.blue("Loading local TTS services")
|
||||
if self.config.xtts_enable or self.config.active_tts_service == "xtts":
|
||||
if self.config.active_tts_service == "xtts":
|
||||
ASCIIColors.yellow("Loading XTTS")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
@ -348,6 +347,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
|
||||
freq=self.config.xtts_freq
|
||||
)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
@ -448,7 +448,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
trace_exception(ex)
|
||||
|
||||
ASCIIColors.blue("Loading loacal TTS services")
|
||||
if (self.config.xtts_enable or self.config.active_tts_service == "xtts") and self.xtts is None:
|
||||
if self.config.active_tts_service == "xtts" and self.xtts is None:
|
||||
ASCIIColors.yellow("Loading XTTS")
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
@ -461,6 +461,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
self.xtts = LollmsXTTS(
|
||||
self,
|
||||
voices_folders=[voices_folder, self.lollms_paths.custom_voices_path],
|
||||
freq=self.config.xtts_freq
|
||||
)
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
@ -532,17 +533,6 @@ class LollmsApplication(LoLLMsCom):
|
||||
trace_exception(ex)
|
||||
|
||||
|
||||
def build_long_term_skills_memory(self):
|
||||
discussion_db_name:Path = self.lollms_paths.personal_discussions_path/self.config.discussion_db_name.split(".")[0]
|
||||
discussion_db_name.mkdir(exist_ok=True, parents=True)
|
||||
self.long_term_memory = TextVectorizer(
|
||||
vectorization_method=VectorizationMethod.TFIDF_VECTORIZER,
|
||||
model=self.model,
|
||||
database_path=discussion_db_name/"skills_memory.json",
|
||||
save_db=True,
|
||||
data_visualization_method=VisualizationMethod.PCA,
|
||||
)
|
||||
return self.long_term_memory
|
||||
|
||||
def process_chunk(
|
||||
self,
|
||||
@ -969,6 +959,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
f"{self.start_header_id_template}websearch query{self.end_header_id_template}"
|
||||
])
|
||||
query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink)
|
||||
query = query.replace("\"","")
|
||||
self.personality.step_end("Crafting internet search query")
|
||||
self.personality.step(f"web search query: {query}")
|
||||
|
||||
@ -979,12 +970,12 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
internet_search_results=f"{self.system_full_header}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"
|
||||
|
||||
docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)
|
||||
chunks:List[Chunk] = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)
|
||||
|
||||
if len(docs)>0:
|
||||
for doc, infos,document_id in zip(docs, sorted_similarities, document_ids):
|
||||
internet_search_infos.append(document_id)
|
||||
internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{document_id['url']}\nchunk_title:{document_id['title']}\ncontent:{doc}\n"
|
||||
if len(chunks)>0:
|
||||
for chunk in chunks:
|
||||
internet_search_infos.append(chunk.doc.title)
|
||||
internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{doc}\n"
|
||||
else:
|
||||
internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n"
|
||||
if self.config.internet_quick_search:
|
||||
@ -1051,9 +1042,12 @@ class LollmsApplication(LoLLMsCom):
|
||||
docs = v.list_documents()
|
||||
for doc in docs:
|
||||
document=v.get_document(document_path = doc["path"])
|
||||
self.personality.step_start(f"Summeryzing document {doc['path']}")
|
||||
summary = self.personality.summarize_text(document, f"Extract information from the following text chunk to answer this request. If there is no information about the query, just return an empty string.\n{self.system_custom_header('query')}{query}", callback=self.personality.sink)
|
||||
self.personality.step_end(f"Summeryzing document {doc['path']}")
|
||||
self.personality.step_start(f"Summaryzing document {doc['path']}")
|
||||
def post_process(summary):
|
||||
return summary
|
||||
summary = self.personality.summarize_text(document,
|
||||
f"Extract information from the following text chunk to answer this request.\n{self.system_custom_header('query')}{query}", chunk_summary_post_processing=post_process, callback=self.personality.sink)
|
||||
self.personality.step_end(f"Summaryzing document {doc['path']}")
|
||||
document_infos = f"{self.separator_template}".join([
|
||||
self.system_custom_header('document contextual summary'),
|
||||
f"source_document_title:{doc['title']}",
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 125
|
||||
version: 127
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -153,6 +153,7 @@ xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
xtts_freq: 22050
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
|
@ -7,9 +7,8 @@ from lollms.types import MSG_TYPE
|
||||
from lollms.types import BindingType
|
||||
from lollms.utilities import PackageManager, discussion_path_to_url
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.databases.skills_database import SkillsLibrary
|
||||
from lollms.com import LoLLMsCom
|
||||
from safe_store import TextVectorizer, VisualizationMethod, GenericDataLoader
|
||||
|
||||
from lollmsvectordb.vector_database import VectorDatabase
|
||||
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
|
||||
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
|
||||
@ -671,7 +670,7 @@ class Discussion:
|
||||
|
||||
if len(self.vectorizer.list_documents())==0 and len(self.text_files)>0:
|
||||
for path in self.text_files:
|
||||
data = GenericDataLoader.read_file(path)
|
||||
data = TextDocumentsLoader.read_file(path)
|
||||
try:
|
||||
self.vectorizer.add_document(path.stem, data, path, True)
|
||||
except Exception as ex:
|
||||
@ -833,7 +832,7 @@ class Discussion:
|
||||
return True
|
||||
except Exception as e:
|
||||
trace_exception(e)
|
||||
self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
|
||||
self.lollms.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {TextDocumentsLoader.get_supported_file_types()}",client_id=client.client_id)
|
||||
return False
|
||||
|
||||
def load_message(self, id):
|
||||
|
@ -1,6 +1,6 @@
|
||||
from pathlib import Path
|
||||
from lollms.personality import APScript
|
||||
from safe_store.generic_data_loader import GenericDataLoader
|
||||
from lollmsvectordb.text_document_loader import TextDocumentsLoader
|
||||
from safe_store.text_vectorizer import TextVectorizer
|
||||
import json
|
||||
import re
|
||||
|
@ -7,7 +7,6 @@ from typing import Union
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.personality import APScript
|
||||
from lollms.tts import LollmsTTS
|
||||
from safe_store import GenericDataLoader
|
||||
from ascii_colors import trace_exception
|
||||
|
||||
# Here is the core of the function to be built
|
||||
|
@ -7,7 +7,7 @@ from typing import Union
|
||||
from lollms.utilities import PackageManager
|
||||
from lollms.personality import APScript
|
||||
from lollms.tts import LollmsTTS
|
||||
from safe_store import GenericDataLoader
|
||||
from lollmsvectordb import TextDocumentsLoader
|
||||
from ascii_colors import trace_exception
|
||||
|
||||
# Here is the core of the function to be built
|
||||
@ -28,7 +28,7 @@ def read_text_from_file(file_path: Union[Path, str], tts_module:LollmsTTS, llm:A
|
||||
file_path = Path(file_path)
|
||||
|
||||
# Read the text from the file
|
||||
text = GenericDataLoader.read_file(file_path)
|
||||
text = TextDocumentsLoader.read_file(file_path)
|
||||
|
||||
# Generate audio from the text
|
||||
audio_file_path = tts_module.tts_audio(text,use_threading=True)
|
||||
|
@ -29,7 +29,7 @@ def get_root_url(url):
|
||||
|
||||
|
||||
def format_url_parameter(value:str):
|
||||
encoded_value = value.strip().replace("\"","")
|
||||
encoded_value = value.strip().replace("\"","").replace(" ","+")
|
||||
return encoded_value
|
||||
|
||||
|
||||
@ -294,7 +294,6 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod
|
||||
|
||||
search_results = []
|
||||
|
||||
@ -349,9 +348,10 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
|
||||
nb_non_empty = 0
|
||||
# Configure Chrome options
|
||||
driver = prepare_chrome_driver(chromedriver_path)
|
||||
|
||||
qquery = format_url_parameter(query)
|
||||
url = f"https://duckduckgo.com/?q={qquery}&t=h_&ia=web"
|
||||
results = extract_results(
|
||||
f"https://duckduckgo.com/?q={format_url_parameter(query)}&t=h_&ia=web",
|
||||
url,
|
||||
internet_nb_search_pages,
|
||||
driver
|
||||
)
|
||||
@ -369,13 +369,11 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
|
||||
nb_non_empty += 1
|
||||
if nb_non_empty>=internet_nb_search_pages:
|
||||
break
|
||||
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
|
||||
vectorizer.build_index()
|
||||
chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
|
||||
else:
|
||||
docs = ["The web search has failed. Try using another query"]
|
||||
sorted_similarities = [0]
|
||||
document_ids = ["duckduckgo.com"]
|
||||
chunks = []
|
||||
# Close the browser
|
||||
driver.quit()
|
||||
|
||||
return docs, sorted_similarities, document_ids
|
||||
return chunks
|
||||
|
@ -20,7 +20,7 @@ from lollmsvectordb.vector_database import VectorDatabase
|
||||
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
|
||||
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
|
||||
from lollmsvectordb.text_document_loader import TextDocumentsLoader
|
||||
|
||||
from lollmsvectordb.database_elements.document import Document
|
||||
import pkg_resources
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
@ -37,7 +37,11 @@ from lollms.types import MSG_TYPE, SUMMARY_MODE
|
||||
import json
|
||||
from typing import Any, List, Optional, Type, Callable, Dict, Any, Union
|
||||
import json
|
||||
from safe_store import TextVectorizer, GenericDataLoader, VisualizationMethod, VectorizationMethod, DocumentDecomposer
|
||||
from lollmsvectordb.vector_database import VectorDatabase
|
||||
from lollmsvectordb.text_document_loader import TextDocumentsLoader
|
||||
from lollmsvectordb.text_chunker import TextChunker
|
||||
import hashlib
|
||||
|
||||
from functools import partial
|
||||
import sys
|
||||
from lollms.com import LoLLMsCom
|
||||
@ -910,42 +914,34 @@ class AIPersonality:
|
||||
|
||||
# Verify if the persona has a data folder
|
||||
if self.data_path.exists():
|
||||
self.database_path = self.data_path / "db.json"
|
||||
if self.database_path.exists():
|
||||
ASCIIColors.info("Loading database ...",end="")
|
||||
self.persona_data_vectorizer = TextVectorizer(
|
||||
"tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
|
||||
model=self.model, #needed in case of using model_embedding
|
||||
save_db=True,
|
||||
database_path=self.database_path,
|
||||
data_visualization_method=VisualizationMethod.PCA,
|
||||
database_dict=None)
|
||||
ASCIIColors.green("Ok")
|
||||
else:
|
||||
files = [f for f in self.data_path.iterdir() if f.suffix.lower() in ['.asm', '.bat', '.c', '.cpp', '.cs', '.csproj', '.css',
|
||||
'.csv', '.docx', '.h', '.hh', '.hpp', '.html', '.inc', '.ini', '.java', '.js', '.json', '.log',
|
||||
'.lua', '.map', '.md', '.pas', '.pdf', '.php', '.pptx', '.ps1', '.py', '.rb', '.rtf', '.s', '.se', '.sh', '.sln',
|
||||
'.snippet', '.snippets', '.sql', '.sym', '.ts', '.txt', '.xlsx', '.xml', '.yaml', '.yml', '.msg'] ]
|
||||
if len(files)>0:
|
||||
dl = GenericDataLoader()
|
||||
self.persona_data_vectorizer = TextVectorizer(
|
||||
"tfidf_vectorizer", # self.config.data_vectorization_method, # supported "model_embedding" or "tfidf_vectorizer"
|
||||
model=self.model, #needed in case of using model_embedding
|
||||
save_db=True,
|
||||
database_path=self.database_path,
|
||||
data_visualization_method=VisualizationMethod.PCA,
|
||||
database_dict=None)
|
||||
for f in files:
|
||||
text = dl.read_file(f)
|
||||
self.persona_data_vectorizer.add_document(f.name,text,self.config.data_vectorization_chunk_size, self.config.data_vectorization_overlap_size)
|
||||
# data_vectorization_chunk_size: 512 # chunk size
|
||||
# data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
# data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
self.persona_data_vectorizer.index()
|
||||
self.persona_data_vectorizer.save_db()
|
||||
else:
|
||||
self.persona_data_vectorizer = None
|
||||
self._data = None
|
||||
self.database_path = self.data_path / "db.sqlite"
|
||||
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
|
||||
vectorizer = self.config.rag_vectorizer
|
||||
if vectorizer == "bert":
|
||||
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
|
||||
v = BERTVectorizer()
|
||||
elif vectorizer == "tfidf":
|
||||
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
|
||||
v = TFIDFVectorizer()
|
||||
elif vectorizer == "word2vec":
|
||||
from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
|
||||
v = Word2VecVectorizer()
|
||||
|
||||
self.persona_data_vectorizer = VectorDatabase(self.database_path, v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
|
||||
|
||||
files = [f for f in self.data_path.iterdir() if f.suffix.lower() in ['.asm', '.bat', '.c', '.cpp', '.cs', '.csproj', '.css',
|
||||
'.csv', '.docx', '.h', '.hh', '.hpp', '.html', '.inc', '.ini', '.java', '.js', '.json', '.log',
|
||||
'.lua', '.map', '.md', '.pas', '.pdf', '.php', '.pptx', '.ps1', '.py', '.rb', '.rtf', '.s', '.se', '.sh', '.sln',
|
||||
'.snippet', '.snippets', '.sql', '.sym', '.ts', '.txt', '.xlsx', '.xml', '.yaml', '.yml', '.msg'] ]
|
||||
dl = TextDocumentsLoader()
|
||||
|
||||
for f in files:
|
||||
text = dl.read_file(f)
|
||||
self.persona_data_vectorizer.add_document(f.name, text, f)
|
||||
# data_vectorization_chunk_size: 512 # chunk size
|
||||
# data_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
# data_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
self.persona_data_vectorizer.build_index()
|
||||
|
||||
else:
|
||||
self.persona_data_vectorizer = None
|
||||
@ -1820,7 +1816,7 @@ class AIPersonality:
|
||||
while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
|
||||
self.step_start(f"Comprerssing {doc_name}...")
|
||||
chunk_size = int(self.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
|
||||
document_chunks =TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
summary_instruction,
|
||||
@ -1831,7 +1827,6 @@ class AIPersonality:
|
||||
chunk_summary_post_processing=chunk_summary_post_processing,
|
||||
summary_mode=summary_mode)
|
||||
tk = self.model.tokenize(text)
|
||||
tk = self.model.tokenize(text)
|
||||
dtk_ln=prev_len-len(tk)
|
||||
prev_len = len(tk)
|
||||
self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
|
||||
@ -1857,7 +1852,7 @@ class AIPersonality:
|
||||
prev_len = len(tk)
|
||||
while len(tk)>max_summary_size:
|
||||
chunk_size = int(self.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.model.tokenize, self.model.detokenize, True)
|
||||
document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
data_extraction_instruction,
|
||||
@ -2548,7 +2543,7 @@ class APScript(StateMachine):
|
||||
while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
|
||||
self.step_start(f"Comprerssing {doc_name}...")
|
||||
chunk_size = int(self.personality.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
|
||||
document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
summary_instruction,
|
||||
@ -2585,7 +2580,7 @@ class APScript(StateMachine):
|
||||
prev_len = len(tk)
|
||||
while len(tk)>max_summary_size:
|
||||
chunk_size = int(self.personality.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.personality.model.tokenize, self.personality.model.detokenize, True)
|
||||
document_chunks = TextChunker.chunk_text(text, self.model, chunk_size, 0, True)
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
data_extraction_instruction,
|
||||
@ -2893,15 +2888,25 @@ class APScript(StateMachine):
|
||||
return self.personality.internet_search_with_vectorization(query, quick_search=quick_search)
|
||||
|
||||
|
||||
def vectorize_and_query(self, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
|
||||
vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = self.personality.model)
|
||||
decomposer = DocumentDecomposer()
|
||||
chunks = decomposer.decompose_document(text, max_chunk_size, overlap_size,self.personality.model.tokenize,self.personality.model.detokenize)
|
||||
for i, chunk in enumerate(chunks):
|
||||
vectorizer.add_document(f"chunk_{i}", self.personality.model.detokenize(chunk))
|
||||
vectorizer.index()
|
||||
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
|
||||
return docs, sorted_similarities
|
||||
def vectorize_and_query(self, title, url, text, query, max_chunk_size=512, overlap_size=20, internet_vectorization_nb_chunks=3):
|
||||
|
||||
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
|
||||
vectorizer = self.config.rag_vectorizer
|
||||
if vectorizer == "bert":
|
||||
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
|
||||
v = BERTVectorizer()
|
||||
elif vectorizer == "tfidf":
|
||||
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
|
||||
v = TFIDFVectorizer()
|
||||
elif vectorizer == "word2vec":
|
||||
from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
|
||||
v = Word2VecVectorizer()
|
||||
|
||||
vectorizer = VectorDatabase("", v, TikTokenTokenizer(), self.config.rag_chunk_size, self.config.rag_overlap)
|
||||
vectorizer.add_document(title, text, url)
|
||||
vectorizer.build_index()
|
||||
chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
|
||||
return chunks
|
||||
|
||||
|
||||
def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
|
||||
|
@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
|
||||
from lollms.databases.discussions_database import DiscussionsDB, Discussion
|
||||
from typing import List
|
||||
import shutil
|
||||
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
|
||||
import tqdm
|
||||
from pathlib import Path
|
||||
class GenerateRequest(BaseModel):
|
||||
|
@ -14,7 +14,7 @@ from pydantic import BaseModel
|
||||
from starlette.responses import StreamingResponse
|
||||
from lollms.types import MSG_TYPE
|
||||
from lollms.main_config import BaseConfig
|
||||
from lollms.utilities import output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
|
||||
from lollms.utilities import find_next_available_filename, output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
|
||||
from lollms.security import sanitize_path, validate_path, check_access
|
||||
from pathlib import Path
|
||||
from ascii_colors import ASCIIColors
|
||||
@ -176,8 +176,7 @@ async def text2Wave(request: LollmsText2AudioRequest):
|
||||
request.fn = (lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")/request.fn
|
||||
validate_path(request.fn,[str(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out")])
|
||||
else:
|
||||
request.fn = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out"/"tts2audio.wav"
|
||||
|
||||
request.fn = find_next_available_filename(lollmsElfServer.lollms_paths.personal_outputs_path/"audio_out", "tts_out","wave")
|
||||
# Verify the path exists
|
||||
request.fn.parent.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
@ -236,6 +235,7 @@ def start_xtts():
|
||||
lollmsElfServer.tts = LollmsXTTS(
|
||||
lollmsElfServer,
|
||||
voices_folders=[voices_folder, lollmsElfServer.lollms_paths.custom_voices_path],
|
||||
freq=lollmsElfServer.config.xtts_freq
|
||||
)
|
||||
lollmsElfServer.HideBlockingMessage()
|
||||
except Exception as ex:
|
||||
|
@ -18,7 +18,6 @@ from ascii_colors import ASCIIColors
|
||||
from lollms.databases.discussions_database import DiscussionsDB
|
||||
from lollms.security import check_access
|
||||
from pathlib import Path
|
||||
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
|
||||
import tqdm
|
||||
from fastapi import FastAPI, UploadFile, File
|
||||
import shutil
|
||||
|
@ -34,8 +34,9 @@ from queue import Queue
|
||||
import re
|
||||
|
||||
class LollmsXTTS(LollmsTTS):
|
||||
def __init__(self, app: LollmsApplication, voices_folders: List[str|Path]):
|
||||
def __init__(self, app: LollmsApplication, voices_folders: List[str|Path], freq = 22050):
|
||||
super().__init__("lollms_xtts", app)
|
||||
self.freq = freq
|
||||
self.generation_threads = {}
|
||||
self.voices_folders = [Path(v) for v in voices_folders] + [Path(__file__).parent/"voices"]
|
||||
self.stop_event = threading.Event()
|
||||
@ -75,7 +76,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
def get(app: LollmsApplication) -> 'LollmsXTTS':
|
||||
# Verify if the service is installed and if true then return an instance of LollmsXTTS
|
||||
if LollmsXTTS.verify(app.lollms_paths):
|
||||
return LollmsXTTS(app, app.lollms_paths.custom_voices_path)
|
||||
return LollmsXTTS(app, app.lollms_paths.custom_voices_path, freq=app.config.xtts_freq)
|
||||
else:
|
||||
raise Exception("LollmsXTTS service is not installed properly.")
|
||||
def get_speaker_wav(self, speaker) -> Path:
|
||||
@ -147,7 +148,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
if wav is None:
|
||||
# Play any remaining buffered sentences
|
||||
for buffered_wav in buffer:
|
||||
self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
|
||||
self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
|
||||
self.play_obj.wait_done()
|
||||
time.sleep(0.5) # Pause between sentences
|
||||
ASCIIColors.green("Audio done")
|
||||
@ -156,7 +157,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
buffered_sentences += 1
|
||||
if buffered_sentences >= 2:
|
||||
for buffered_wav in buffer:
|
||||
self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, 22050)
|
||||
self.play_obj = sa.play_buffer(buffered_wav.tobytes(), 1, 2, self.freq)
|
||||
self.play_obj.wait_done()
|
||||
time.sleep(0.5) # Pause between sentences
|
||||
buffer = []
|
||||
@ -166,7 +167,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
with wave.open(str(file_name_or_path), 'wb') as wf:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2)
|
||||
wf.setframerate(22050)
|
||||
wf.setframerate(self.freq)
|
||||
for wav in wav_data:
|
||||
wf.writeframes(wav.tobytes())
|
||||
|
||||
|
@ -7,7 +7,10 @@ from ascii_colors import ASCIIColors
|
||||
from lollms.types import MSG_TYPE, SUMMARY_MODE
|
||||
from lollms.com import LoLLMsCom
|
||||
from lollms.utilities import PromptReshaper, remove_text_from_string, process_ai_output
|
||||
from safe_store import DocumentDecomposer
|
||||
from lollmsvectordb.text_chunker import TextChunker
|
||||
from lollmsvectordb.database_elements.document import Document
|
||||
from lollmsvectordb.directory_binding import DirectoryBinding
|
||||
import hashlib
|
||||
import json
|
||||
class TasksLibrary:
|
||||
def __init__(self, lollms:LoLLMsCom, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None) -> None:
|
||||
@ -566,7 +569,11 @@ class TasksLibrary:
|
||||
while len(tk)>max_summary_size and (document_chunks is None or len(document_chunks)>1):
|
||||
self.step_start(f"Comprerssing {doc_name}... [depth {depth+1}]")
|
||||
chunk_size = int(self.lollms.config.ctx_size*0.6)
|
||||
document_chunks = DocumentDecomposer.decompose_document(text, chunk_size, 0, self.lollms.model.tokenize, self.lollms.model.detokenize, True)
|
||||
tc = TextChunker(chunk_size, 0, model= self.lollms.model)
|
||||
hasher = hashlib.md5()
|
||||
hasher.update(text.encode("utf8"))
|
||||
|
||||
document_chunks = tc.get_text_chunks(text, Document(hasher.hexdigest(), doc_name ) )
|
||||
text = self.summarize_chunks(
|
||||
document_chunks,
|
||||
summary_instruction,
|
||||
@ -577,7 +584,6 @@ class TasksLibrary:
|
||||
chunk_summary_post_processing=chunk_summary_post_processing,
|
||||
summary_mode=summary_mode)
|
||||
tk = self.lollms.model.tokenize(text)
|
||||
tk = self.lollms.model.tokenize(text)
|
||||
dtk_ln=prev_len-len(tk)
|
||||
prev_len = len(tk)
|
||||
self.step(f"Current text size : {prev_len}, max summary size : {max_summary_size}")
|
||||
|
@ -608,7 +608,7 @@ def add_period(text):
|
||||
processed_text = '\n'.join(processed_lines)
|
||||
return processed_text
|
||||
|
||||
def find_next_available_filename(folder_path, prefix):
|
||||
def find_next_available_filename(folder_path, prefix, extension="png"):
|
||||
folder = Path(folder_path)
|
||||
|
||||
if not folder.exists():
|
||||
@ -616,7 +616,7 @@ def find_next_available_filename(folder_path, prefix):
|
||||
|
||||
index = 1
|
||||
while True:
|
||||
next_filename = f"{prefix}_{index}.png"
|
||||
next_filename = f"{prefix}_{index}.{extension}"
|
||||
potential_file = folder / next_filename
|
||||
if not potential_file.exists():
|
||||
return potential_file
|
||||
|
@ -1,35 +1,53 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 81
|
||||
version: 118
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
model_type: null
|
||||
|
||||
show_news_panel: True
|
||||
show_news_panel: true
|
||||
|
||||
# Security measures
|
||||
turn_on_setting_update_validation: True
|
||||
turn_on_code_execution: True
|
||||
turn_on_code_validation: True
|
||||
turn_on_open_file_validation: False
|
||||
turn_on_send_file_validation: False
|
||||
turn_on_setting_update_validation: true
|
||||
turn_on_code_execution: true
|
||||
turn_on_code_validation: true
|
||||
turn_on_open_file_validation: true
|
||||
turn_on_send_file_validation: true
|
||||
turn_on_language_validation: true
|
||||
|
||||
force_accept_remote_access: false
|
||||
|
||||
# Server information
|
||||
headless_server_mode: False
|
||||
headless_server_mode: false
|
||||
allowed_origins: []
|
||||
|
||||
# Host information
|
||||
host: localhost
|
||||
port: 9600
|
||||
|
||||
app_custom_logo: ""
|
||||
|
||||
# Generation parameters
|
||||
discussion_prompt_separator: "!@>"
|
||||
start_header_id_template: "!@>"
|
||||
end_header_id_template: ": "
|
||||
|
||||
separator_template: "\n"
|
||||
|
||||
start_user_header_id_template: "!@>"
|
||||
end_user_header_id_template: ": "
|
||||
end_user_message_id_template: ""
|
||||
|
||||
start_ai_header_id_template: "!@>"
|
||||
end_ai_header_id_template: ": "
|
||||
end_ai_message_id_template: ""
|
||||
|
||||
system_message_template: "system"
|
||||
|
||||
seed: -1
|
||||
ctx_size: 4084
|
||||
max_n_predict: 4096
|
||||
min_n_predict: 512
|
||||
min_n_predict: 1024
|
||||
temperature: 0.9
|
||||
top_k: 50
|
||||
top_p: 0.95
|
||||
@ -50,14 +68,14 @@ user_name: user
|
||||
user_description: ""
|
||||
use_user_name_in_discussions: false
|
||||
use_model_name_in_discussions: false
|
||||
user_avatar: default_user.svg
|
||||
user_avatar: null
|
||||
use_user_informations_in_discussion: false
|
||||
|
||||
# UI parameters
|
||||
discussion_db_name: default
|
||||
|
||||
# Automatic updates
|
||||
debug: False
|
||||
debug: false
|
||||
debug_log_file_path: ""
|
||||
auto_update: true
|
||||
auto_sync_personalities: true
|
||||
@ -77,23 +95,104 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
stt_input_device: 0
|
||||
|
||||
|
||||
# STT service
|
||||
stt_listening_threshold: 1000
|
||||
stt_silence_duration: 2
|
||||
stt_sound_threshold_percentage: 10
|
||||
stt_gain: 1.0
|
||||
stt_rate: 44100
|
||||
stt_channels: 1
|
||||
stt_buffer_size: 10
|
||||
|
||||
stt_activate_word_detection: false
|
||||
stt_word_detection_file: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: 0
|
||||
|
||||
# Voice service
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
use_ai_generated_negative_prompt: false
|
||||
negative_prompt_generation_prompt: Generate negative prompt for the following prompt. negative prompt is a set of words that describe things we do not want to have in the generated image.
|
||||
default_negative_prompt: (((text))), (((ugly))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), ((extra arms)), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck))), ((watermark)), ((robot eyes))
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
|
||||
# Image generation service
|
||||
enable_fooocus_service: false
|
||||
fooocus_base_url: http://localhost:7860
|
||||
|
||||
# diffuser
|
||||
diffusers_offloading_mode: sequential_cpu_offload # sequential_cpu_offload
|
||||
diffusers_model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
|
||||
|
||||
# Dall e service key
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
comfyui_base_url: http://127.0.0.1:8188/
|
||||
comfyui_model: v1-5-pruned-emaonly.ckpt
|
||||
|
||||
# Motion control service
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -107,6 +206,11 @@ petals_device: cuda
|
||||
# lollms service
|
||||
enable_lollms_service: false
|
||||
lollms_base_url: http://localhost:1234
|
||||
lollms_access_keys : "" # set a list of keys separated by commas to restrict access
|
||||
activate_lollms_server: true
|
||||
activate_ollama_emulator: true
|
||||
activate_openai_emulator: true
|
||||
activate_mistralai_emulator: true
|
||||
|
||||
# elastic search service
|
||||
elastic_search_service: false
|
||||
@ -131,13 +235,22 @@ audio_auto_send_input: true
|
||||
audio_silenceTimer: 5000
|
||||
|
||||
# Data vectorization
|
||||
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
|
||||
rag_vectorizer: bert # possible values bert, tfidf, word2vec
|
||||
rag_vectorizer_model: bert-base-nli-mean-tokens # The model name if applicable
|
||||
rag_vectorizer_parameters: null # Parameters of the model in json format
|
||||
rag_chunk_size: 512 # number of tokens per chunk
|
||||
rag_n_chunks: 4 #Number of chunks to recover from the database
|
||||
rag_clean_chunks: true # Remove all unnecessary spaces and line returns
|
||||
rag_follow_subfolders: true #if true the vectorizer will vectorize the content of subfolders too
|
||||
rag_check_new_files_at_startup: false # if true, the vectorizer will automatically check for any new files in the folder and add them to the database
|
||||
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of the chunk before writing it in a simple format
|
||||
|
||||
activate_skills_lib: false # Activate vectorizing previous conversations
|
||||
skills_lib_database_name: "default" # Default skills database
|
||||
summarize_discussion: false # activate discussion summary (better but adds computation time)
|
||||
|
||||
max_summary_size: 512 # in tokens
|
||||
data_vectorization_visualize_on_vectorization: false
|
||||
use_files: true # Activate using files
|
||||
data_vectorization_activate: true # To activate/deactivate data vectorization
|
||||
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
|
||||
data_visualization_method: "PCA" #"PCA" or "TSNE"
|
||||
@ -154,20 +267,21 @@ data_vectorization_make_persistance: false # If true, the data will be persistan
|
||||
|
||||
# Activate internet search
|
||||
activate_internet_search: false
|
||||
activate_internet_pages_judgement: true
|
||||
internet_vectorization_chunk_size: 512 # chunk size
|
||||
internet_vectorization_overlap_size: 128 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 2 # number of chunks to use
|
||||
internet_nb_search_pages: 3 # number of pages to select
|
||||
internet_quick_search: False # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: False # If active the ai decides by itself if it needs to do search
|
||||
internet_vectorization_overlap_size: 0 # overlap between chunks size
|
||||
internet_vectorization_nb_chunks: 4 # number of chunks to use
|
||||
internet_nb_search_pages: 8 # number of pages to select
|
||||
internet_quick_search: false # If active the search engine will not load and read the webpages
|
||||
internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search
|
||||
# Helpers
|
||||
pdf_latex_path: null
|
||||
|
||||
# boosting information
|
||||
positive_boost: null
|
||||
negative_boost: null
|
||||
current_language: null
|
||||
fun_mode: False
|
||||
current_language: english
|
||||
fun_mode: false
|
||||
|
||||
|
||||
# webui configurations
|
||||
@ -175,5 +289,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_model: base
|
@ -6,7 +6,7 @@ setuptools
|
||||
requests
|
||||
|
||||
|
||||
safe_store
|
||||
lollmsvectordb
|
||||
pipmaster
|
||||
ascii_colors>=0.1.3
|
||||
beautifulsoup4
|
||||
|
@ -5,8 +5,8 @@ wget
|
||||
setuptools
|
||||
requests
|
||||
|
||||
safe_store
|
||||
ascii_colors>=0.1.3
|
||||
lollmsvectordb
|
||||
|
||||
autopep8
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user