Skills library is now updated

Saifeddine ALOUI 2024-02-26 22:58:56 +01:00
parent 895f54b422
commit 65a5b08e4b
15 changed files with 380 additions and 65 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 72
version: 73
binding_name: null
model_name: null
model_variant: null
@ -122,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files

View File

@ -1,9 +1,23 @@
# =================== Lord Of Large Language Models Configuration file ===========================
version: 40
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 73
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
# Security measures
turn_on_code_execution: True
turn_on_code_validation: True
turn_on_open_file_validation: False
turn_on_send_file_validation: False
force_accept_remote_access: false
# Server information
headless_server_mode: False
allowed_origins: []
# Host information
host: localhost
@ -37,11 +51,19 @@ user_avatar: default_user.svg
use_user_informations_in_discussion: false
# UI parameters
discussion_db_name: database.db
discussion_db_name: default
# Automatic updates
debug: False
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
auto_sync_extensions: true
auto_sync_bindings: true
auto_sync_models: true
auto_save: true
auto_title: false
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
@ -49,16 +71,46 @@ hardware_mode: nvidia-tensorcores
# Automatically open the browser
auto_show_browser: true
# copy to clipboard
copy_to_clipboard_add_all_details: false
# Voice service
enable_voice_service: false
xtts_base_url: http://127.0.0.1:8020
xtts_base_url: http://localhost:8020
auto_read: false
current_voice: null
current_language: en
# Image generation service
enable_sd_service: false
sd_base_url: http://127.0.0.1:7860
sd_base_url: http://localhost:7860
# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434
# petals service
enable_petals_service: false
petals_base_url: http://localhost:8064
petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200
# vllm service
enable_vllm_service: false
vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256
# Audio
media_on: false
@ -70,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
@ -86,7 +140,14 @@ data_vectorization_build_keys_words: false # If true, when querrying the databas
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
# Activate internet search
activate_internet_search: false
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
internet_quick_search: False # If active, the search engine will not load and read the webpages
internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to search
# Helpers
pdf_latex_path: null
@ -94,3 +155,13 @@ pdf_latex_path: null
positive_boost: null
negative_boost: null
force_output_language_to_be: null
fun_mode: False
# webui configurations
show_code_of_conduct: true
activate_audio_infos: true
# whisper configuration
whisper_model: base

View File

@ -1,9 +1,23 @@
# =================== Lord Of Large Language Models Configuration file ===========================
version: 40
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 73
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
# Security measures
turn_on_code_execution: True
turn_on_code_validation: True
turn_on_open_file_validation: False
turn_on_send_file_validation: False
force_accept_remote_access: false
# Server information
headless_server_mode: False
allowed_origins: []
# Host information
host: localhost
@ -37,11 +51,19 @@ user_avatar: default_user.svg
use_user_informations_in_discussion: false
# UI parameters
discussion_db_name: database.db
discussion_db_name: default
# Automatic updates
debug: False
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
auto_sync_extensions: true
auto_sync_bindings: true
auto_sync_models: true
auto_save: true
auto_title: false
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
@ -49,16 +71,46 @@ hardware_mode: nvidia-tensorcores
# Automatically open the browser
auto_show_browser: true
# copy to clipboard
copy_to_clipboard_add_all_details: false
# Voice service
enable_voice_service: false
xtts_base_url: http://127.0.0.1:8020
xtts_base_url: http://localhost:8020
auto_read: false
current_voice: null
current_language: en
# Image generation service
enable_sd_service: false
sd_base_url: http://127.0.0.1:7860
sd_base_url: http://localhost:7860
# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434
# petals service
enable_petals_service: false
petals_base_url: http://localhost:8064
petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200
# vllm service
enable_vllm_service: false
vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256
# Audio
media_on: false
@ -70,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
@ -86,7 +140,14 @@ data_vectorization_build_keys_words: false # If true, when querrying the databas
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
# Activate internet search
activate_internet_search: false
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
internet_quick_search: False # If active, the search engine will not load and read the webpages
internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to search
# Helpers
pdf_latex_path: null
@ -94,3 +155,13 @@ pdf_latex_path: null
positive_boost: null
negative_boost: null
force_output_language_to_be: null
fun_mode: False
# webui configurations
show_code_of_conduct: true
activate_audio_infos: true
# whisper configuration
whisper_model: base

View File

@ -70,7 +70,7 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false

View File

@ -10,6 +10,7 @@ from lollms.terminal import MainMenu
from lollms.types import MSG_TYPE, SENDER_TYPES
from lollms.utilities import PromptReshaper
from lollms.client_session import Client, Session
from lollms.databases.skills_database import SkillsLibrary
from safe_store import TextVectorizer, VectorizationMethod, VisualizationMethod
from typing import Callable
from pathlib import Path
@ -59,6 +60,7 @@ class LollmsApplication(LoLLMsCom):
self.tts = None
self.session = Session(lollms_paths)
self.skills_library = SkillsLibrary(self.lollms_paths.personal_skills_path/(self.config.skills_lib_database_name+".db"))
if not free_mode:
try:
@ -549,7 +551,7 @@ class LollmsApplication(LoLLMsCom):
except:
self.warning("Couldn't add documentation to the context. Please verify the vector database")
# Check if there is discussion knowledge to add to the prompt
if self.config.activate_ltm and self.long_term_memory is not None:
if self.config.activate_skills_lib and self.long_term_memory is not None:
if knowledge=="":
knowledge="!@>knowledge:\n"
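The new line builds the skills database path from the configuration: the file is named after the skills_lib_database_name key and placed under the personal skills folder. A minimal sketch of that composition (the base folder below is an assumption for the example; in lollms it comes from LollmsPaths.personal_skills_path, shown in a later file):

from pathlib import Path
from lollms.databases.skills_database import SkillsLibrary

personal_skills_path = Path.home() / "lollms" / "skill_databases"   # assumed location for the sketch
skills_lib_database_name = "default"                                # the new config key's default
skills_library = SkillsLibrary(personal_skills_path / (skills_lib_database_name + ".db"))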

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 72
version: 73
binding_name: null
model_name: null
model_variant: null
@ -122,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files

View File

@ -0,0 +1,96 @@
import sqlite3


class SkillsLibrary:
    """Small SQLite-backed store of skill entries (version, category, title, content)."""

    def __init__(self, db_path):
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self._initialize_db()

    def _initialize_db(self):
        # Create the skills table and a db_info table that tracks the schema version
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS skills_library (
                id INTEGER PRIMARY KEY,
                version INTEGER,
                category TEXT,
                title TEXT,
                content TEXT
            )
        """)
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS db_info (
                version INTEGER
            )
        """)
        self.cursor.execute("SELECT version FROM db_info")
        version = self.cursor.fetchone()
        if version is None:
            self.cursor.execute("INSERT INTO db_info (version) VALUES (1)")
            self.conn.commit()
        else:
            self._migrate_db(version[0])

    def _migrate_db(self, version):
        # Perform migrations based on the stored schema version.
        # For example, if the current version is 1 and the latest version is 2:
        if version < 2:
            self.cursor.execute("ALTER TABLE skills_library ADD COLUMN new_column TEXT")
            self.cursor.execute("UPDATE db_info SET version = 2")
            self.conn.commit()

    def add_entry(self, version, category, title, content):
        self.cursor.execute("""
            INSERT INTO skills_library (version, category, title, content)
            VALUES (?, ?, ?, ?)
        """, (version, category, title, content))
        self.conn.commit()

    def list_entries(self):
        self.cursor.execute("SELECT * FROM skills_library")
        return self.cursor.fetchall()

    def query_entry(self, text):
        # Simple substring search over category, title and content
        self.cursor.execute("""
            SELECT * FROM skills_library
            WHERE category LIKE ? OR title LIKE ? OR content LIKE ?
        """, (f'%{text}%', f'%{text}%', f'%{text}%'))
        return self.cursor.fetchall()

    def remove_entry(self, id):
        self.cursor.execute("DELETE FROM skills_library WHERE id = ?", (id,))
        self.conn.commit()

    def export_entries(self, file_path):
        with open(file_path, 'w') as f:
            for entry in self.list_entries():
                f.write(f'{entry}\n')

    def import_entries(self, file_path):
        # Expects one comma-separated entry per line: version,category,title,content
        with open(file_path, 'r') as f:
            for line in f:
                entry = line.strip().split(',')
                self.add_entry(*entry)

    def fuse_with_another_db(self, other_db_path):
        # Merge all entries from another skills database into this one
        other_conn = sqlite3.connect(other_db_path)
        other_cursor = other_conn.cursor()
        other_cursor.execute("SELECT * FROM skills_library")
        for row in other_cursor.fetchall():
            self.add_entry(*row[1:])  # skip the id column
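Taken together, the class exposes a small CRUD surface over a single SQLite file. A usage sketch (the database path and the entry values below are made up for illustration, not taken from the commit):

lib = SkillsLibrary("skills.db")
lib.add_entry(1, "coding", "Summarize a diff", "Walk through each hunk and describe its intent.")
print(lib.query_entry("diff"))       # rows whose category, title or content match the text
print(lib.list_entries())            # all stored (id, version, category, title, content) rows
lib.remove_entry(1)
lib.export_entries("skills_export.txt")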

View File

@ -64,6 +64,7 @@ class LollmsPaths:
self.personal_data_path = self.personal_path / "data"
self.personal_memory_path = self.personal_path / "memory"
self.personal_discussions_path = self.personal_path / "discussion_databases"
self.personal_skills_path = self.personal_path / "skill_databases"
self.personal_models_path = self.personal_path / "models"
self.personal_uploads_path = self.personal_path / "uploads"
self.personal_log_path = self.personal_path / "logs"
@ -109,6 +110,9 @@ class LollmsPaths:
ASCIIColors.yellow(f"{self.personal_configuration_path}")
ASCIIColors.red("personal_discussions_path:",end="")
ASCIIColors.yellow(f"{self.personal_discussions_path}")
ASCIIColors.red("personal_skills_path:",end="")
ASCIIColors.yellow(f"{self.personal_skills_path}")
ASCIIColors.red("personal_models_path:",end="")
ASCIIColors.yellow(f"{self.personal_models_path}")
ASCIIColors.red("personal_user_infos_path:",end="")
@ -147,6 +151,7 @@ class LollmsPaths:
"Personal Configuration Path": self.personal_configuration_path,
"Personal Data Path": self.personal_data_path,
"Personal Databases Path": self.personal_discussions_path,
"Personal Skills Path": self.personal_skills_path,
"Personal Models Path": self.personal_models_path,
"Personal Uploads Path": self.personal_uploads_path,
"Personal Log Path": self.personal_log_path,
@ -169,6 +174,7 @@ class LollmsPaths:
self.personal_models_path.mkdir(parents=True, exist_ok=True)
self.personal_data_path.mkdir(parents=True, exist_ok=True)
self.personal_discussions_path.mkdir(parents=True, exist_ok=True)
self.personal_skills_path.mkdir(parents=True, exist_ok=True)
self.personal_log_path.mkdir(parents=True, exist_ok=True)
self.personal_certificates.mkdir(parents=True, exist_ok=True)
self.personal_outputs_path.mkdir(parents=True, exist_ok=True)

View File

@ -1,7 +1,9 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 71
version: 73
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
@ -120,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files

View File

@ -75,39 +75,6 @@ def select_database(data:DatabaseSelectionParameters):
if lollmsElfServer.config.auto_save:
lollmsElfServer.config.save_config()
if lollmsElfServer.config.data_vectorization_activate and lollmsElfServer.config.activate_ltm:
try:
ASCIIColors.yellow("0- Detected discussion vectorization request")
folder = lollmsElfServer.lollms_paths.personal_discussions_path/"vectorized_dbs"
folder.mkdir(parents=True, exist_ok=True)
lollmsElfServer.long_term_memory = TextVectorizer(
vectorization_method=VectorizationMethod.TFIDF_VECTORIZER,#=VectorizationMethod.BM25_VECTORIZER,
database_path=folder/lollmsElfServer.config.discussion_db_name,
data_visualization_method=VisualizationMethod.PCA,#VisualizationMethod.PCA,
save_db=True
)
ASCIIColors.yellow("1- Exporting discussions")
lollmsElfServer.info("Exporting discussions")
discussions = lollmsElfServer.db.export_all_as_markdown_list_for_vectorization()
ASCIIColors.yellow("2- Adding discussions to vectorizer")
lollmsElfServer.info("Adding discussions to vectorizer")
index = 0
nb_discussions = len(discussions)
for (title,discussion) in tqdm(discussions):
lollmsElfServer.sio.emit('update_progress',{'value':int(100*(index/nb_discussions))})
index += 1
if discussion!='':
skill = lollmsElfServer.learn_from_discussion(title, discussion)
lollmsElfServer.long_term_memory.add_document(title, skill, chunk_size=lollmsElfServer.config.data_vectorization_chunk_size, overlap_size=lollmsElfServer.config.data_vectorization_overlap_size, force_vectorize=False, add_as_a_bloc=False)
ASCIIColors.yellow("3- Indexing database")
lollmsElfServer.info("Indexing database",True, None)
lollmsElfServer.long_term_memory.index()
ASCIIColors.yellow("Ready")
except Exception as ex:
lollmsElfServer.error(f"Couldn't vectorize the database:{ex}")
return {"status":False}
return {"status":True}

View File

@ -0,0 +1,23 @@
"""
project: lollms_webui
file: lollms_skills_library.py
author: ParisNeo
description:
This module contains a set of FastAPI routes that allow the user to interact with the skills library.
"""
from fastapi import APIRouter, Request
from lollms_webui import LOLLMSWebUI
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from lollms.security import sanitize_path
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB, Discussion
from typing import List
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
import tqdm
from pathlib import Path
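The commit only adds this module's header and imports; no endpoints are defined yet. A hypothetical minimal route is sketched below (the route path, the request model, and the assumption that LOLLMSWebUI.get_instance() exposes the skills_library created in LollmsApplication are illustrative, not part of the commit):

# Hypothetical sketch only; nothing below exists in this commit.
# Assumption: LOLLMSWebUI.get_instance() returns the running app, which exposes
# the SkillsLibrary instance created in LollmsApplication.__init__ as skills_library.
router = APIRouter()
lollmsElfServer = LOLLMSWebUI.get_instance()

class SkillQuery(BaseModel):
    text: str

@router.post("/query_skills_library")
def query_skills_library(payload: SkillQuery):
    entries = lollmsElfServer.skills_library.query_entry(payload.text)
    return {"entries": entries}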

View File

@ -33,7 +33,7 @@ lollmsElfServer = LOLLMSElfServer.get_instance()
def add_events(sio:socketio):
@sio.on('upgrade_vectorization')
def upgrade_vectorization():
if lollmsElfServer.config.data_vectorization_activate and lollmsElfServer.config.activate_ltm:
if lollmsElfServer.config.data_vectorization_activate and lollmsElfServer.config.activate_skills_lib:
try:
run_async(partial(sio.emit,'show_progress'))
lollmsElfServer.sio.sleep(0)

View File

@ -166,7 +166,7 @@ class LollmsMotionCtrl:
self.default_sampler = sampler
self.default_steps = steps
self.session = requests.Session(lollms_paths)
self.session = requests.Session()
if username and password:
self.set_auth(username, password)

View File

@ -282,7 +282,7 @@ class LollmsSD:
self.default_sampler = sampler
self.default_steps = steps
self.session = requests.Session(lollms_paths)
self.session = requests.Session()
if username and password:
self.set_auth(username, password)

View File

@ -1,9 +1,23 @@
# =================== Lord Of Large Language Models Configuration file ===========================
version: 40
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 73
binding_name: null
model_name: null
model_variant: null
model_type: null
show_news_panel: True
# Security measures
turn_on_code_execution: True
turn_on_code_validation: True
turn_on_open_file_validation: False
turn_on_send_file_validation: False
force_accept_remote_access: false
# Server information
headless_server_mode: False
allowed_origins: []
# Host information
host: localhost
@ -37,11 +51,19 @@ user_avatar: default_user.svg
use_user_informations_in_discussion: false
# UI parameters
discussion_db_name: database.db
discussion_db_name: default
# Automatic updates
debug: False
debug_log_file_path: ""
auto_update: true
auto_sync_personalities: true
auto_sync_extensions: true
auto_sync_bindings: true
auto_sync_models: true
auto_save: true
auto_title: false
# Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
@ -49,16 +71,46 @@ hardware_mode: nvidia-tensorcores
# Automatically open the browser
auto_show_browser: true
# copy to clipboard
copy_to_clipboard_add_all_details: false
# Voice service
enable_voice_service: false
xtts_base_url: http://127.0.0.1:8020
xtts_base_url: http://localhost:8020
auto_read: false
current_voice: null
current_language: en
# Image generation service
enable_sd_service: false
sd_base_url: http://127.0.0.1:7860
sd_base_url: http://localhost:7860
# ollama service
enable_ollama_service: false
ollama_base_url: http://localhost:11434
# petals service
enable_petals_service: false
petals_base_url: http://localhost:8064
petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
petals_device: cuda
# lollms service
enable_lollms_service: false
lollms_base_url: http://localhost:1234
# elastic search service
elastic_search_service: false
elastic_search_url: http://localhost:9200
# vllm service
enable_vllm_service: false
vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256
# Audio
media_on: false
@ -70,8 +122,10 @@ audio_auto_send_input: true
audio_silenceTimer: 5000
# Data vectorization
activate_ltm: false # Activate vectorizing previous conversations
activate_skills_lib: false # Activate the skills library (skills learned from past discussions)
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
@ -86,7 +140,14 @@ data_vectorization_build_keys_words: false # If true, when querrying the databas
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
# Activate internet search
activate_internet_search: false
internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
internet_quick_search: False # If active, the search engine will not load and read the webpages
internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to search
# Helpers
pdf_latex_path: null
@ -94,3 +155,13 @@ pdf_latex_path: null
positive_boost: null
negative_boost: null
force_output_language_to_be: null
fun_mode: False
# webui configurations
show_code_of_conduct: true
activate_audio_infos: true
# whisper configuration
whisper_model: base