moved to the new rag module

This commit is contained in:
Saifeddine ALOUI 2024-06-19 02:46:16 +02:00
parent a011195b86
commit 647371e772
7 changed files with 103 additions and 37 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 114
version: 115
binding_name: null
model_name: null
model_variant: null
@ -240,6 +240,9 @@ audio_silenceTimer: 5000
# Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_chunk_size: 512 # number of tokens per chunk
rag_n_chunks: 4 # Number of chunks to retrieve from the database
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -93,21 +93,33 @@ class LollmsApplication(LoLLMsCom):
self.load_rag_dbs()
except Exception as ex:
trace_exception(ex)
for entry in self.config.rag_databases:
if "mounted" in entry:
parts = entry.split("::")
if not PackageManager.check_package_installed("lollmsvectordb"):
PackageManager.install_package("lollmsvectordb")
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
v = BERTVectorizer()
vdb = VectorDatabase(Path(parts[1])/"db_name.sqlite", v)
vdb.build_index()
self.active_rag_dbs.append({"name":parts[0],"path":parts[1],"vectorizer":vdb})
try:
if "mounted" in entry:
parts = entry.split("::")
database_name = parts[0]
database_path = parts[1]
if not PackageManager.check_package_installed_with_version("lollmsvectordb","0.3.0"):
PackageManager.install_or_update("lollmsvectordb")
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
from lollmsvectordb.tokenizers.tiktoken_tokenizer import TikTokenTokenizer
if self.config.rag_vectorizer == "bert":
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif self.config.rag_vectorizer == "tfidf":
from lollmsvectordb.vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
vdb = VectorDatabase(Path(database_path)/"db_name.sqlite", v, self.model if self.model else TikTokenTokenizer(), n_neighbors=self.config.rag_n_chunks)
vdb.build_index()
self.active_rag_dbs.append({"name":database_name,"path":database_path,"vectorizer":vdb})
self.config.save_config()
except Exception as ex:
trace_exception(ex)
self.rt_com = None
if not free_mode:
try:
@ -281,8 +293,15 @@ class LollmsApplication(LoLLMsCom):
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
v = BERTVectorizer()
vdb = VectorDatabase(Path(parts[1])/"db_name.sqlite", v)
from lollmsvectordb.tokenizers.tiktoken_tokenizer import TikTokenTokenizer
if self.config.rag_vectorizer == "bert":
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif self.config.rag_vectorizer == "tfidf":
from lollmsvectordb.vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
vdb = VectorDatabase(Path(parts[1])/"db_name.sqlite", v, self.model if self.model else TikTokenTokenizer(), n_neighbors=self.config.rag_n_chunks)
vdb.build_index()
self.active_rag_dbs.append({"name":parts[0],"path":parts[1],"vectorizer":vdb})

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 114
version: 115
binding_name: null
model_name: null
model_variant: null
@ -240,6 +240,9 @@ audio_silenceTimer: 5000
# Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: bert # possible values bert, tfidf, word2vec
rag_chunk_size: 512 # number of tokens per chunk
rag_n_chunks: 4 # Number of chunks to retrieve from the database
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -35,7 +35,7 @@ def luma_ai_dream_machine_video_creator(prompt: str) -> str:
try:
# Open the Luma AI Dream Machine webpage
webbrowser.open("https://lumalabs.ai/dream-machine/creations")
time.sleep(5) # Wait for the page to load
time.sleep(2) # Wait for the page to load
# Locate the input section and type the prompt
input_image_path = Path(__file__).parent/"input_section_image.png" # Replace with the actual path to your image

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 2.6 KiB

View File

@ -16,13 +16,14 @@ from fastapi.responses import FileResponse
from lollms.binding import BindingBuilder, InstallOption
from lollms.security import sanitize_path_from_endpoint
from ascii_colors import ASCIIColors
from lollms.utilities import load_config, trace_exception, gc, PackageManager
from lollms.utilities import load_config, trace_exception, gc, PackageManager, run_async
from pathlib import Path
from typing import List, Optional, Dict
from lollms.security import check_access
from functools import partial
import os
import re
import threading
# ----------------------- Defining router and main class ------------------------------
router = APIRouter()
lollmsElfServer = LOLLMSElfServer.get_instance()
@ -94,7 +95,7 @@ def open_file(file_types: List[str]) -> Optional[Path]:
print(f"An error occurred: {e}")
return None
def select_rag_database() -> Optional[Dict[str, Path]]:
def select_rag_database(client) -> Optional[Dict[str, Path]]:
"""
Opens a folder selection dialog and then a string input dialog to get the database name.
@ -124,17 +125,26 @@ def select_rag_database() -> Optional[Dict[str, Path]]:
if db_name:
try:
lollmsElfServer.ShowBlockingMessage("Adding a new database.\nVectorizing the database")
if not PackageManager.check_package_installed("lollmsvectordb"):
PackageManager.install_package("lollmsvectordb")
if not PackageManager.check_package_installed_with_version("lollmsvectordb","0.3.0"):
PackageManager.install_or_update("lollmsvectordb")
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
v = BERTVectorizer()
vdb = VectorDatabase(Path(folder_path)/"db_name.sqlite", v)
from lollmsvectordb.tokenizers.tiktoken_tokenizer import TikTokenTokenizer
if lollmsElfServer.config.rag_vectorizer == "bert":
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
vdb = VectorDatabase(Path(folder_path)/"db_name.sqlite", v, lollmsElfServer.model if lollmsElfServer.model else TikTokenTokenizer())
# Get all files in the folder
folder = Path(folder_path)
file_types = [f"*{f}" for f in TextDocumentsLoader.get_supported_file_types]
file_types = [f"*{f}" for f in TextDocumentsLoader.get_supported_file_types()]
files = []
for file_type in file_types:
files.extend(folder.glob(file_type))
@ -144,13 +154,18 @@ def select_rag_database() -> Optional[Dict[str, Path]]:
try:
text = TextDocumentsLoader.read_file(fn)
title = fn.stem # Use the file name without extension as the title
vdb.add_document(title, text)
vdb.add_document(title, text, fn)
print(f"Added document: {title}")
except Exception as e:
print(f"Failed to add document {fn}: {e}")
if vdb.new_data: #New files are added, need reindexing
ASCIIColors.blue("Indexing database ...", end="", flush=True)
vdb.build_index()
ASCIIColors.success("OK")
lollmsElfServer.HideBlockingMessage()
return {"database_name": db_name, "database_path": Path(folder_path)}
except:
run_async(partial(lollmsElfServer.sio.emit,'rag_db_added ', {"database_name": db_name, "database_path": Path(folder_path)}, to=client.client_id))
except Exception as ex:
trace_exception(ex)
lollmsElfServer.HideBlockingMessage()
else:
return None
@ -218,8 +233,10 @@ async def add_rag_database(database_infos: SelectDatabase):
"""
Selects and names a database
"""
check_access(lollmsElfServer, database_infos.client_id)
return select_rag_database()
client = check_access(lollmsElfServer, database_infos.client_id)
lollmsElfServer.rag_thread = threading.Thread(target=select_rag_database, args=[client])
lollmsElfServer.rag_thread.start()
return True
@router.post("/toggle_mount_rag_database")
def toggle_mount_rag_database(database_infos: MountDatabase):
@ -233,12 +250,18 @@ def toggle_mount_rag_database(database_infos: MountDatabase):
if not PackageManager.check_package_installed("lollmsvectordb"):
PackageManager.install_package("lollmsvectordb")
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
v = BERTVectorizer()
vdb = VectorDatabase(Path(path)/"db_name.sqlite", v)
from lollmsvectordb.tokenizers.tiktoken_tokenizer import TikTokenTokenizer
if lollmsElfServer.config.rag_vectorizer == "bert":
from lollmsvectordb.vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif lollmsElfServer.config.rag_vectorizer == "tfidf":
from lollmsvectordb.vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
vdb = VectorDatabase(Path(path)/"db_name.sqlite", v, lollmsElfServer.model if lollmsElfServer.model else TikTokenTokenizer(), n_neighbors=lollmsElfServer.config.rag_n_chunks)
vdb.build_index()
lollmsElfServer.active_rag_dbs.append({"name":database_infos.database_name,"path":path,"vectorizer":vdb})
lollmsElfServer.config.save_config()

View File

@ -17,7 +17,7 @@ import subprocess
import gc
import shutil
from typing import List
from typing import List, Optional
from PIL import Image
import requests
@ -986,7 +986,25 @@ class PackageManager:
trace_exception(ex)
ASCIIColors.error("Something is wrong with your library.\nIt looks installed, but I am not able to call it.\nTry to reinstall it.")
return False
@staticmethod
def check_package_installed_with_version(package_name: str, min_version: Optional[str] = None) -> bool:
    """Report whether a package can be imported and, optionally, is recent enough.

    Args:
        package_name: Name handed to ``importlib.import_module`` and, when a
            minimum version is requested, to ``pkg_resources.get_distribution``.
        min_version: Lowest acceptable version string. When it is ``None`` or
            empty, only importability is checked.

    Returns:
        ``True`` when the import succeeds and, if ``min_version`` is given, the
        installed version is not lower than it. ``False`` in every other case;
        exceptions derived from ``Exception`` are caught and printed instead of
        being propagated.
    """
    try:
        # The import itself is the availability check; the module object is not needed.
        importlib.import_module(package_name)
        if min_version:
            # Imported lazily: pkg_resources ships with setuptools, and its absence
            # must not make a plain importability check fail.
            import pkg_resources

            installed_version = pkg_resources.get_distribution(package_name).version
            if pkg_resources.parse_version(installed_version) < pkg_resources.parse_version(min_version):
                # Report the outdated version with the same message as an import
                # failure, without raising an exception just to catch it below.
                print(f"Oopsie daisy! The library '{package_name}' is playing hide and seek. Error: Version {installed_version} is less than the required {min_version}.")
                return False
        return True
    except ImportError as ex:
        print(f"Oopsie daisy! The library '{package_name}' is playing hide and seek. Error: {ex}")
        return False
    except Exception as ex:
        # Anything else (e.g. distribution metadata that cannot be resolved) is
        # treated as "not installed" so callers can attempt a (re)install.
        print(f"Yikes! Something went bananas with your library. Error: {ex}")
        return False
@staticmethod
def safe_import(module_name, library_name=None):
if not PackageManager.check_package_installed(module_name):