diff --git a/configs/config.yaml b/configs/config.yaml index a4912f3..4ee8e3c 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 137 +version: 138 binding_name: null model_name: null model_variant: null @@ -273,7 +273,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml index 87c2b66..8dfe638 100644 --- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml @@ -248,7 +248,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_config.yaml index 87c2b66..8dfe638 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml @@ -248,7 +248,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml index 87c2b66..8dfe638 100644 --- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml +++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml @@ -248,7 +248,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/environment.yaml b/environment.yaml deleted file mode 100644 index 2c28d1d..0000000 --- a/environment.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: lollms_env -channels: - - defaults - - conda-forge # Adds a wider selection of packages, especially for less common ones -dependencies: - - python=3.11 - - numpy=1.26.* - - pandas - - pillow>=9.5.0 - - pyyaml - - requests - - rich - - scipy - - tqdm - - setuptools - - wheel - - psutil - - pytest - - gitpython - - beautifulsoup4 - - packaging - - fastapi - - uvicorn - - pydantic - - selenium - - aiofiles - - pip # Conda will manage pip installation - - pip: - - colorama - - ascii-colors>=0.4.2 - - python-multipart - - python-socketio - - python-socketio[client] - - python-socketio[asyncio_client] - - tiktoken - - pipmaster>=0.1.7 - - lollmsvectordb>=1.1.0 - - freedom-search>=0.1.9 - - scrapemaster>=0.2.0 - - lollms_client>=0.7.5 - - zipfile36 - - freedom_search diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index a4912f3..4ee8e3c 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -1,5 +1,5 @@ # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== -version: 137 +version: 138 binding_name: null model_name: null model_variant: null @@ -273,7 +273,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/lollms/databases/skills_database.py b/lollms/databases/skills_database.py index 5e4421b..3406570 100644 --- a/lollms/databases/skills_database.py +++ b/lollms/databases/skills_database.py @@ -1,14 +1,14 @@ import sqlite3 -from lollmsvectordb import VectorDatabase, SemanticVectorizer +from lollmsvectordb import VectorDatabase, TFIDFVectorizer from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer import numpy as np from ascii_colors import ASCIIColors class SkillsLibrary: - def __init__(self, db_path, model_name: str = 'sentence-transformers/bert-base-nli-mean-tokens', chunk_size:int=512, overlap:int=0, n_neighbors:int=5): + def __init__(self, db_path, chunk_size:int=512, overlap:int=0, n_neighbors:int=5): self.db_path =db_path self._initialize_db() - self.vectorizer = VectorDatabase(db_path, SemanticVectorizer(model_name), TikTokenTokenizer(),chunk_size, overlap, n_neighbors) + self.vectorizer = VectorDatabase(db_path, TFIDFVectorizer(), TikTokenTokenizer(),chunk_size, overlap, n_neighbors) ASCIIColors.green("Vecorizer ready") diff --git a/lollms/functions/prompting/image_gen_prompts.py b/lollms/functions/prompting/image_gen_prompts.py index 339dc2e..34256e3 100644 --- a/lollms/functions/prompting/image_gen_prompts.py +++ b/lollms/functions/prompting/image_gen_prompts.py @@ -337,9 +337,9 @@ def get_image_gen_prompt(agent_name, number_of_entries=5) -> Tuple[str, str]: """ try: from lollmsvectordb.vector_database import VectorDatabase - from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer + from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer - db = VectorDatabase("", SemanticVectorizer(), TikTokenTokenizer(), number_of_entries) + db = VectorDatabase("", TFIDFVectorizer(), TikTokenTokenizer(), number_of_entries) image_gen_prompts = get_prompts_list() for entry in image_gen_prompts: diff --git a/lollms/functions/prompting/system_prompts.py b/lollms/functions/prompting/system_prompts.py index ab657dd..b7a007a 100644 --- a/lollms/functions/prompting/system_prompts.py +++ b/lollms/functions/prompting/system_prompts.py @@ -134,9 +134,9 @@ def get_system_prompt(agent_name, number_of_entries=5) -> Tuple[str, str]: """ try: from lollmsvectordb.vector_database import VectorDatabase - from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer + from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer - db = VectorDatabase("", SemanticVectorizer(), TikTokenTokenizer(), number_of_entries) + db = VectorDatabase("", TFIDFVectorizer(), TikTokenTokenizer(), number_of_entries) system_prompts = get_prompts() diff --git a/lollms/personality.py b/lollms/personality.py index cb12088..1b9347c 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -17,8 +17,6 @@ from lollms.utilities import PromptReshaper, PackageManager, discussion_path_to_ from lollms.com import NotificationType, NotificationDisplayType from lollms.client_session import Session, Client from lollmsvectordb.vector_database import VectorDatabase -from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer -from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer from lollmsvectordb.text_document_loader import TextDocumentsLoader from lollmsvectordb.database_elements.document import Document import pkg_resources diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml index 0c751e1..e4b3ace 100644 --- a/lollms/server/configs/config.yaml +++ b/lollms/server/configs/config.yaml @@ -248,7 +248,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk diff --git a/lollms/server/endpoints/lollms_file_system.py b/lollms/server/endpoints/lollms_file_system.py index 703add4..9468d67 100644 --- a/lollms/server/endpoints/lollms_file_system.py +++ b/lollms/server/endpoints/lollms_file_system.py @@ -128,7 +128,6 @@ def select_rag_database(client) -> Optional[Dict[str, Path]]: if not PackageManager.check_package_installed_with_version("lollmsvectordb","0.6.0"): PackageManager.install_or_update("lollmsvectordb") - from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer from lollmsvectordb import VectorDatabase from lollmsvectordb.text_document_loader import TextDocumentsLoader from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer diff --git a/lollms/server/endpoints/lollms_rag.py b/lollms/server/endpoints/lollms_rag.py index c888cbe..913ca65 100644 --- a/lollms/server/endpoints/lollms_rag.py +++ b/lollms/server/endpoints/lollms_rag.py @@ -13,8 +13,6 @@ from typing import List, Optional, Union from pathlib import Path from lollmsvectordb.database_elements.chunk import Chunk from lollmsvectordb.vector_database import VectorDatabase -from lollmsvectordb.lollms_vectorizers.semantic_vectorizer import SemanticVectorizer -from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer import sqlite3 import secrets import time diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml index 0c751e1..e4b3ace 100644 --- a/personal_data/configs/lollms_discord_local_config.yaml +++ b/personal_data/configs/lollms_discord_local_config.yaml @@ -248,7 +248,7 @@ audio_silenceTimer: 5000 # Data vectorization rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data -rag_vectorizer: semantic # possible values semantic, tfidf, openai +rag_vectorizer: tfidf # possible values semantic, tfidf, openai rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_parameters: null # Parameters of the model in json format rag_chunk_size: 512 # number of tokens per chunk