mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 20:57:58 +00:00
moved skills database to the new system
This commit is contained in:
parent
0e176402e3
commit
488229b912
@ -263,7 +263,7 @@ class LollmsApplication(LoLLMsCom):
|
||||
|
||||
|
||||
def _generate_text(self, prompt):
|
||||
max_tokens = self.config.ctx_size - self.model.get_nb_tokens(prompt)
|
||||
max_tokens = min(self.config.ctx_size - self.model.get_nb_tokens(prompt),self.config.max_n_predict)
|
||||
generated_text = self.model.generate(prompt, max_tokens)
|
||||
return generated_text.strip()
|
||||
|
||||
|
@ -1,11 +1,15 @@
|
||||
import sqlite3
|
||||
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
|
||||
from lollmsvectordb import VectorDatabase, BERTVectorizer
|
||||
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
|
||||
import numpy as np
|
||||
from ascii_colors import ASCIIColors
|
||||
class SkillsLibrary:
|
||||
|
||||
def __init__(self, db_path, model_name: str = 'bert-base-nli-mean-tokens', chunk_size: int = 512, overlap: int = 0, n_neighbors: int = 5):
    """Open (or create) the skills database and build the vectorizer.

    Args:
        db_path: Path to the sqlite database file holding the skills library.
        model_name: Name of the sentence-embedding model.
        chunk_size: Size (in tokens) of each indexed chunk.
        overlap: Number of tokens shared between consecutive chunks.
        n_neighbors: Number of neighbors used by the vector index.
    """
    self.db_path = db_path
    self._initialize_db()
    # NOTE(review): model_name is accepted but never used — BERTVectorizer()
    # is built with its defaults. Confirm whether it should receive model_name.
    self.vectorizer = VectorDatabase("", BERTVectorizer(), TikTokenTokenizer(), chunk_size, overlap, n_neighbors)
    ASCIIColors.green("Vectorizer ready")
|
||||
|
||||
|
||||
def _initialize_db(self):
|
||||
@ -121,37 +125,38 @@ class SkillsLibrary:
|
||||
return res
|
||||
|
||||
def query_vector_db(self, query_, top_k=3, max_dist=1000):
    """Return the skills most semantically similar to *query_*.

    Loads every row of the ``skills_library`` table, feeds it to the
    vector database, builds the index, then runs a similarity search
    and keeps only the chunks closer than *max_dist*.

    Args:
        query_: Free-text query to match against stored skills.
        top_k: Maximum number of chunks to retrieve.
        max_dist: Chunks whose distance is >= this value are discarded.

    Returns:
        A tuple ``(skill_titles, skills)`` of matching titles and
        contents; both lists are empty when the library has no entries.
    """
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        # No user input is interpolated here; the query is a constant.
        cursor.execute("SELECT id, title, content FROM skills_library")
        res = cursor.fetchall()
        cursor.close()
    finally:
        # Always release the connection, even if the query raises.
        conn.close()

    skills = []
    skill_titles = []
    if res:
        # NOTE(review): documents are re-added and the index rebuilt on
        # every call — consider indexing once at startup. TODO confirm
        # that add_document deduplicates by id.
        for entry in res:
            self.vectorizer.add_document(entry[0], "Title:" + entry[1] + "\n" + entry[2])
        self.vectorizer.build_index()
        for chunk in self.vectorizer.search(query_, top_k):
            if chunk.distance < max_dist:
                skills.append(chunk.text)
                skill_titles.append(chunk.doc.title)
    return skill_titles, skills
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user