upgraded vector db

This commit is contained in:
Saifeddine ALOUI 2024-06-26 01:27:11 +02:00
parent 09d80f12da
commit cb32fcb474
5 changed files with 37 additions and 8 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 120
binding_name: null
model_name: null
model_variant: null
@ -249,6 +249,8 @@ rag_clean_chunks: true #Removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will also vectorize the content of subfolders
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk and rewrite it in a simpler format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to run multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #drops any junk chunk with fewer than this many tokens
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
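The two new flags tighten chunk quality: `rag_min_nb_tokens_in_chunk` drops tiny fragments before they pollute the index, and `rag_activate_multi_hops` lets retrieval iterate until enough context has been gathered. A minimal sketch of the length filter, assuming chunks are plain dicts with an `nb_tokens` field (the real vectorizer uses its own chunk type):

```python
from typing import Any, Dict, List

def filter_short_chunks(chunks: List[Dict[str, Any]], min_nb_tokens: int = 10) -> List[Dict[str, Any]]:
    # Drop chunks below the rag_min_nb_tokens_in_chunk threshold;
    # fragments this small are usually separators, headers, or other junk.
    return [c for c in chunks if c["nb_tokens"] >= min_nb_tokens]

chunks = [
    {"text": "## ---", "nb_tokens": 3},
    {"text": "The vectorizer also walks subfolders when rag_follow_subfolders is true.", "nb_tokens": 14},
]
print(filter_short_chunks(chunks))  # keeps only the second chunk
```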

View File

@ -860,6 +860,7 @@ class LollmsApplication(LoLLMsCom):
Returns:
Tuple: The prepared prompt, the original message content, the tokenized query, the context details, and the internet search infos.
"""
documentation_entries = []
start_header_id_template = self.config.start_header_id_template
end_header_id_template = self.config.end_header_id_template
@ -1039,7 +1040,7 @@ class LollmsApplication(LoLLMsCom):
])
query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink)
self.personality.step_end("Building vector store query")
ASCIIColors.cyan(f"Query: {query}")
ASCIIColors.magenta(f"Query: {query}")
self.personality.step(f"Query: {query}")
else:
query = current_message.content
@ -1058,6 +1059,7 @@ class LollmsApplication(LoLLMsCom):
results+=r
n_neighbors = self.active_rag_dbs[0]["vectorizer"].n_neighbors
sorted_results = sorted(results, key=lambda x: x.distance)[:n_neighbors]
for chunk in sorted_results:
document_infos = f"{separator_template}".join([
f"{start_header_id_template}document chunk{end_header_id_template}",
@ -1065,7 +1067,13 @@ class LollmsApplication(LoLLMsCom):
f"source_document_path:{chunk.doc.path}",
f"content:\n{chunk.text}\n"
])
documentation_entries.append({
"document_title":chunk.doc.title,
"document_path":chunk.doc.path,
"chunk_content":chunk.text,
"chunk_size":chunk.nb_tokens,
"distance":chunk.distance,
})
documentation += document_infos
if (len(client.discussion.text_files) > 0) and client.discussion.vectorizer is not None:
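Taken together, these hunks merge the hits from every active RAG database, sort them by vector distance, keep the closest `n_neighbors`, and now also record each surviving chunk as a structured entry rather than only a flattened string. A standalone sketch of that bookkeeping (the `Chunk` dataclass is a stand-in that flattens the diff's `chunk.doc.title`/`chunk.doc.path`; field names follow the diff):

```python
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class Chunk:
    # Stand-in for the vectorizer's chunk/document types.
    title: str
    path: str
    text: str
    nb_tokens: int
    distance: float

def build_documentation_entries(results: List[Chunk], n_neighbors: int) -> List[Dict]:
    # Keep only the n_neighbors closest chunks across all merged databases.
    sorted_results = sorted(results, key=lambda c: c.distance)[:n_neighbors]
    return [
        {
            "document_title": c.title,
            "document_path": c.path,
            "chunk_content": c.text,
            "chunk_size": c.nb_tokens,
            "distance": c.distance,
        }
        for c in sorted_results
    ]
```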
@ -1300,6 +1308,7 @@ class LollmsApplication(LoLLMsCom):
"internet_search_infos":internet_search_infos,
"internet_search_results":internet_search_results,
"documentation":documentation,
"documentation_entries":documentation_entries,
"knowledge":knowledge,
"knowledge_infos":knowledge_infos,
"user_description":user_description,
@ -1311,7 +1320,8 @@ class LollmsApplication(LoLLMsCom):
"ai_prefix":ai_prefix,
"extra":""
}
if self.config.debug:
ASCIIColors.hilight(documentation,"source_document_title", ASCIIColors.color_yellow, ASCIIColors.color_red, False)
# Return the prepared query, original message content, and tokenized query
return prompt_data, current_message.content, tokens, context_details, internet_search_infos
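Since `context_details` now carries `documentation_entries`, downstream consumers (personalities, the web UI) can read per-chunk metadata directly instead of re-parsing the flattened `documentation` string. A hypothetical consumer sketch:

```python
def format_sources(context_details: dict) -> str:
    # Hypothetical helper: render the retrieved chunks as a sources list.
    lines = [
        f"- {e['document_title']} ({e['document_path']}) "
        f"[distance={e['distance']:.3f}, {e['chunk_size']} tokens]"
        for e in context_details.get("documentation_entries", [])
    ]
    return "\n".join(lines) if lines else "No documentation chunks were retrieved."
```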

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 118
version: 120
binding_name: null
model_name: null
model_variant: null
@ -249,6 +249,8 @@ rag_clean_chunks: true #Removes all unnecessary spaces and line returns
rag_follow_subfolders: true #if true, the vectorizer will also vectorize the content of subfolders
rag_check_new_files_at_startup: false #if true, the vectorizer will automatically check for new files in the folder and add them to the database
rag_preprocess_chunks: false #if true, an LLM will preprocess the content of each chunk and rewrite it in a simpler format
rag_activate_multi_hops: false #if true, use a multi-hop algorithm to run multiple searches until the AI has enough data
rag_min_nb_tokens_in_chunk: 10 #drops any junk chunk with fewer than this many tokens
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database

View File

@ -315,6 +315,19 @@ class AIPersonality:
if callback:
callback(full_text, MSG_TYPE.MSG_TYPE_FULL)
def ui(self, ui_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This sends ui text to front end
Args:
ui_text (dict): The ui code to be sent to the front end
callback (callable, optional): A callable with this signature (str, MSG_TYPE) to send the text to. Defaults to None.
"""
if not callback and self.callback:
callback = self.callback
if callback:
callback(ui_text, MSG_TYPE.MSG_TYPE_UI)
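The new `ui` helper mirrors the existing `full` pattern but tags the payload as `MSG_TYPE.MSG_TYPE_UI`, so the front end renders it as UI markup rather than chat text. A hypothetical usage from inside a personality:

```python
# Illustrative only; the subclass and method names are made up for this sketch.
class IndexingPersona(AIPersonality):
    def report_progress(self, done: int, total: int):
        self.ui(
            f"<progress value='{done}' max='{total}'></progress> "
            f"{done}/{total} files vectorized"
        )
```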
def full_invisible_to_ai(self, full_text:str, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This sends full text to front end (INVISIBLE to AI)
@ -690,8 +703,6 @@ class AIPersonality:
# TODO : add show progress
gen = self.generate(prompt, max_generation_size, temperature = temperature, top_k = top_k, top_p=top_p, repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, callback=callback, show_progress=show_progress).strip().replace("</s>", "").replace("<s>", "")
if debug:
self.print_prompt("prompt", prompt+gen)
return gen
@ -767,6 +778,9 @@ class AIPersonality:
repeat_penalty=self.model_repeat_penalty if repeat_penalty is None else repeat_penalty,
repeat_last_n = self.model_repeat_last_n if repeat_last_n is None else repeat_last_n,
).strip()
if debug:
self.print_prompt("prompt", prompt+self.bot_says)
return self.bot_says
def setCallback(self, callback: Callable[[str, MSG_TYPE, dict, list], bool]):

View File

@ -678,9 +678,9 @@ async def set_active_personality_settings(request: Request):
class PersonalityInfos(BaseModel):
client_id: str
category: str
name: str
language:Optional[str] = None
@router.post("/copy_to_custom_personas")
async def copy_to_custom_personas(data: PersonalityInfos):
@ -689,6 +689,7 @@ async def copy_to_custom_personas(data: PersonalityInfos):
"""
import shutil
client = check_access(lollmsElfServer, data.client_id)
category = sanitize_path(data.category)
name = sanitize_path(data.name)
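Running both `category` and `name` through `sanitize_path` closes a path-traversal hole: without it, a crafted value containing separators or parent references could reach outside the custom personas folder. lollms' actual sanitizer is not shown in this diff; an illustrative stand-in:

```python
from pathlib import PurePosixPath

def sanitize_path_sketch(value: str) -> str:
    # Illustrative stand-in, not lollms' implementation: reject separators
    # and parent references so user input cannot escape the target folder.
    if "\\" in value or value in ("", ".", ".."):
        raise ValueError(f"Invalid path component: {value!r}")
    if PurePosixPath(value).name != value:
        raise ValueError(f"Invalid path component: {value!r}")
    return value

sanitize_path_sketch("my_persona")    # ok
# sanitize_path_sketch("../secrets")  # raises ValueError
```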