synced with new safestore

Saifeddine ALOUI 2024-02-10 21:35:20 +01:00
parent eda4d02626
commit 7551e7fa02
4 changed files with 174 additions and 24 deletions

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 55
+version: 56
 binding_name: null
 model_name: null
@@ -125,7 +125,8 @@ internet_vectorization_chunk_size: 512 # chunk size
 internet_vectorization_overlap_size: 128 # overlap between chunks size
 internet_vectorization_nb_chunks: 2 # number of chunks to use
 internet_nb_search_pages: 3 # number of pages to select
+internet_quick_search: False # If active, the search engine will not load and read the web pages
+internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to perform a search
 # Helpers
 pdf_latex_path: null

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 55
+version: 56
 binding_name: null
 model_name: null
@@ -125,7 +125,8 @@ internet_vectorization_chunk_size: 512 # chunk size
 internet_vectorization_overlap_size: 128 # overlap between chunks size
 internet_vectorization_nb_chunks: 2 # number of chunks to use
 internet_nb_search_pages: 3 # number of pages to select
+internet_quick_search: False # If active, the search engine will not load and read the web pages
+internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to perform a search
 # Helpers
 pdf_latex_path: null
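
Together these two flags gate the web-search pipeline that the rest of this commit wires up. A minimal sketch of how a caller might honor them, assuming a loaded config object plus the yes_no and internet_search helpers added further down in this commit (the decision-prompt wording is illustrative):

    # Sketch: honoring the two new flags (assumes `config` and a `personality`
    # exposing the yes_no/internet_search helpers added later in this commit)
    def maybe_search(query, config, personality):
        # internet_activate_search_decision: let the model decide whether to search at all
        if config.internet_activate_search_decision:
            if not personality.yes_no("Does answering this prompt require an internet search?", context=query):
                return None
        # internet_quick_search: index only result titles/briefs instead of full pages
        return personality.internet_search(query, quick_search=config.internet_quick_search)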

View File

@@ -33,7 +33,9 @@ def get_relevant_text_block(
         url,
         driver,
         config,
-        vectorizer
+        vectorizer,
+        title=None,
+        brief=None
     ):
    from bs4 import BeautifulSoup
    # Load the webpage
@@ -50,7 +52,12 @@ def get_relevant_text_block(
    all_text = soup.get_text()
    # Example: Remove leading/trailing whitespace and multiple consecutive line breaks
-    vectorizer.add_document(url,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
+    document_id = {
+        'url':url
+    }
+    document_id["title"] = title
+    document_id["brief"] = brief
+    vectorizer.add_document(document_id,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)

 def extract_results(url, max_num, driver=None):
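
This is the "new safestore" part of the commit: add_document now receives a structured document id (a dict carrying url, title, and brief) instead of a bare URL string. A hedged sketch of a call with the new signature (all values illustrative; the chunk sizes come from the config shown above):

    # Sketch: registering a fetched page under the new dict-based document id
    get_relevant_text_block(
        "https://example.com/article",   # url (illustrative)
        driver,                          # an open selenium webdriver
        config,
        vectorizer,                      # the safestore-backed vectorizer
        title="Example article",
        brief="Snippet shown on the search results page",
    )
    # which internally calls:
    # vectorizer.add_document({'url': url, 'title': title, 'brief': brief}, all_text,
    #                         config.internet_vectorization_chunk_size,
    #                         config.internet_vectorization_overlap_size)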
@@ -112,15 +119,9 @@ def extract_results(url, max_num, driver=None):
            pass
    return results_list

-def internet_search(query, chromedriver_path, config, model = None):
+def internet_search(query, chromedriver_path, config, model = None, quick_search:bool=False):
    """
-    Perform an internet search using the provided query.
-
-    Args:
-        query (str): The search query.
-
-    Returns:
-        dict: The search result as a dictionary.
+    Perform an internet search using the provided query and return the most relevant text chunks.
    """
    from selenium import webdriver
@@ -153,17 +153,20 @@ def internet_search(query, chromedriver_path, config, model = None):
                config.internet_nb_search_pages,
                driver
            )

    for i, result in enumerate(results):
        title = result["title"]
        brief = result["brief"]
        href = result["href"]
-        get_relevant_text_block(href, driver, config, vectorizer)
+        if quick_search:
+            vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
+        else:
+            get_relevant_text_block(href, driver, config, vectorizer, title, brief)
        nb_non_empty += 1
        if nb_non_empty>=config.internet_nb_search_pages:
            break
    vectorizer.index()
    # Close the browser
    driver.quit()
-    docs, sorted_similarities = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
-    return docs, sorted_similarities
+    docs, sorted_similarities, document_ids = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
+    return docs, sorted_similarities, document_ids
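
With quick_search=True the loop never opens the result pages: each hit is indexed from its title and brief alone, which makes the search far cheaper at the cost of depth. A usage sketch with an illustrative query (note that the return value now carries the document ids as a third element):

    # Sketch: quick search indexes snippets only; full search reads the pages
    docs, sorted_similarities, document_ids = internet_search(
        "safestore python library", "", config, model, quick_search=True
    )
    for doc, doc_id in zip(docs, document_ids):
        print(doc_id["url"], doc_id["title"])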

View File

@@ -327,6 +327,45 @@ Date: {{date}}
        if callback:
            callback(full_text, MSG_TYPE.MSG_TYPE_FULL_INVISIBLE_TO_USER)

+    def build_prompt(self, prompt_parts:List[str], sacrifice_id:int=-1, context_size:int=None, minimum_spare_context_size:int=None):
+        """
+        Builds the full prompt from a list of parts, truncating one designated part if needed to fit the context.
+
+        Args:
+            prompt_parts (List[str]): A list of strings representing the parts of the prompt.
+            sacrifice_id (int, optional): Index of the part to truncate when the prompt exceeds the context size.
+            context_size (int, optional): The model context size, in tokens. Defaults to config.ctx_size.
+            minimum_spare_context_size (int, optional): Minimum number of tokens to keep free for generation. Defaults to config.min_n_predict.
+
+        Returns:
+            str: The built prompt.
+        """
+        if context_size is None:
+            context_size = self.config.ctx_size
+        if minimum_spare_context_size is None:
+            minimum_spare_context_size = self.config.min_n_predict
+
+        if sacrifice_id == -1 or len(prompt_parts[sacrifice_id])<50:
+            return "\n".join([s for s in prompt_parts if s!=""])
+        else:
+            part_tokens=[]
+            nb_tokens=0
+            for i,part in enumerate(prompt_parts):
+                tk = self.model.tokenize(part)
+                part_tokens.append(tk)
+                if i != sacrifice_id:
+                    nb_tokens += len(tk)
+            if len(part_tokens[sacrifice_id])>0:
+                sacrifice_tk = part_tokens[sacrifice_id]
+                # Keep only the most recent tokens that still fit in the remaining context
+                sacrifice_tk = sacrifice_tk[-(context_size-nb_tokens-minimum_spare_context_size):]
+                sacrifice_text = self.model.detokenize(sacrifice_tk)
+            else:
+                sacrifice_text = ""
+            prompt_parts[sacrifice_id] = sacrifice_text
+
+            return "\n".join([s for s in prompt_parts if s!=""])
+
    def add_collapsible_entry(self, title, content):
        return "\n".join(
            [
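
The sacrifice mechanism exists so that, when a prompt overflows the context window, one designated part (typically the discussion history) is trimmed from its start while the other parts survive intact. A hedged usage sketch with illustrative part contents:

    # Sketch: part 1 (the history) is the sacrifice; parts 0 and 2 are always kept
    prompt = self.build_prompt(
        [
            "!@>system: You are a helpful assistant.",
            discussion_history,          # trimmed from the left if the prompt overflows
            f"!@>user: {user_message}",
        ],
        sacrifice_id=1,
    )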
@@ -343,13 +382,117 @@ Date: {{date}}
            f'    </details>\n'
        ])

-    def internet_search(self, query ):
+    def internet_search(self, query, quick_search:bool=False):
        """
        Do internet search and return the result
        """
        from lollms.internet import internet_search
-        return internet_search(query, "", self.config, self.model)
+        return internet_search(query, "", self.config, self.model, quick_search=quick_search)
+
+    def sink(self, s=None,i=None,d=None):
+        """A no-op callback used to swallow streamed tokens during internal generations."""
+        pass
+
+    def yes_no(self, question: str, context:str="", max_answer_length: int = 50, conditionning="") -> bool:
+        """
+        Answers a yes/no question by asking the model to choose between "no" and "yes".
+
+        Args:
+            question (str): The question to answer.
+            context (str, optional): Additional context to show the model.
+            max_answer_length (int, optional): Maximum length of the generated answer. Defaults to 50.
+            conditionning: An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            bool: True if the model answered yes, False otherwise.
+        """
+        return self.multichoice_question(question, ["no","yes"], context, max_answer_length, conditionning=conditionning)>0
+
+    def multichoice_question(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> int:
+        """
+        Interprets a multiple-choice question from the model's response. Only one choice is expected to be true; all other choices are considered false. If none match, -1 is returned.
+
+        Args:
+            question (str): The multiple-choice question.
+            possible_answers (List[Any]): A list containing all valid options for the chosen value.
+            context (str, optional): Additional context to show the model.
+            max_answer_length (int, optional): Maximum length of the generated answer. Defaults to 50.
+            conditionning: An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            int: Index of the selected option within the possible_answers list, or -1 if no match was found.
+        """
+        choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+        elements = [conditionning] if conditionning!="" else []
+        elements += [
+            "!@>instructions:",
+            "Answer this multiple-choice question.",
+            "Answer with the id of one of the possible answers.",
+            "Do not answer with an id outside these possible answers.",
+        ]
+        if context!="":
+            elements+=[
+                "!@>Context:",
+                f"{context}",
+            ]
+        elements += [
+            f"!@>question: {question}",
+            "!@>possible answers:",
+            f"{choices}",
+        ]
+        elements += ["!@>answer:"]
+        prompt = self.build_prompt(elements)
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50, callback=self.sink).strip().replace("</s>","").replace("<s>","")
+        selection = gen.strip().split()[0].replace(",","").replace(".","")
+        self.print_prompt("Multi choice selection",prompt+gen)
+        try:
+            return int(selection)
+        except:
+            ASCIIColors.cyan("Model failed to answer the question")
+            return -1
+
+    def multichoice_ranking(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> List[int]:
+        """
+        Ranks the possible answers to a question from best to worst and returns the ranked indices.
+
+        Args:
+            question (str): The ranking question.
+            possible_answers (List[Any]): A list containing all options to rank.
+            context (str, optional): Additional context to show the model.
+            max_answer_length (int, optional): Maximum length of the generated answer. Defaults to 50.
+            conditionning: An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            List[int]: Indices of the possible_answers ranked from best to worst, or None if the model failed to produce a ranking.
+        """
+        choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+        elements = [conditionning] if conditionning!="" else []
+        elements += [
+            "!@>instructions:",
+            "Rank the possible answers to this question from best to worst.",
+            "Answer with a list of the ids of the possible answers, ordered from best to worst.",
+            "Do not use ids outside these possible answers.",
+            f"!@>question: {question}",
+            "!@>possible answers:",
+            f"{choices}",
+        ]
+        if context!="":
+            elements+=[
+                "!@>Context:",
+                f"{context}",
+            ]
+        elements += ["!@>answer:"]
+        prompt = self.build_prompt(elements)
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50).strip().replace("</s>","").replace("<s>","")
+        self.print_prompt("Multi choice ranking",prompt+gen)
+        if gen.find("]")>=0:
+            try:
+                # The model is expected to output a python-style list such as [0, 2, 1]
+                ranks = eval(gen.split("]")[0]+"]")
+                return ranks
+            except:
+                ASCIIColors.red("Model failed to rank inputs")
+                return None
+        else:
+            ASCIIColors.red("Model failed to rank inputs")
+            return None
+
    def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
        """This triggers a step start
@@ -2093,11 +2236,12 @@ class APScript(StateMachine):
            f'    </details>\n'
        ])

-    def internet_search(self, query ):
+    def internet_search(self, query, quick_search:bool=False):
        """
        Do internet search and return the result
        """
-        return self.personality.internet_search(query)
+        return self.personality.internet_search(query, quick_search=quick_search)

    def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
        """This triggers a step start