diff --git a/configs/config.yaml b/configs/config.yaml
index cd9a8b0..822dcf2 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 55
+version: 56
binding_name: null
model_name: null
@@ -125,7 +125,8 @@ internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
-
+internet_quick_search: False # If active, the search engine will not load and read the web pages
+internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to perform an internet search
# Helpers
pdf_latex_path: null
diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml
index cd9a8b0..822dcf2 100644
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 55
+version: 56
binding_name: null
model_name: null
@@ -125,7 +125,8 @@ internet_vectorization_chunk_size: 512 # chunk size
internet_vectorization_overlap_size: 128 # overlap between chunks size
internet_vectorization_nb_chunks: 2 # number of chunks to use
internet_nb_search_pages: 3 # number of pages to select
-
+internet_quick_search: False # If active, the search engine will not load and read the web pages
+internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to perform an internet search
# Helpers
pdf_latex_path: null
diff --git a/lollms/internet.py b/lollms/internet.py
index fcf313b..a416961 100644
--- a/lollms/internet.py
+++ b/lollms/internet.py
@@ -33,7 +33,9 @@ def get_relevant_text_block(
url,
driver,
config,
- vectorizer
+ vectorizer,
+ title=None,
+ brief=None
):
from bs4 import BeautifulSoup
# Load the webpage
@@ -50,7 +52,12 @@ def get_relevant_text_block(
all_text = soup.get_text()
# Example: Remove leading/trailing whitespace and multiple consecutive line breaks
- vectorizer.add_document(url,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
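+    # Identify the document by its url plus the title and brief returned by the search engine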
+ document_id = {
+ 'url':url
+ }
+ document_id["title"] = title
+ document_id["brief"] = brief
+ vectorizer.add_document(document_id,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
def extract_results(url, max_num, driver=None):
@@ -112,15 +119,8 @@ def extract_results(url, max_num, driver=None):
pass
return results_list
-def internet_search(query, chromedriver_path, config, model = None):
+def internet_search(query, chromedriver_path, config, model = None, quick_search:bool=False):
"""
-    Perform an internet search using the provided query.
-
-    Args:
-        query (str): The search query.
-
-    Returns:
-        dict: The search result as a dictionary.
+    Perform an internet search using the provided query.
+
+    Args:
+        query (str): The search query.
+        chromedriver_path (str): Path to the chromedriver executable.
+        config: The lollms configuration object.
+        model: Optional model used by the vectorizer.
+        quick_search (bool): If True, only the search result briefs are indexed instead of loading and reading the full web pages.
+
+    Returns:
+        tuple: (docs, sorted_similarities, document_ids) as recovered by the vectorizer.
"""
from selenium import webdriver
@@ -153,17 +153,21 @@ def internet_search(query, chromedriver_path, config, model = None):
config.internet_nb_search_pages,
driver
)
for i, result in enumerate(results):
title = result["title"]
brief = result["brief"]
href = result["href"]
- get_relevant_text_block(href, driver, config, vectorizer)
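+        # Quick search indexes only the search-result brief; full search loads the page and vectorizes its whole text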
+ if quick_search:
+            vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
+ else:
+ get_relevant_text_block(href, driver, config, vectorizer, title, brief)
nb_non_empty += 1
if nb_non_empty>=config.internet_nb_search_pages:
break
vectorizer.index()
    # Close the browser
    driver.quit()
- docs, sorted_similarities = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
- return docs, sorted_similarities
+ docs, sorted_similarities, document_ids = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
+ return docs, sorted_similarities, document_ids
diff --git a/lollms/personality.py b/lollms/personality.py
index 4575760..c5c6dbb 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -327,6 +327,45 @@ Date: {{date}}
if callback:
callback(full_text, MSG_TYPE.MSG_TYPE_FULL_INVISIBLE_TO_USER)
+
+
+ def build_prompt(self, prompt_parts:List[str], sacrifice_id:int=-1, context_size:int=None, minimum_spare_context_size:int=None):
+ """
+        Builds a prompt from a list of parts, truncating one "sacrificed" part when needed so the whole prompt fits in the model's context.
+
+        Args:
+            prompt_parts (List[str]): A list of strings representing the parts of the prompt.
+            sacrifice_id (int, optional): Index of the part to truncate if the prompt is too long. Defaults to -1 (no truncation).
+            context_size (int, optional): The context size in tokens. Defaults to the configured ctx_size.
+            minimum_spare_context_size (int, optional): The minimum number of tokens to keep free for generation. Defaults to the configured min_n_predict.
+
+        Returns:
+            str: The built prompt.
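+
+        Example (illustrative only; real token budgets depend on the model's tokenizer):
+            # the long part at index 1 is trimmed from the left if the prompt overflows
+            prompt = self.build_prompt(["!@>context:", long_document, "!@>user: summarize"], sacrifice_id=1)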
+ """
+ if context_size is None:
+ context_size = self.config.ctx_size
+ if minimum_spare_context_size is None:
+ minimum_spare_context_size = self.config.min_n_predict
+
+ if sacrifice_id == -1 or len(prompt_parts[sacrifice_id])<50:
+ return "\n".join([s for s in prompt_parts if s!=""])
+ else:
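+            # Tokenize all parts and count the tokens of everything except the sacrificed part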
+ part_tokens=[]
+ nb_tokens=0
+ for i,part in enumerate(prompt_parts):
+ tk = self.model.tokenize(part)
+ part_tokens.append(tk)
+ if i != sacrifice_id:
+ nb_tokens += len(tk)
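+            # Trim the sacrificed part from the left so the total prompt fits, leaving spare room for generation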
+ if len(part_tokens[sacrifice_id])>0:
+ sacrifice_tk = part_tokens[sacrifice_id]
+ sacrifice_tk= sacrifice_tk[-(context_size-nb_tokens-minimum_spare_context_size):]
+ sacrifice_text = self.model.detokenize(sacrifice_tk)
+ else:
+ sacrifice_text = ""
+ prompt_parts[sacrifice_id] = sacrifice_text
+ return "\n".join([s for s in prompt_parts if s!=""])
+
def add_collapsible_entry(self, title, content):
return "\n".join(
[
@@ -343,13 +382,117 @@ Date: {{date}}
f'</details>\n'
])
- def internet_search(self, query ):
+ def internet_search(self, query, quick_search:bool=False):
"""
Do internet search and return the result
"""
from lollms.internet import internet_search
- return internet_search(query, "", self.config, self.model)
+ return internet_search(query, "", self.config, self.model, quick_search=quick_search)
+ def sink(self, s=None,i=None,d=None):
+ pass
+
+ def yes_no(self, question: str, context:str="", max_answer_length: int = 50, conditionning="") -> bool:
+ """
+        Asks the model a yes/no question and returns the answer as a boolean.
+
+        Args:
+            question (str): The yes/no question to ask.
+            context (str, optional): Additional context to give the model.
+            max_answer_length (int, optional): The maximum length of the generated answer. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            bool: True if the model answers yes, False otherwise.
+ """
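+        # A yes/no question is a two-choice question; index 1 corresponds to "yes"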
+ return self.multichoice_question(question, ["no","yes"], context, max_answer_length, conditionning=conditionning)>0
+
+ def multichoice_question(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> int:
+ """
+        Asks the model a multiple-choice question and returns the index of the selected answer. Exactly one choice is expected to be correct; if the model fails to pick one, returns -1.
+
+        Args:
+            question (str): The multiple-choice question to pose.
+            possible_answers (List[Any]): The list of possible answers to choose from.
+            context (str, optional): Additional context to give the model.
+            max_answer_length (int, optional): The maximum length of the generated answer. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            int: Index of the selected answer within the possible_answers list, or -1 if no valid answer was found.
+ """
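+        # Enumerate the answers so the model can reply with a numeric id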
+ choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+ elements = [conditionning] if conditionning!="" else []
+ elements += [
+ "!@>instructions:",
+            "Answer this multiple-choice question.",
+            "Answer with the id of the chosen answer.",
+            "Do not answer with an id outside the list of possible answers.",
+ ]
+ if context!="":
+ elements+=[
+ "!@>Context:",
+ f"{context}",
+ ]
+ elements += [
+ f"!@>question: {question}",
+ "!@>possible answers:",
+ f"{choices}",
+ ]
+ elements += ["!@>answer:"]
+ prompt = self.build_prompt(elements)
+
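+        # Low temperature and neutral penalties keep the id selection as deterministic as possible; self.sink discards streaming chunks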
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50, callback=self.sink).strip().replace("<s>","").replace("</s>","")
+        selection = gen.strip().split()[0].replace(",","").replace(".","") if gen.strip() else ""
+ self.print_prompt("Multi choice selection",prompt+gen)
+ try:
+ return int(selection)
+ except:
+ ASCIIColors.cyan("Model failed to answer the question")
+ return -1
+
+ def multichoice_ranking(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> int:
+ """
+        Asks the model to rank the possible answers to a question from best to worst.
+
+        Args:
+            question (str): The question to pose.
+            possible_answers (List[Any]): The list of possible answers to rank.
+            context (str, optional): Additional context to give the model.
+            max_answer_length (int, optional): The maximum length of the generated answer. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            List[int]: The answer ids ordered from best to worst, or None if the model failed to produce a ranking.
+ """
+ choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+ elements = [conditionning] if conditionning!="" else []
+ elements += [
+ "!@>instructions:",
+            "Rank the given answers from best to worst.",
+            "Answer with a list of the answer ids ordered from best to worst, for example [2, 0, 1].",
+            "Do not use ids outside the list of possible answers.",
+ f"!@>question: {question}",
+ "!@>possible answers:",
+ f"{choices}",
+ ]
+ if context!="":
+ elements+=[
+ "!@>Context:",
+ f"{context}",
+ ]
+
+ elements += ["!@>answer:"]
+ prompt = self.build_prompt(elements)
+
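+        # Low temperature keeps the ranking output stable enough to parse as a list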
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50).strip().replace("<s>","").replace("</s>","")
+ self.print_prompt("Multi choice ranking",prompt+gen)
+        if "]" in gen:
+ try:
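+                # The model is expected to output a json-like list such as [2, 0, 1]; eval parses it (model output is trusted here)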
+ ranks = eval(gen.split("]")[0]+"]")
+ return ranks
+ except:
+ ASCIIColors.red("Model failed to rank inputs")
+ return None
+ else:
+ ASCIIColors.red("Model failed to rank inputs")
+ return None
def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This triggers a step start
@@ -2093,12 +2236,13 @@ class APScript(StateMachine):
f'</details>\n'
])
- def internet_search(self, query ):
+    def internet_search(self, query, quick_search:bool=False):
"""
Do internet search and return the result
"""
- return self.personality.internet_search(query)
-
+ return self.personality.internet_search(query, quick_search=quick_search)
+
+
def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
"""This triggers a step start