Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-19 04:37:54 +00:00)

Commit 7551e7fa02 ("synced with new safestore"), parent eda4d02626
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 55
+version: 56
 binding_name: null
 model_name: null
 
@@ -125,7 +125,8 @@ internet_vectorization_chunk_size: 512 # chunk size
 internet_vectorization_overlap_size: 128 # overlap between chunks size
 internet_vectorization_nb_chunks: 2 # number of chunks to use
 internet_nb_search_pages: 3 # number of pages to select
-
+internet_quick_search: False # If active, the search engine will not load and read the webpages
+internet_activate_search_decision: False # If active, the AI decides by itself whether it needs to search
 # Helpers
 pdf_latex_path: null
 
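The two new keys gate the quick-search path wired through the code below. A minimal sketch of how client code might branch on them; the SimpleNamespace is a hypothetical stand-in for the real loaded configuration object, not the library's loader:

# Hypothetical stand-in for the loaded lollms configuration; only the two new
# flags from the diff above are modeled.
from types import SimpleNamespace

config = SimpleNamespace(
    internet_quick_search=False,              # True: index only result titles/briefs, never load pages
    internet_activate_search_decision=False,  # True: the model decides by itself when to search
)

if config.internet_quick_search:
    print("quick search: skip page scraping, keep only briefs")
else:
    print("full search: scrape each result page")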
@@ -33,7 +33,9 @@ def get_relevant_text_block(
         url,
         driver,
         config,
-        vectorizer
+        vectorizer,
+        title=None,
+        brief=None
     ):
     from bs4 import BeautifulSoup
     # Load the webpage
@@ -50,7 +52,12 @@ def get_relevant_text_block(
 
     all_text = soup.get_text()
     # Remove leading/trailing whitespace and multiple consecutive line breaks
-    vectorizer.add_document(url,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
+    document_id = {
+        'url':url
+    }
+    document_id["title"] = title
+    document_id["brief"] = brief
+    vectorizer.add_document(document_id,all_text, config.internet_vectorization_chunk_size, config.internet_vectorization_overlap_size)
 
 
 def extract_results(url, max_num, driver=None):
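Keying documents by a metadata dict instead of a bare URL is what lets recover_text return citable sources later in this commit. An illustrative sketch of the stored shape; the values here are invented, only the keys come from the diff:

# Illustrative shape of the document_id now passed to vectorizer.add_document.
document_id = {
    'url': "https://example.com/article",
    'title': "Example article",        # None when the caller passed no title
    'brief': "One-line search blurb",  # None when no brief is available
}
assert set(document_id) == {'url', 'title', 'brief'}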
@@ -112,15 +119,8 @@ def extract_results(url, max_num, driver=None):
             pass
     return results_list
 
-def internet_search(query, chromedriver_path, config, model = None):
+def internet_search(query, chromedriver_path, config, model = None, quick_search:bool=False):
     """
     Perform an internet search using the provided query.
-
-    Args:
-        query (str): The search query.
-
-    Returns:
-        dict: The search result as a dictionary.
     """
-
     from selenium import webdriver
@@ -153,17 +153,21 @@ def internet_search(query, chromedriver_path, config, model = None):
         config.internet_nb_search_pages,
         driver
     )
-    # Close the browser
-    driver.quit()
 
     for i, result in enumerate(results):
         title = result["title"]
         brief = result["brief"]
         href = result["href"]
-        get_relevant_text_block(href, driver, config, vectorizer)
+        if quick_search:
+            vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
+        else:
+            get_relevant_text_block(href, driver, config, vectorizer, title, brief)
         nb_non_empty += 1
         if nb_non_empty>=config.internet_nb_search_pages:
             break
     vectorizer.index()
+
+    # Close the browser
+    driver.quit()
-    docs, sorted_similarities = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
-    return docs, sorted_similarities
+    docs, sorted_similarities, document_ids = vectorizer.recover_text(query, config.internet_vectorization_nb_chunks)
+    return docs, sorted_similarities, document_ids
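Putting the hunks together, a hedged usage sketch of the reworked function. It assumes a populated config object and a working chromedriver; pairing docs with document_ids via zip is my reading of the new recover_text return, not something this diff verifies:

from lollms.internet import internet_search

# quick_search=True indexes only the result briefs; the default path scrapes
# each page through get_relevant_text_block with its new title/brief metadata.
docs, sorted_similarities, document_ids = internet_search(
    "what is safe_store?",  # query
    "",                     # chromedriver_path: empty lets the code resolve it
    config,                 # a lollms config carrying the internet_* settings
    model=None,
    quick_search=True,
)
for doc, doc_id in zip(docs, document_ids):
    print(doc_id['title'], doc_id['url'])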
@@ -327,6 +327,45 @@ Date: {{date}}
 
         if callback:
             callback(full_text, MSG_TYPE.MSG_TYPE_FULL_INVISIBLE_TO_USER)
 
+
+    def build_prompt(self, prompt_parts:List[str], sacrifice_id:int=-1, context_size:int=None, minimum_spare_context_size:int=None):
+        """
+        Builds the prompt for generation, truncating the sacrificed part to fit the context.
+
+        Args:
+            prompt_parts (List[str]): A list of strings representing the parts of the prompt.
+            sacrifice_id (int, optional): The index of the part to sacrifice (truncate) when the context overflows.
+            context_size (int, optional): The size of the context. Defaults to config.ctx_size.
+            minimum_spare_context_size (int, optional): The minimum context space to keep free for generation. Defaults to config.min_n_predict.
+
+        Returns:
+            str: The built prompt.
+        """
+        if context_size is None:
+            context_size = self.config.ctx_size
+        if minimum_spare_context_size is None:
+            minimum_spare_context_size = self.config.min_n_predict
+
+        if sacrifice_id == -1 or len(prompt_parts[sacrifice_id])<50:
+            return "\n".join([s for s in prompt_parts if s!=""])
+        else:
+            part_tokens=[]
+            nb_tokens=0
+            for i,part in enumerate(prompt_parts):
+                tk = self.model.tokenize(part)
+                part_tokens.append(tk)
+                if i != sacrifice_id:
+                    nb_tokens += len(tk)
+            if len(part_tokens[sacrifice_id])>0:
+                sacrifice_tk = part_tokens[sacrifice_id]
+                sacrifice_tk = sacrifice_tk[-(context_size-nb_tokens-minimum_spare_context_size):]
+                sacrifice_text = self.model.detokenize(sacrifice_tk)
+            else:
+                sacrifice_text = ""
+            prompt_parts[sacrifice_id] = sacrifice_text
+            return "\n".join([s for s in prompt_parts if s!=""])
+
     def add_collapsible_entry(self, title, content):
         return "\n".join(
         [
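To make the sacrifice mechanism concrete, here is a standalone re-creation with a whitespace "tokenizer" in place of self.model.tokenize/detokenize. It also skips the 50-character short-circuit, so it illustrates the behavior rather than reproducing the method:

# The part at sacrifice_id is trimmed from the left until the other parts plus
# the spare budget fit inside context_size.
def build_prompt_sketch(parts, sacrifice_id, context_size, spare):
    tokens = [p.split() for p in parts]
    used = sum(len(t) for i, t in enumerate(tokens) if i != sacrifice_id)
    budget = context_size - used - spare
    parts = list(parts)
    parts[sacrifice_id] = " ".join(tokens[sacrifice_id][-budget:]) if budget > 0 else ""
    return "\n".join(s for s in parts if s != "")

print(build_prompt_sketch(
    ["system: be brief", "one two three four five six seven eight", "question: sum?"],
    sacrifice_id=1, context_size=12, spare=2,
))  # keeps only the last 5 tokens of the middle part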
@@ -343,13 +382,117 @@ Date: {{date}}
             f'            </details>\n'
         ])
 
-    def internet_search(self, query ):
+    def internet_search(self, query, quick_search:bool=False):
         """
         Do an internet search and return the result
         """
         from lollms.internet import internet_search
-        return internet_search(query, "", self.config, self.model)
+        return internet_search(query, "", self.config, self.model, quick_search=quick_search)
+
+    def sink(self, s=None,i=None,d=None):
+        pass
+
+    def yes_no(self, question: str, context:str="", max_answer_length: int = 50, conditionning="") -> bool:
+        """
+        Answers a yes/no question about the provided context.
+
+        Args:
+            question (str): The yes/no question to answer.
+            context (str, optional): Additional context to give the model.
+            max_answer_length (int, optional): The maximum length of the generated answer. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            bool: True if the model answered yes, False otherwise.
+        """
+        return self.multichoice_question(question, ["no","yes"], context, max_answer_length, conditionning=conditionning)>0
+
+    def multichoice_question(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> int:
+        """
+        Interprets a multiple-choice question from a user's response. Exactly one choice is expected to be correct; all others are considered false. If none match, returns -1.
+
+        Args:
+            question (str): The multiple-choice question posed by the user.
+            possible_answers (List[Any]): A list containing all valid options for the chosen value.
+            max_answer_length (int, optional): Maximum string length allowed while interpreting the user's response. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            int: Index of the selected option within the possible_answers list, or -1 if no match was found.
+        """
+        choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+        elements = [conditionning] if conditionning!="" else []
+        elements += [
+            "!@>instructions:",
+            "Answer this multi choices question.",
+            "Answer with an id from the possible answers.",
+            "Do not answer with an id outside this possible answers.",
+        ]
+        if context!="":
+            elements+=[
+                "!@>Context:",
+                f"{context}",
+            ]
+        elements += [
+            f"!@>question: {question}",
+            "!@>possible answers:",
+            f"{choices}",
+        ]
+        elements += ["!@>answer:"]
+        prompt = self.build_prompt(elements)
+
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50, callback=self.sink).strip().replace("</s>","").replace("<s>","")
+        selection = gen.strip().split()[0].replace(",","").replace(".","")
+        self.print_prompt("Multi choice selection",prompt+gen)
+        try:
+            return int(selection)
+        except:
+            ASCIIColors.cyan("Model failed to answer the question")
+            return -1
+
+    def multichoice_ranking(self, question: str, possible_answers:list, context:str = "", max_answer_length: int = 50, conditionning="") -> list:
+        """
+        Ranks the possible answers for a question from best to worst and returns the ranking as a list of integers.
+
+        Args:
+            question (str): The question posed by the user.
+            possible_answers (List[Any]): A list containing all valid options to rank.
+            max_answer_length (int, optional): Maximum string length allowed while interpreting the user's response. Defaults to 50.
+            conditionning (str, optional): An optional system message to put at the beginning of the prompt.
+
+        Returns:
+            list: The option indices ranked from best to worst, or None if the model failed to produce a ranking.
+        """
+        choices = "\n".join([f"{i}. {possible_answer}" for i, possible_answer in enumerate(possible_answers)])
+        elements = [conditionning] if conditionning!="" else []
+        elements += [
+            "!@>instructions:",
+            "Answer this multi choices question.",
+            "Answer with an id from the possible answers.",
+            "Do not answer with an id outside this possible answers.",
+            f"!@>question: {question}",
+            "!@>possible answers:",
+            f"{choices}",
+        ]
+        if context!="":
+            elements+=[
+                "!@>Context:",
+                f"{context}",
+            ]
+
+        elements += ["!@>answer:"]
+        prompt = self.build_prompt(elements)
+
+        gen = self.generate(prompt, max_answer_length, temperature=0.1, top_k=50, top_p=0.9, repeat_penalty=1.0, repeat_last_n=50).strip().replace("</s>","").replace("<s>","")
+        self.print_prompt("Multi choice ranking",prompt+gen)
+        if "]" in gen:
+            try:
+                ranks = eval(gen.split("]")[0]+"]")
+                return ranks
+            except:
+                ASCIIColors.red("Model failed to rank inputs")
+                return None
+        else:
+            ASCIIColors.red("Model failed to rank inputs")
+            return None
 
     def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
         """This triggers a step start
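A hedged sketch of how these helpers compose. It is not standalone: personality (an AIPersonality instance) and user_message (a chat string) are assumed:

# Assumed: `personality` is an AIPersonality, `user_message` a chat string.
needs_search = personality.yes_no(
    "Does answering this require fresh information from the internet?",
    context=user_message,
)
tool = personality.multichoice_question(
    "Which tool fits this request best?",
    ["none", "image generation", "internet search"],
    context=user_message,
)
if needs_search or tool == 2:
    docs, sims, ids = personality.internet_search(user_message, quick_search=True)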
@@ -2093,11 +2236,12 @@ class APScript(StateMachine):
             f'            </details>\n'
         ])
 
-    def internet_search(self, query ):
+    def internet_search(self, query, quick_search:bool=False ):
         """
        Do an internet search and return the result
         """
-        return self.personality.internet_search(query)
+        return self.personality.internet_search(query, quick_search=quick_search)
 
 
     def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
         """This triggers a step start
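From a personality script, the wrapper simply forwards the flag. A hypothetical Processor showing one way to consume the result; the body is illustrative, and only internet_search and add_collapsible_entry come from this diff:

# Hypothetical APScript processor: reads the flag from the shared config and
# renders the recovered sources as a collapsible block.
class Processor(APScript):
    def run_workflow(self, prompt, previous_discussion_text="", callback=None):
        docs, sorted_similarities, document_ids = self.internet_search(
            prompt, quick_search=self.personality.config.internet_quick_search
        )
        sources = "\n".join(f"- {d['title']}: {d['url']}" for d in document_ids)
        return self.add_collapsible_entry("Web sources", sources)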