From 03072a36cef484a6ad0616b49b4fcf24e0b433f1 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Sat, 11 May 2024 11:29:47 +0200 Subject: [PATCH] Enhanced internet skills --- lollms/app.py | 4 +-- lollms/internet.py | 87 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/lollms/app.py b/lollms/app.py index 1328c1a..5524b87 100644 --- a/lollms/app.py +++ b/lollms/app.py @@ -697,7 +697,7 @@ class LollmsApplication(LoLLMsCom): need=True if need: self.personality.step_start("Crafting internet search query") - query = self.personality.fast_gen(f"!@>discussion:\n{discussion[-2048:]}\n!@>system: Read the discussion and craft a web search query suited to recover needed information to reply to last {self.config.user_name} message.\nDo not answer the prompt. Do not add explanations.\n!@>current date: {datetime.now()}!@>websearch query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink) + query = self.personality.fast_gen(f"!@>discussion:\n{discussion[-2048:]}\n!@>system: Read the discussion and craft a web search query suited to recover needed information to reply to last {self.config.user_name} message.\nDo not answer the prompt. Do not add explanations.\n!@>current date: {datetime.now()}\n!@>websearch query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink) self.personality.step_end("Crafting internet search query") self.personality.step(f"web search query: {query}") @@ -706,7 +706,7 @@ class LollmsApplication(LoLLMsCom): else: self.personality.step_start("Performing Internet search (advanced mode: slower but more advanced)") - internet_search_results=f"!@>important information: Use the internet search results data to answer {self.config.user_name}'s last message. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n!@>Web search results:\n" + internet_search_results=f"!@>instructions: Use the internet search results data to answer {self.config.user_name}. Try to extract information from the websearch and use it to perform the requested task or answer the question. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.\n!@>Web search results:\n" docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search) for doc, infos,document_id in zip(docs, sorted_similarities, document_ids): diff --git a/lollms/internet.py b/lollms/internet.py index 81eed13..4c37729 100644 --- a/lollms/internet.py +++ b/lollms/internet.py @@ -148,40 +148,73 @@ def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None return text_content + def get_relevant_text_block( - url, - driver, - internet_vectorization_chunk_size, internet_vectorization_overlap_size, - vectorizer, - title=None, - brief=None, - wait_step_delay=0.5 - ): + url, + driver, + internet_vectorization_chunk_size, + internet_vectorization_overlap_size, + vectorizer, + title=None, + brief=None, + wait_step_delay=0.5 +): + from bs4 import BeautifulSoup + import time try: - from bs4 import BeautifulSoup - # Load the webpage + # Chargez la page web avec le driver passé en paramètre driver.get(url) - wait_for_page(driver, wait_step_delay) - - # Wait for JavaScript to execute and get the final page source + # Attendez que le JavaScript s'exécute, avec un délai d'attente progressif si nécessaire + time.sleep(wait_step_delay) html_content = driver.page_source - - # Parse the HTML content soup = BeautifulSoup(html_content, "html.parser") - # Example: Remove all