diff --git a/lollms/app.py b/lollms/app.py
index 63c2375..97ccc26 100644
--- a/lollms/app.py
+++ b/lollms/app.py
@@ -582,15 +582,21 @@ class LollmsApplication(LoLLMsCom):
             self.personality.step_end("Crafting internet search query")
             self.personality.step(f"web search query: {query}")
 
-            self.personality.step_start("Performing Internet search")
+            if self.config.internet_quick_search:
+                self.personality.step_start("Performing Internet search (quick mode)")
+            else:
+                self.personality.step_start("Performing Internet search (advanced mode: slower but more thorough)")
 
             internet_search_results=f"!@>important information: Use the internet search results data to answer {self.config.user_name}'s last message. It is strictly forbidden to give the user an answer without having actual proof from the documentation.\n!@>Web search results:\n"
 
-            docs, sorted_similarities, document_ids = self.personality.internet_search(query, self.config.internet_quick_search)
+            docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search)
             for doc, infos,document_id in zip(docs, sorted_similarities, document_ids):
                 internet_search_infos.append(document_id)
                 internet_search_results += f"search result chunk:\nchunk_infos:{document_id['url']}\nchunk_title:{document_id['title']}\ncontent:{doc}"
-            self.personality.step_end("Performing Internet search")
+            if self.config.internet_quick_search:
+                self.personality.step_end("Performing Internet search (quick mode)")
+            else:
+                self.personality.step_end("Performing Internet search (advanced mode: slower but more thorough)")
 
         if self.personality.persona_data_vectorizer:
             if documentation=="":
diff --git a/lollms/internet.py b/lollms/internet.py
index 8567e90..81e86ab 100644
--- a/lollms/internet.py
+++ b/lollms/internet.py
@@ -82,7 +82,27 @@ def prepare_chrome_driver(chromedriver_path = None):
     driver = webdriver.Chrome(options=chrome_options)
     return driver
 
-def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None, wait_step_delay=1):
+def press_buttons(driver, buttons_to_press=['accept']):
+    from selenium.webdriver.common.by import By
+    from bs4 import BeautifulSoup
+
+    # Parse the HTML content using BeautifulSoup
+    soup = BeautifulSoup(driver.page_source, 'html.parser')
+
+    # Find each requested button by its text (case-insensitive)
+    for button_to_press in buttons_to_press:
+        button = soup.find('button', text=lambda t: t and button_to_press in t.lower())
+
+        if button:
+            # Click the button using Selenium
+            button_element = driver.find_element(By.XPATH, f"//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '{button_to_press}')]")
+            button_element.click()
+            print("Button clicked!")
+        else:
+            print("Button not found.")
+
+
+def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None, wait_step_delay=1, buttons_to_press=['accept']):
     if not PackageManager.check_package_installed("selenium"):
         PackageManager.install_package("selenium")
     if not PackageManager.check_package_installed("bs4"):
@@ -92,7 +112,7 @@ def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None
 
     from selenium import webdriver
     from selenium.common.exceptions import TimeoutException
-    from selenium.webdriver.common.by import By
+
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.support import expected_conditions as EC
 
@@ -101,32 +121,7 @@ def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None
     # Navigate to the URL
     driver.get(url)
     wait_for_page(driver, wait_step_delay)
-
-    # Parse the HTML content using BeautifulSoup
-    soup = BeautifulSoup(driver.page_source, 'html.parser')
-
-    # Find the button that contains the text "accept" (case-insensitive)
-    accept_button = soup.find('button', text=lambda t: 'accept' in t.lower())
-
-    if accept_button:
-        # Click the button using Selenium
-        button_element = driver.find_element(By.XPATH, "//button[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'accept')]")
-        button_element.click()
-        print("Button clicked!")
-    else:
-        print("Button not found.")
-    # Find and click the "Continue reading" button (if available)
-    try:
-        continue_button = WebDriverWait(driver, 0).until(
-            EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Continue reading')]"))
-        )
-        continue_button.click()
-        wait_for_page(driver, wait_step_delay)
-        # Wait for the additional content to load
-        time.sleep(5)
-    except:
-        if lollms_com:
-            lollms_com.info("No 'Continue reading' button found. Proceeding with the current content.")
+    press_buttons(driver, buttons_to_press)
 
     # Parse the HTML content using BeautifulSoup
     soup = BeautifulSoup(driver.page_source, 'html.parser')
@@ -248,7 +243,41 @@ def extract_results(url, max_num, driver=None, wait_step_delay=0.5):
             pass
     return results_list
 
-def internet_search(query, chromedriver_path, config, model = None, quick_search:bool=False):
+def internet_search(query, config, chromedriver_path=None, quick_search:bool=False, buttons_to_press=['accept']):
+    """Search DuckDuckGo for the query and return a list of {url, title, brief, content} dicts.
+    """
+
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+    from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod
+
+    search_results = []
+
+    nb_non_empty = 0
+    # Configure Chrome options
+    driver = prepare_chrome_driver(chromedriver_path)
+
+    results = extract_results(
+        f"https://duckduckgo.com/?q={format_url_parameter(query)}&t=h_&ia=web",
+        config.internet_nb_search_pages,
+        driver
+    )
+
+    for i, result in enumerate(results):
+        title = result["title"]
+        brief = result["brief"]
+        href = result["href"]
+        if quick_search:
+            search_results.append({'url':href, 'title':title, 'brief': brief, 'content':""})
+        else:
+            search_results.append({'url':href, 'title':title, 'brief': brief, 'content':scrape_and_save(href, chromedriver_path=chromedriver_path, buttons_to_press=buttons_to_press)})
+        nb_non_empty += 1
+        if nb_non_empty>=config.internet_nb_search_pages:
+            break
+
+    return search_results
+
+def internet_search_with_vectorization(query, chromedriver_path, config, model = None, quick_search:bool=False, vectorize=True):
     """
     """
 
diff --git a/lollms/personality.py b/lollms/personality.py
index e4a830f..71106e2 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -384,12 +384,12 @@ class AIPersonality:
             f' \n'
         ])
 
-    def internet_search(self, query, quick_search:bool=False):
+    def internet_search_with_vectorization(self, query, quick_search:bool=False):
         """
         Do internet search and return the result
         """
-        from lollms.internet import internet_search
-        return internet_search(query, "", self.config, self.model, quick_search=quick_search)
+        from lollms.internet import internet_search_with_vectorization
+        return internet_search_with_vectorization(query, "", self.config, self.model, quick_search=quick_search)
 
     def sink(self, s=None,i=None,d=None):
         pass
@@ -2258,11 +2258,11 @@ class APScript(StateMachine):
             f' \n'
         ])
 
-    def internet_search(self, query, quick_search:bool=False ):
+    def internet_search_with_vectorization(self, query, quick_search:bool=False ):
         """
         Do internet search and return the result
         """
-        return self.personality.internet_search(query, quick_search=quick_search)
+        return self.personality.internet_search_with_vectorization(query, quick_search=quick_search)
 
 
     def step_start(self, step_text, callback: Callable[[str, MSG_TYPE, dict, list], bool]=None):
diff --git a/lollms/services/sd/lollms_sd.py b/lollms/services/sd/lollms_sd.py
index 6e56b57..acb8aed 100644
--- a/lollms/services/sd/lollms_sd.py
+++ b/lollms/services/sd/lollms_sd.py
@@ -287,10 +287,10 @@ class LollmsSD:
                     # process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
 
                     if share:
-                        run_script_in_env("autosd", script_path +" --share", cwd=self.sd_folder)
+                        run_script_in_env("autosd", str(script_path) +" --share", cwd=self.sd_folder)
                         # subprocess.Popen("conda activate " + str(script_path) +" --share", cwd=self.sd_folder)
                     else:
-                        run_script_in_env("autosd", script_path, cwd=self.sd_folder)
+                        run_script_in_env("autosd", str(script_path), cwd=self.sd_folder)
                         # subprocess.Popen(script_path, cwd=self.sd_folder)
                 else:
                     ASCIIColors.info("Running on linux/MacOs")