Added prompts list

2025-04-11 20:59:56 +00:00 · 2024-07-21 21:50:24 +02:00 · 2024-07-21 21:50:24 +02:00 · e58077fd89
commit e58077fd89
parent 794e5a5b8b
7 changed files with 147 additions and 194 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 127
+version: 128
 binding_name: null
 model_name: null
 model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
 end_ai_message_id_template: ""

 system_message_template: "system"
+use_continue_message: true

 seed: -1
 ctx_size: 4084
--- a/lollms/app.py
+++ b/lollms/app.py
@ -975,7 +975,7 @@ class LollmsApplication(LoLLMsCom):
                    if len(chunks)>0:
                        for chunk in chunks:
                            internet_search_infos.append(chunk.doc.title)
-                            internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{doc}\n"
+                            internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{chunk.text}\n"
                    else:
                        internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n"
                    if self.config.internet_quick_search:
@ -1287,12 +1287,12 @@ class LollmsApplication(LoLLMsCom):
                    # Tokenize the message content
                    if self.config.use_model_name_in_discussions:
                        if message.model:
-                            msg = f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}({message.model}){end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip() + f"{self.separator_template}"
+                            msg =  f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}({message.model}){end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
                        else:
-                            msg = f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip() + f"{self.separator_template}"
+                            msg = f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
                        message_tokenized = self.model.tokenize(msg)
                    else:
-                        msg_value= f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip() + f"{self.separator_template}"
+                        msg_value= f"{self.separator_template}" + f"{start_ai_header_id_template if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.start_user_header_id_template}{message.sender}{end_ai_header_id_template  if message.sender_type == SENDER_TYPES.SENDER_TYPES_AI else self.end_user_header_id_template}" + message.content.strip()
                        message_tokenized = self.model.tokenize(
                            msg_value
                        )
@ -1344,7 +1344,7 @@ class LollmsApplication(LoLLMsCom):
        else:
            ai_prefix = ""
        # Build the final prompt by concatenating the conditionning and discussion messages
-        prompt_data = conditionning + internet_search_results + documentation + knowledge + user_description + discussion_messages + positive_boost + negative_boost + fun_mode + (start_ai_header_id_template + ai_prefix + end_ai_header_id_template if not is_continue else '')
+        prompt_data = conditionning + internet_search_results + documentation + knowledge + user_description + discussion_messages + positive_boost + negative_boost + fun_mode + (self.separator_template + start_ai_header_id_template + ai_prefix + end_ai_header_id_template if not is_continue else '' if not self.config.use_continue_message else end_ai_header_id_template + "CONTINUE FROM HERE And do not open a new markdown code tag" + self.separator_template + start_ai_header_id_template + ai_prefix + end_ai_header_id_template)

        # Tokenize the prompt data
        tokens = self.model.tokenize(prompt_data)
--- a/lollms/code_parser.py
+++ b/lollms/code_parser.py
@ -1,6 +1,7 @@
 import re
 from lollms.utilities import PackageManager
-from pipmaster import PackageManager
+import pipmaster as pm
+
 from bs4 import BeautifulSoup
 def compress_js(js_code):
    # Patterns to match function, class, and variable declarations
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 127
+version: 128
 binding_name: null
 model_name: null
 model_variant: null
@ -43,6 +43,7 @@ end_ai_header_id_template: ": "
 end_ai_message_id_template: ""

 system_message_template: "system"
+use_continue_message: true

 seed: -1
 ctx_size: 4084
--- a/lollms/internet.py
+++ b/lollms/internet.py
@ -2,6 +2,10 @@ from ascii_colors import ASCIIColors, trace_exception
 from lollms.utilities import PackageManager
 import time
 import re
+from freedom_search import InternetSearchEnhancer
+from scrapemaster import ScrapeMaster
+from pathlib import Path
+from lollmsvectordb import VectorDatabase

 def get_favicon_url(url):
    import requests
@ -105,122 +109,113 @@ def press_buttons(driver, buttons_to_press=['accept']):
        except:
            ASCIIColors.warning(f"Couldn't press button {button_to_press} in this page.")

-def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None, wait_step_delay=1, buttons_to_press=['accept'], max_size=None):
-    if not PackageManager.check_package_installed("selenium"):
-        PackageManager.install_package("selenium")
-    if not PackageManager.check_package_installed("bs4"):
-        PackageManager.install_package("bs4")

-    from bs4 import BeautifulSoup
-        
-    from selenium import webdriver
-    from selenium.common.exceptions import TimeoutException
-    
-    from selenium.webdriver.support.ui import WebDriverWait
-    from selenium.webdriver.support import expected_conditions as EC

-    driver = prepare_chrome_driver(chromedriver_path)

-    # Navigate to the URL
-    driver.get(url)
-    wait_for_page(driver, wait_step_delay)
-    press_buttons(driver, buttons_to_press)
+import os
+from scrapemaster import ScrapeMaster

-    # Parse the HTML content using BeautifulSoup
-    soup = BeautifulSoup(driver.page_source, 'html.parser')
-    
-    # Find all the text content in the webpage
-    text_content = soup.get_text()
-    text_content = re.sub(r'\n+', '\n', text_content)
+def scrape_and_save(url, file_path:str|Path=None, use_selenium=False, follow_links=False, max_depth=3, lollms_com=None, chromedriver_path=None, wait_step_delay=1, buttons_to_press=['accept'], max_size=None):
+    """
+    Scrapes the texts and image URLs of a web page (or a whole site) with ScrapeMaster and optionally saves the texts.

-    
+    Parameters:
+    - url (str): The URL to scrape data from.
+    - file_path (str | Path, optional): The markdown file path where scraped texts will be saved. An `images` folder is
+      created next to it and handed to ScrapeMaster as output directory. If None, nothing is written to disk.
+    - use_selenium (bool, optional): Forwarded to `ScrapeMaster.scrape_all` (single page mode only).
+    - follow_links (bool, optional): If True, `ScrapeMaster.scrape_website` is used instead of `scrape_all`. Requires file_path.
+    - max_depth (int, optional): Forwarded to `scrape_website` when follow_links is True.
+    - lollms_com, chromedriver_path, wait_step_delay, buttons_to_press, max_size: Kept so that existing callers keep
+      working; this implementation does not read them.
+
+    Returns:
+    dict: A dictionary with the 'texts' and 'image_urls' entries of the ScrapeMaster results.
+
+    Raises:
+    ValueError: If follow_links is True and no file_path is given.
+    """
+    # Crawling needs an output folder and a file prefix, both derived from file_path: fail early with a clear message
+    if follow_links and not file_path:
+        raise ValueError("scrape_and_save: file_path is required when follow_links is True")
+
+    # Initialize the scraper with the provided URL
+    scraper = ScrapeMaster(url)
+
+    # Stays None when there is no file_path, so the code below never reads an undefined name
+    images_folder = None
+
+    # Create a subfolder for images if file_path is provided
    if file_path:
-        if max_size and len(text_content)< max_size:
-            # Save the text content as a text file
-            with open(file_path, 'w', encoding="utf-8") as file:
-                file.write(text_content)
-            if lollms_com:
-                lollms_com.info(f"Webpage content saved to {file_path}")
+        file_path = Path(file_path)
+        images_folder = os.path.join(file_path.parent, 'images')
+        os.makedirs(images_folder, exist_ok=True)
+
+    # Perform the scraping
+    if follow_links:
+        results = scraper.scrape_website(max_depth=max_depth, output_dir=file_path.parent, prefix=file_path.stem+'_')
+    elif images_folder:
+        results = scraper.scrape_all(output_dir = images_folder, use_selenium=use_selenium)
+    else:
+        # Nothing is saved to disk: do not pass an output directory
+        results = scraper.scrape_all(use_selenium=use_selenium)
+
+    # Save scraped texts to the markdown file
+    if file_path:
+        # utf-8 so that non ASCII page content does not depend on the platform default encoding
+        with open(file_path, 'w', encoding="utf-8") as md_file:
+            for text in results['texts']:
+                md_file.write(text + '\n\n')
+        print(f"Texts saved to {file_path}")
+        print(f"Images saved to {images_folder}")
+
+    return {
+        'texts': results['texts'],
+        'image_urls': results['image_urls']
+    }
+
+
+# def scrape_and_save(url, file_path=None, lollms_com=None, chromedriver_path=None, wait_step_delay=1, buttons_to_press=['accept'], max_size=None):
+#     if not PackageManager.check_package_installed("selenium"):
+#         PackageManager.install_package("selenium")
+#     if not PackageManager.check_package_installed("bs4"):
+#         PackageManager.install_package("bs4")
+
+#     from bs4 import BeautifulSoup
+        
+#     from selenium import webdriver
+#     from selenium.common.exceptions import TimeoutException
+    
+#     from selenium.webdriver.support.ui import WebDriverWait
+#     from selenium.webdriver.support import expected_conditions as EC
+
+#     driver = prepare_chrome_driver(chromedriver_path)
+
+#     # Navigate to the URL
+#     driver.get(url)
+#     wait_for_page(driver, wait_step_delay)
+#     press_buttons(driver, buttons_to_press)
+
+#     # Parse the HTML content using BeautifulSoup
+#     soup = BeautifulSoup(driver.page_source, 'html.parser')
+    
+#     # Find all the text content in the webpage
+#     text_content = soup.get_text()
+#     text_content = re.sub(r'\n+', '\n', text_content)
+
+    
+#     if file_path:
+#         if max_size and len(text_content)< max_size:
+#             # Save the text content as a text file
+#             with open(file_path, 'w', encoding="utf-8") as file:
+#                 file.write(text_content)
+#             if lollms_com:
+#                 lollms_com.info(f"Webpage content saved to {file_path}")
            
-    # Close the driver
-    driver.quit()
+#     # Close the driver
+#     driver.quit()


-    return text_content
+#     return text_content


 def get_relevant_text_block(
    url,
-    driver,
-    internet_vectorization_chunk_size,
-    internet_vectorization_overlap_size,
-    vectorizer,
+    vectorizer:VectorDatabase,
    title=None,
-    brief=None,
-    wait_step_delay=0.5,
-    query="",
-    asses_using_llm=True,
-    yes_no=None
 ):
-    from bs4 import BeautifulSoup
-    import time
-    try:
-        # Chargez la page web avec le driver passé en paramètre
-        driver.get(url)
-        # Attendez que le JavaScript s'exécute, avec un délai d'attente progressif si nécessaire
-        time.sleep(wait_step_delay)
-        html_content = driver.page_source
-        soup = BeautifulSoup(html_content, "html.parser")
-
-        # Supprimez les éléments non désirés
-        for script_or_style in soup(["script", "style", "header", "footer", "nav", "aside"]):
-            script_or_style.decompose()
-
-        # Ciblez l'élément contenant le texte principal
-        article = soup.find('article')
-        if article:
-            text_block = ''
-            sections = article.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li'])
-            for element in sections:
-                if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-                    if text_block:
-                        text_block += '\n\n'
-                    text_block += element.get_text(strip=True)
-                    text_block += '\n'
-                else:
-                    text_block += element.get_text(strip=True) + '\n'
-
-            document_id = {
-                'url':url
-            }
-            document_id["title"] = title
-            document_id["brief"] = brief
-            text_block=text_block.strip()
-            if asses_using_llm and yes_no is not None:
-                if yes_no(f"Is this content relevant to the query: {query}", text_block):
-                    vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size)
-            else:
-                vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size)
-            return True
-        else:
-            body = soup.body
-            if body:
-                text_block = body.get_text(strip=True)
-                document_id = {
-                    'url':url
-                }
-                document_id["title"] = title
-                document_id["brief"] = brief
-                text_block=text_block.strip()
-
-                vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size)
-                return True
-            else:
-                ASCIIColors.warning("No data found in his page.")
-                return False
-    except Exception as ex:
-        ASCIIColors.warning(f"Couldn't scrape: {url}")
-        return False
+    """
+    Scrapes a page with ScrapeMaster and adds its text to the vector database.
+
+    Parameters:
+    - url (str): The page to scrape; also passed to the vectorizer as third argument of add_document.
+    - vectorizer (VectorDatabase): Receives the scraped text through add_document.
+    - title (str, optional): Document title. The url is used when it is empty.
+
+    Returns:
+    bool: True if a document was added, False if the page gave no text or scraping failed.
+    """
+    try:
+        sm = ScrapeMaster(url)
+        result = sm.scrape_all()
+        # scrape_and_save reads the scrape_all results through 'texts' (iterated as a list of strings);
+        # the same key is used here and the pieces are joined into one document
+        texts = result["texts"]
+        if not texts:
+            ASCIIColors.warning("No data found in this page.")
+            return False
+        vectorizer.add_document(title if title else url, "\n".join(texts), url)
+        return True
+    except Exception as ex:
+        # Best effort, as before this change: one page that cannot be scraped must not abort the whole search
+        ASCIIColors.warning(f"Couldn't scrape: {url} ({ex})")
+        return False
        


@ -346,26 +341,19 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n

    formatted_text = ""
    nb_non_empty = 0
-    # Configure Chrome options
-    driver = prepare_chrome_driver(chromedriver_path)
-    qquery = format_url_parameter(query)
-    url = f"https://duckduckgo.com/?q={qquery}&t=h_&ia=web"
-    results = extract_results(
-                                url,
-                                internet_nb_search_pages,
-                                driver
-                            )
+    ise = InternetSearchEnhancer()
+    results = ise.search(query)
    
    if len(results)>0:
        for i, result in enumerate(results):
            ASCIIColors.orange(f"Processing result:{result['title']}")
            title = result["title"]
-            brief = result["brief"]
-            href = result["href"]
+            brief = result["snippet"]
+            href = result["url"]
            if quick_search:
                vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
            else:
-                get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no)
+                get_relevant_text_block(href, vectorizer, title)
            nb_non_empty += 1
            if nb_non_empty>=internet_nb_search_pages:
                break
@ -373,7 +361,5 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
        chunks = vectorizer.search(query, internet_vectorization_nb_chunks)
    else:
        chunks = []
-    # Close the browser
-    driver.quit()

    return chunks
--- a/lollms/server/endpoints/lollms_personalities_infos.py
+++ b/lollms/server/endpoints/lollms_personalities_infos.py
@ -99,6 +99,7 @@ def get_all_personalities():
                            personality_info['installed'] = (lollmsElfServer.lollms_paths.personal_configuration_path/f"personality_{personality_folder.stem}.yaml").exists() or personality_info['has_scripts']
                            personality_info['help'] = config_data.get('help', '')
                            personality_info['commands'] = config_data.get('commands', '')
+                            personality_info['prompts_list'] = config_data.get('prompts_list', [])
                        languages_path = personality_folder/ 'languages'

                        real_assets_path = personality_folder/ 'assets'
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@ -964,13 +964,18 @@ class AdvancedGarbageCollector:
        gc.collect()


+
 class PackageManager:
    @staticmethod
-    def install_package(package_name):
-        import subprocess
-        import sys
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", package_name])
-        
+    def install_package(package_name, index_url=None, extra_args=None):
+        """
+        Install or upgrade a package with pip, using the current interpreter.
+
+        Args:
+            package_name (str): The name handed to pip as the package to install.
+            index_url (str, optional): Passed to pip as --index-url.
+            extra_args (list, optional): Additional pip arguments, inserted before the package name.
+
+        Returns:
+            bool: True when pip finished successfully.
+
+        Raises:
+            subprocess.CalledProcessError: If pip exits with a non zero status.
+        """
+        cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
+        if index_url:
+            cmd.extend(["--index-url", index_url])
+        if extra_args:
+            cmd.extend(extra_args)
+        cmd.append(package_name)
+        subprocess.check_call(cmd)
+        # install_or_update() and reinstall() return this value as their success flag;
+        # check_call raises on failure, so reaching this line means pip succeeded
+        return True
+
    @staticmethod
    def check_package_installed(package_name):
        try:
@ -979,131 +984,89 @@ class PackageManager:
        except ImportError:
            return False
        except Exception as ex:
-            trace_exception(ex)
-            ASCIIColors.error("Something is wrong with your library.\nIt looks installed, but I am not able to call it.\nTry to reinstall it.")
+            print(f"Error checking package: {ex}")
            return False
+
    @staticmethod
    def check_package_installed_with_version(package_name: str, min_version: Optional[str] = None) -> bool:
+        """
+        Check that a package can be imported and, optionally, that it is recent enough.
+
+        Args:
+            package_name (str): Used both as the module name to import and as the
+                distribution name for the version lookup.
+            min_version (str, optional): Minimum accepted version. No version check when empty.
+
+        Returns:
+            bool: True if the import works and the version requirement (if any) is met.
+            False otherwise; the reason is printed.
+        """
        try:
            import pkg_resources
-            # Summon the library from the depths of the Python abyss
            package = importlib.import_module(package_name)
            if min_version:
-                # Check if the library is at least at the specified version
                installed_version = pkg_resources.get_distribution(package_name).version
                if pkg_resources.parse_version(installed_version) < pkg_resources.parse_version(min_version):
                    raise ImportError(f"Version {installed_version} is less than the required {min_version}.")
            return True
        except ImportError as ex:
-            print(f"Oopsie daisy! The library '{package_name}' is playing hide and seek. Error: {ex}")
+            print(f"Package '{package_name}' is not installed or version requirement not met. Error: {ex}")
            return False
        except Exception as ex:
-            print(f"Yikes! Something went bananas with your library. Error: {ex}")
+            print(f"Error checking package: {ex}")
            return False
-           
+
    @staticmethod
-    def safe_import(module_name, library_name=None):
+    def safe_import(module_name, library_name=None, index_url=None, extra_args=None):
+        """
+        Import a module, installing it with pip first when it is reported as missing.
+
+        Args:
+            module_name (str): The name used for the import.
+            library_name (str, optional): The name handed to pip when it differs from module_name.
+            index_url (str, optional): Forwarded to install_package.
+            extra_args (list, optional): Forwarded to install_package.
+
+        The imported module is stored under module_name in the globals() of the module
+        that defines this class; nothing is returned.
+        """
        if not PackageManager.check_package_installed(module_name):
            print(f"{module_name} module not found. Installing...")
-            if library_name:
-                PackageManager.install_package(library_name)
-            else:
-                PackageManager.install_package(module_name)
+            PackageManager.install_package(library_name or module_name, index_url, extra_args)
        globals()[module_name] = importlib.import_module(module_name)
        print(f"{module_name} module imported successfully.")

    @staticmethod
    def get_installed_version(package):
-        """
-        Get the installed version of a Python package.
-
-        Args:
-            package (str): The name of the package to check.
-
-        Returns:
-            str: The installed version of the package, or None if the package is not installed.
-        """
+        """
+        Read the installed version of a package from the output of `pip show`.
+
+        Args:
+            package (str): The name of the package to check.
+
+        Returns:
+            str: The text after "Version:" in the pip output, or None when pip exits
+            with an error or prints no "Version:" line.
+        """
        try:
            output = subprocess.check_output([sys.executable, "-m", "pip", "show", package], universal_newlines=True)
            for line in output.splitlines():
                if line.startswith("Version:"):
                    version = line.split(":", 1)[1].strip()
-                    print(f"The installed version of {package} is {version}. It's like finding out your favorite ice cream flavor!")
+                    print(f"The installed version of {package} is {version}.")
                    return version
            return None
        except subprocess.CalledProcessError as e:
-            print(f"Error getting version for {package}: {e}. The version is playing hide and seek!")
+            print(f"Error getting version for {package}: {e}")
            return None

    @staticmethod
-    def install_or_update(package):
-        """
-        Install or update a Python package.
-
-        Args:
-            package (str): The name of the package to install or update.
-
-        Returns:
-            bool: True if the package was installed or updated successfully, False otherwise.
-        """
+    def install_or_update(package, index_url=None, extra_args=None):
+        """
+        Install a Python package, or upgrade it when it is already importable.
+
+        Args:
+            package (str): The name of the package to install or update.
+            index_url (str, optional): Forwarded to install_package.
+            extra_args (list, optional): Forwarded to install_package.
+
+        Returns:
+            bool: True if pip succeeded. False if the upgrade failed or if the installed
+            version could not be determined (no upgrade is attempted in that case).
+
+        Raises:
+            subprocess.CalledProcessError: If the first installation of a missing package fails.
+        """
        if PackageManager.check_package_installed(package):
-            print(f"{package} is already installed. Let's see if it needs a makeover!")
+            print(f"{package} is already installed. Checking for updates...")
            installed_version = PackageManager.get_installed_version(package)
            if installed_version:
-                print(f"Updating {package} from version {installed_version}. It's like a software spa day!")
+                print(f"Updating {package} from version {installed_version}.")
                try:
-                    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", package], check=True)
-                    print(f"Successfully updated {package}. New version, who dis?")
+                    PackageManager.install_package(package, index_url, extra_args)
+                    print(f"Successfully updated {package}.")
                    return True
                except subprocess.CalledProcessError as e:
-                    print(f"Error updating {package}: {e}. The update fairy took a day off!")
+                    print(f"Error updating {package}: {e}")
                    return False
+            # get_installed_version() found nothing: say so instead of falling through with None
+            return False
        else:
-            print(f"{package} is not installed. Time to add it to your collection!")
-            return PackageManager.install_package(package)
+            print(f"{package} is not installed. Installing...")
+            # install_package raises on failure, so reaching the return means success
+            PackageManager.install_package(package, index_url, extra_args)
+            return True

    @staticmethod
    def uninstall_package(package):
-        """
-        Uninstall a Python package.
-
-        Args:
-            package (str): The name of the package to uninstall.
-
-        Returns:
-            bool: True if the package was uninstalled successfully, False otherwise.
-        """
+        """
+        Uninstall a Python package with `pip uninstall -y`, using the current interpreter.
+
+        Args:
+            package (str): The name of the package to uninstall.
+
+        Returns:
+            bool: True if pip exited successfully, False if it exited with an error
+            (the error is printed).
+        """
        try:
            subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", package], check=True)
-            print(f"Successfully uninstalled {package}. Goodbye, old friend!")
+            print(f"Successfully uninstalled {package}.")
            return True
        except subprocess.CalledProcessError as e:
-            print(f"Error uninstalling {package}: {e}. Uninstallation wizard failed!")
+            print(f"Error uninstalling {package}: {e}")
            return False

    @staticmethod
-    def reinstall(package):
-        """
-        Reinstall a Python package.
-
-        Args:
-            package (str): The name of the package to reinstall.
-
-        Returns:
-            bool: True if the package was reinstalled successfully, False otherwise.
-        """
+    def reinstall(package, index_url=None, extra_args=None):
+        """
+        Reinstall a Python package: uninstall it when it is importable, then install it again.
+
+        Args:
+            package (str): The name of the package to reinstall.
+            index_url (str, optional): Forwarded to install_package.
+            extra_args (list, optional): Forwarded to install_package.
+
+        Returns:
+            bool: True if the package was installed, False if the uninstallation failed.
+
+        Raises:
+            subprocess.CalledProcessError: If the installation step fails.
+        """
        if PackageManager.check_package_installed(package):
-            print(f"{package} is already installed. Let's give it a fresh start!")
+            print(f"{package} is already installed. Uninstalling for fresh installation...")
            if PackageManager.uninstall_package(package):
-                print(f"{package} uninstalled successfully. Now, let's reinstall it.")
-                return PackageManager.install_package(package)
+                print(f"{package} uninstalled successfully. Now reinstalling.")
+                # install_package raises on failure, so reaching the return means success
+                PackageManager.install_package(package, index_url, extra_args)
+                return True
            else:
                print(f"Failed to uninstall {package}. Reinstallation aborted.")
                return False
        else:
            print(f"{package} is not installed. Installing it now.")
-            return PackageManager.install_package(package)
-
+            PackageManager.install_package(package, index_url, extra_args)
+            return True
 class GitManager:
    @staticmethod
    def git_pull(folder_path):