From 3eb3750a0fd9ea759e571ff398a8eceebc3e1704 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Fri, 14 Jun 2024 01:43:49 +0200 Subject: [PATCH] enhanced in every side --- configs/config.yaml | 6 ++--- lollms/app.py | 23 +++++++++++++++---- lollms/configs/config.yaml | 6 ++--- lollms/functions/generate_image.py | 6 ++--- .../youtube/download_transcript_by_channel.py | 2 ++ lollms/internet.py | 16 +++++++++---- lollms/personality.py | 14 ++++++----- 7 files changed, 50 insertions(+), 23 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index 9b80d07..5973670 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -263,9 +263,9 @@ data_vectorization_make_persistance: false # If true, the data will be persistan # Activate internet search activate_internet_search: false internet_vectorization_chunk_size: 512 # chunk size -internet_vectorization_overlap_size: 128 # overlap between chunks size -internet_vectorization_nb_chunks: 2 # number of chunks to use -internet_nb_search_pages: 3 # number of pages to select +internet_vectorization_overlap_size: 0 # overlap between chunks size +internet_vectorization_nb_chunks: 4 # number of chunks to use +internet_nb_search_pages: 8 # number of pages to select internet_quick_search: false # If active the search engine will not load and read the webpages internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search # Helpers diff --git a/lollms/app.py b/lollms/app.py index 2ded091..a1de40f 100644 --- a/lollms/app.py +++ b/lollms/app.py @@ -863,14 +863,29 @@ class LollmsApplication(LoLLMsCom): discussion = self.recover_discussion(client_id) if self.config.internet_activate_search_decision: self.personality.step_start(f"Requesting if {self.personality.name} needs to search internet to answer the user") - need = not self.personality.yes_no(f"{start_header_id_template}{system_message_template}{end_header_id_template}Answer the question with yes or no. 
Don't add any extra explanation.{separator_template}{start_header_id_template}user: Do you have enough information to give a satisfactory answer to {self.config.user_name}'s request without internet search? (If you do not know or you can't answer 0 (no)", discussion) + q = f"{separator_template}".join([ + f"{start_header_id_template}{system_message_template}{end_header_id_template}", + f"Answer the question with yes or no. Don't add any extra explanation.", + f"{start_user_header_id_template}user{end_user_header_id_template}", + f"Do you have enough information to give a satisfactory answer to {self.config.user_name}'s request without internet search?", + "(If you do not know or you can't answer the question, return 0 (no))" + ]) + need = not self.personality.yes_no(q, discussion) self.personality.step_end(f"Requesting if {self.personality.name} needs to search internet to answer the user") self.personality.step("Yes" if need else "No") else: need=True if need: self.personality.step_start("Crafting internet search query") - query = self.personality.fast_gen(f"{start_header_id_template}discussion{end_header_id_template}\n{discussion[-2048:]}{separator_template}{start_header_id_template}system: Read the discussion and craft a web search query suited to recover needed information to reply to last {self.config.user_name} message.\nDo not answer the prompt. Do not add explanations.{separator_template}{start_header_id_template}current date: {datetime.now()}{separator_template}{start_header_id_template}websearch query: ", max_generation_size=256, show_progress=True, callback=self.personality.sink) + q = f"{separator_template}".join([ + f"{start_header_id_template}discussion{end_header_id_template}", + f"{discussion[-2048:]}{start_header_id_template}system{end_header_id_template}", + f"Read the discussion and craft a web search query suited to recover needed information to reply to last {self.config.user_name} message.", + f"Do not answer the prompt. 
Do not add explanations.", + f"{start_header_id_template}current date{end_header_id_template}{datetime.now()}", + f"{start_header_id_template}websearch query{end_header_id_template}" + ]) + query = self.personality.fast_gen(q, max_generation_size=256, show_progress=True, callback=self.personality.sink) self.personality.step_end("Crafting internet search query") self.personality.step(f"web search query: {query}") @@ -879,9 +894,9 @@ class LollmsApplication(LoLLMsCom): else: self.personality.step_start("Performing Internet search (advanced mode: slower but more advanced)") - internet_search_results=f"{start_header_id_template}instructions{end_header_id_template}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{separator_template}{start_header_id_template}Web search results:\n" + internet_search_results=f"{start_header_id_template}{system_message_template}{end_header_id_template}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the results or on your own culture. 
If you don't know how to perform the task, then tell the user politely that you need more data inputs.{separator_template}{start_header_id_template}Web search results{end_header_id_template}\n" - docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search) + docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=True) if len(docs)>0: for doc, infos,document_id in zip(docs, sorted_similarities, document_ids): diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml index 9b80d07..5973670 100644 --- a/lollms/configs/config.yaml +++ b/lollms/configs/config.yaml @@ -263,9 +263,9 @@ data_vectorization_make_persistance: false # If true, the data will be persistan # Activate internet search activate_internet_search: false internet_vectorization_chunk_size: 512 # chunk size -internet_vectorization_overlap_size: 128 # overlap between chunks size -internet_vectorization_nb_chunks: 2 # number of chunks to use -internet_nb_search_pages: 3 # number of pages to select +internet_vectorization_overlap_size: 0 # overlap between chunks size +internet_vectorization_nb_chunks: 4 # number of chunks to use +internet_nb_search_pages: 8 # number of pages to select internet_quick_search: false # If active the search engine will not load and read the webpages internet_activate_search_decision: false # If active the ai decides by itself if it needs to do search # Helpers diff --git a/lollms/functions/generate_image.py b/lollms/functions/generate_image.py index 1ac7019..3ce3c03 100644 --- a/lollms/functions/generate_image.py +++ b/lollms/functions/generate_image.py @@ -96,21 +96,21 @@ def build_image_function(processor, client): return { "function_name": "build_image", "function": partial(build_image, processor=processor, client=client), - "function_description": "Builds and shows an image from a prompt and 
width and height parameters. A square 1024x1024, a portrait woudl be 1024x1820 or landscape 1820x1024.", + "function_description": "Builds and shows an image from a prompt and width and height parameters. A square 1024x1024, a portrait would be 1024x1820 or landscape 1820x1024. Width and height have to be divisible by 8.", "function_parameters": [{"name": "prompt", "type": "str"}, {"name": "negative_prompt", "type": "str"}, {"name": "width", "type": "int"}, {"name": "height", "type": "int"}] } else: return { "function_name": "build_image", "function": partial(build_image, processor=processor, client=client, negative_prompt=processor.config.default_negative_prompt), - "function_description": "Builds and shows an image from a prompt and width and height parameters. A square 1024x1024, a portrait woudl be 1024x1820 or landscape 1820x1024.", + "function_description": "Builds and shows an image from a prompt and width and height parameters. A square 1024x1024, a portrait would be 1024x1820 or landscape 1820x1024. Width and height have to be divisible by 8.", "function_parameters": [{"name": "prompt", "type": "str"}, {"name": "width", "type": "int"}, {"name": "height", "type": "int"}] } else: return { "function_name": "build_image", "function": partial(build_image, processor=processor, client=client, negative_prompt=""), - "function_description": "Builds and shows an image from a prompt and width and height parameters. A square 1024x1024, a portrait woudl be 1024x1820 or landscape 1820x1024. 
Width and height have to be divisible by 8.", "function_parameters": [{"name": "prompt", "type": "str"}, {"name": "width", "type": "int"}, {"name": "height", "type": "int"}] } diff --git a/lollms/functions/youtube/download_transcript_by_channel.py b/lollms/functions/youtube/download_transcript_by_channel.py index fe50ad7..2d2d1a5 100644 --- a/lollms/functions/youtube/download_transcript_by_channel.py +++ b/lollms/functions/youtube/download_transcript_by_channel.py @@ -32,6 +32,8 @@ def download_channel_transcripts(channel_url: str, output_folder: str) -> str: str: A message indicating the status of the download process. """ try: + if output_folder=="": + return "Please set the transcription output path in lollms personality settings" # Create output folder if it doesn't exist output_folder_path = pathlib.Path(output_folder) output_folder_path.mkdir(parents=True, exist_ok=True) diff --git a/lollms/internet.py b/lollms/internet.py index cf8fe58..02a5fa9 100644 --- a/lollms/internet.py +++ b/lollms/internet.py @@ -157,7 +157,10 @@ def get_relevant_text_block( vectorizer, title=None, brief=None, - wait_step_delay=0.5 + wait_step_delay=0.5, + query="", + asses_using_llm=True, + yes_no=None ): from bs4 import BeautifulSoup import time @@ -193,7 +196,11 @@ def get_relevant_text_block( document_id["title"] = title document_id["brief"] = brief text_block=text_block.strip() - vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size) + if asses_using_llm and yes_no is not None: + if yes_no(f"Is this content relevant to the query: {query}", text_block): + vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size) + else: + vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size) return True else: body = soup.body @@ -205,6 +212,7 @@ def get_relevant_text_block( document_id["title"] = title 
document_id["brief"] = brief text_block=text_block.strip() + vectorizer.add_document(document_id,text_block, internet_vectorization_chunk_size, internet_vectorization_overlap_size) return True else: @@ -314,7 +322,7 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui return search_results -def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorize=True): +def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorize=True, asses_using_llm=True, yes_no=None): """ """ @@ -343,7 +351,7 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n if quick_search: vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief) else: - get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief) + get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no) nb_non_empty += 1 if nb_non_empty>=internet_nb_search_pages: break diff --git a/lollms/personality.py b/lollms/personality.py index 853394e..0443659 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -396,19 +396,21 @@ class AIPersonality: f' \n' ]) - def internet_search_with_vectorization(self, query, quick_search:bool=False): + def internet_search_with_vectorization(self, query, quick_search:bool=False, asses_using_llm=True): """ Do internet search and return the result """ from lollms.internet import 
internet_search_with_vectorization return internet_search_with_vectorization( query, - internet_nb_search_pages=self.config.internet_nb_search_pages, - internet_vectorization_chunk_size=self.config.internet_vectorization_chunk_size, - internet_vectorization_overlap_size=self.config.internet_vectorization_overlap_size, - internet_vectorization_nb_chunks=self.config.internet_vectorization_nb_chunks, + internet_nb_search_pages=int(self.config.internet_nb_search_pages), + internet_vectorization_chunk_size=int(self.config.internet_vectorization_chunk_size), + internet_vectorization_overlap_size=int(self.config.internet_vectorization_overlap_size), + internet_vectorization_nb_chunks=int(self.config.internet_vectorization_nb_chunks), model = self.model, - quick_search=quick_search + quick_search=quick_search, + asses_using_llm=asses_using_llm, + yes_no = self.yes_no ) def sink(self, s=None,i=None,d=None):