Enhanced tools and XTTS

This commit is contained in:
Saifeddine ALOUI 2024-07-16 01:00:08 +02:00
parent 3a00e968fb
commit 6f40981651
5 changed files with 49 additions and 24 deletions

View File

@ -256,7 +256,13 @@ class LollmsApplication(LoLLMsCom):
return self.tasks_library.summarize_text(
message_content,
"\n".join([
"Extract useful information from this discussion."
"Act as Skills library maker.",
"The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
"Format the output as sections if applicable:",
"Global context: Explain in a sentense or two the subject of the discussion",
"Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
"Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
"Make the output easy to understand."
]),
doc_name="discussion",
callback=callback)
@ -971,7 +977,7 @@ class LollmsApplication(LoLLMsCom):
else:
self.personality.step_start("Performing Internet search (advanced mode: slower but more advanced)")
internet_search_results=f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"
internet_search_results=f"{self.system_full_header}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"
docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)

View File

@ -138,7 +138,7 @@ class SkillsLibrary:
skill_titles = []
if len(res)>0:
for entry in res:
self.vectorizer.add_document(entry[0],"Title:"+entry[1]+"\n"+entry[2])
self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
self.vectorizer.build_index()
chunks = self.vectorizer.search(query_, top_k)

View File

@ -322,15 +322,28 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui
return search_results
def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorize=True, asses_using_llm=True, yes_no=None):
def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorizer = "bert", vectorize=True, asses_using_llm=True, yes_no=None):
"""
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod
from lollmsvectordb import VectorDatabase
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = model)
if vectorizer == "bert":
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
elif vectorizer == "word2vec":
from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
v = Word2VecVectorizer()
vectorizer = VectorDatabase("", v, TikTokenTokenizer())
formatted_text = ""
nb_non_empty = 0
@ -343,21 +356,26 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
driver
)
for i, result in enumerate(results):
ASCIIColors.orange(f"Processing result:{result['title']}")
title = result["title"]
brief = result["brief"]
href = result["href"]
if quick_search:
vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
else:
get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no)
nb_non_empty += 1
if nb_non_empty>=internet_nb_search_pages:
break
vectorizer.index()
if len(results)>0:
for i, result in enumerate(results):
ASCIIColors.orange(f"Processing result:{result['title']}")
title = result["title"]
brief = result["brief"]
href = result["href"]
if quick_search:
vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
else:
get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no)
nb_non_empty += 1
if nb_non_empty>=internet_nb_search_pages:
break
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
vectorizer.build_index()
else:
docs = ["The web search has failed. Try using another query"]
sorted_similarities = [0]
document_ids = ["duckduckgo.com"]
# Close the browser
driver.quit()
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
return docs, sorted_similarities, document_ids

View File

@ -14,7 +14,7 @@ from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.main_config import BaseConfig
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
from lollms.utilities import output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
from lollms.security import sanitize_path, validate_path, check_access
from pathlib import Path
from ascii_colors import ASCIIColors
@ -190,7 +190,8 @@ async def text2Wave(request: LollmsText2AudioRequest):
# Get the JSON data from the POST request.
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
return response
response = output_file_path_to_url(response)
return {"url":response}
else:
return {"url": None, "error":f"TTS service is not ready yet"}

View File

@ -396,14 +396,14 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False,
sys.exit(1)
return result
def output_file_path_to_url(file_path):
def output_file_path_to_url(file_path:str|Path):
"""
This function takes a file path as an argument and converts it into a URL format. It first removes the initial part of the file path until the "outputs" string is reached, then replaces backslashes with forward slashes and quotes each segment with urllib.parse.quote() before joining them with forward slashes to form the final URL.
:param file_path: str, the file path in the format of a Windows system
:return: str, the converted URL format of the given file path
"""
file_path = str(file_path)
url = "/"+file_path[file_path.index("outputs"):].replace("\\","/")
return "/".join([urllib.parse.quote(p, safe="") for p in url.split("/")])