Enhanced tools and XTTS

This commit is contained in:
Saifeddine ALOUI 2024-07-16 01:00:08 +02:00
parent 3a00e968fb
commit 6f40981651
5 changed files with 49 additions and 24 deletions

View File

@ -256,7 +256,13 @@ class LollmsApplication(LoLLMsCom):
return self.tasks_library.summarize_text(
message_content,
"\n".join([
"Extract useful information from this discussion."
"Act as Skills library maker.",
"The objective is to find out important information from the discussion and store them as text that can be used in the future to remember those information.",
"Format the output as sections if applicable:",
"Global context: Explain in a sentense or two the subject of the discussion",
"Interesting things (if applicable): If you find interesting information or something that was discovered or built in this discussion, list it here with enough details to be reproducible just by reading this text.",
"Code snippet (if applicable): If there are important code snippets, write them here in a markdown code tag.",
"Make the output easy to understand."
]),
doc_name="discussion",
callback=callback)
@ -971,7 +977,7 @@ class LollmsApplication(LoLLMsCom):
else:
self.personality.step_start("Performing Internet search (advanced mode: slower but more advanced)")
internet_search_results=f"{self.start_header_id_template}{system_message_template}{self.end_header_id_template}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"
internet_search_results=f"{self.system_full_header}Use the web search results data to answer {self.config.user_name}. Try to extract information from the web search and use it to perform the requested task or answer the question. Do not come up with information that is not in the websearch results. Try to stick to the websearch results and clarify if your answer was based on the resuts or on your own culture. If you don't know how to perform the task, then tell the user politely that you need more data inputs.{self.separator_template}{self.start_header_id_template}Web search results{self.end_header_id_template}\n"
docs, sorted_similarities, document_ids = self.personality.internet_search_with_vectorization(query, self.config.internet_quick_search, asses_using_llm=self.config.activate_internet_pages_judgement)

View File

@ -138,7 +138,7 @@ class SkillsLibrary:
skill_titles = []
if len(res)>0:
for entry in res:
self.vectorizer.add_document(entry[0],"Title:"+entry[1]+"\n"+entry[2])
self.vectorizer.add_document(entry[1],"Title:"+entry[1]+"\n"+entry[2])
self.vectorizer.build_index()
chunks = self.vectorizer.search(query_, top_k)

View File

@ -322,15 +322,28 @@ def internet_search(query, internet_nb_search_pages, chromedriver_path=None, qui
return search_results
def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorize=True, asses_using_llm=True, yes_no=None):
def internet_search_with_vectorization(query, chromedriver_path=None, internet_nb_search_pages=5, internet_vectorization_chunk_size=512, internet_vectorization_overlap_size=20, internet_vectorization_nb_chunks=4, model = None, quick_search:bool=False, vectorizer = "bert", vectorize=True, asses_using_llm=True, yes_no=None):
"""
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod
from lollmsvectordb import VectorDatabase
from lollmsvectordb import VectorDatabase
from lollmsvectordb.text_document_loader import TextDocumentsLoader
from lollmsvectordb.lollms_tokenizers.tiktoken_tokenizer import TikTokenTokenizer
vectorizer = TextVectorizer(VectorizationMethod.TFIDF_VECTORIZER, model = model)
if vectorizer == "bert":
from lollmsvectordb.lollms_vectorizers.bert_vectorizer import BERTVectorizer
v = BERTVectorizer()
elif vectorizer == "tfidf":
from lollmsvectordb.lollms_vectorizers.tfidf_vectorizer import TFIDFVectorizer
v = TFIDFVectorizer()
elif vectorizer == "word2vec":
from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
v = Word2VecVectorizer()
vectorizer = VectorDatabase("", v, TikTokenTokenizer())
formatted_text = ""
nb_non_empty = 0
@ -343,21 +356,26 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
driver
)
for i, result in enumerate(results):
ASCIIColors.orange(f"Processing result:{result['title']}")
title = result["title"]
brief = result["brief"]
href = result["href"]
if quick_search:
vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
else:
get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no)
nb_non_empty += 1
if nb_non_empty>=internet_nb_search_pages:
break
vectorizer.index()
if len(results)>0:
for i, result in enumerate(results):
ASCIIColors.orange(f"Processing result:{result['title']}")
title = result["title"]
brief = result["brief"]
href = result["href"]
if quick_search:
vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
else:
get_relevant_text_block(href, driver, internet_vectorization_chunk_size, internet_vectorization_overlap_size, vectorizer, title, brief, query=query, asses_using_llm=asses_using_llm, yes_no=yes_no)
nb_non_empty += 1
if nb_non_empty>=internet_nb_search_pages:
break
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
vectorizer.build_index()
else:
docs = ["The web search has failed. Try using another query"]
sorted_similarities = [0]
document_ids = ["duckduckgo.com"]
# Close the browser
driver.quit()
docs, sorted_similarities, document_ids = vectorizer.recover_text(query, internet_vectorization_nb_chunks)
return docs, sorted_similarities, document_ids

View File

@ -14,7 +14,7 @@ from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.main_config import BaseConfig
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
from lollms.utilities import output_file_path_to_url, detect_antiprompt, remove_text_from_string, trace_exception, find_first_available_file_index, add_period, PackageManager
from lollms.security import sanitize_path, validate_path, check_access
from pathlib import Path
from ascii_colors import ASCIIColors
@ -190,7 +190,8 @@ async def text2Wave(request: LollmsText2AudioRequest):
# Get the JSON data from the POST request.
if lollmsElfServer.tts.ready:
response = lollmsElfServer.tts.tts_file(request.text, request.fn, voice)
return response
response = output_file_path_to_url(response)
return {"url":response}
else:
return {"url": None, "error":f"TTS service is not ready yet"}

View File

@ -396,14 +396,14 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False,
sys.exit(1)
return result
def output_file_path_to_url(file_path):
def output_file_path_to_url(file_path:str|Path):
"""
This function takes a file path as an argument and converts it into a URL format. It first removes the initial part of the file path until the "outputs" string is reached, then replaces backslashes with forward slashes and quotes each segment with urllib.parse.quote() before joining them with forward slashes to form the final URL.
:param file_path: str, the file path in the format of a Windows system
:return: str, the converted URL format of the given file path
"""
file_path = str(file_path)
url = "/"+file_path[file_path.index("outputs"):].replace("\\","/")
return "/".join([urllib.parse.quote(p, safe="") for p in url.split("/")])