From ce7b936f4fd1246953de51faf408b6a6bc13d27b Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Sun, 21 Jul 2024 22:37:12 +0200 Subject: [PATCH] fix --- lollms/app.py | 6 +++++- lollms/internet.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lollms/app.py b/lollms/app.py index 59c084b..eafb4b2 100644 --- a/lollms/app.py +++ b/lollms/app.py @@ -974,7 +974,11 @@ class LollmsApplication(LoLLMsCom): if len(chunks)>0: for chunk in chunks: - internet_search_infos.append(chunk.doc.title) + internet_search_infos.append({ + "title":chunk.doc.title, + "url":chunk.doc.path, + "brief":chunk.text + }) internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{chunk.text}\n" else: internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n" diff --git a/lollms/internet.py b/lollms/internet.py index a6b710c..2fc8b92 100644 --- a/lollms/internet.py +++ b/lollms/internet.py @@ -337,12 +337,12 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer v = Word2VecVectorizer() - vectorizer = VectorDatabase("", v, TikTokenTokenizer()) + vectorizer = VectorDatabase("", v, TikTokenTokenizer(), internet_vectorization_chunk_size, internet_vectorization_overlap_size) formatted_text = "" nb_non_empty = 0 ise = InternetSearchEnhancer() - results = ise.search(query) + results = ise.search(query, num_results=internet_nb_search_pages) if len(results)>0: for i, result in enumerate(results): @@ -351,7 +351,7 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n brief = result["snippet"] href = result["url"] if quick_search: - vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief) + vectorizer.add_document(title, brief, href) else: get_relevant_text_block(href, vectorizer, title) nb_non_empty += 1