This commit is contained in:
Saifeddine ALOUI 2024-07-21 22:37:12 +02:00
parent e58077fd89
commit ce7b936f4f
2 changed files with 8 additions and 4 deletions

View File

@@ -974,7 +974,11 @@ class LollmsApplication(LoLLMsCom):
 if len(chunks)>0:
     for chunk in chunks:
-        internet_search_infos.append(chunk.doc.title)
+        internet_search_infos.append({
+            "title":chunk.doc.title,
+            "url":chunk.doc.path,
+            "brief":chunk.text
+        })
         internet_search_results += f"{self.start_header_id_template}search result chunk{self.end_header_id_template}\nchunk_infos:{chunk.doc.path}\nchunk_title:{chunk.doc.title}\ncontent:{chunk.text}\n"
 else:
     internet_search_results += "The search response was empty!\nFailed to recover useful information from the search engine.\n"

View File

@@ -337,12 +337,12 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
 from lollmsvectordb.lollms_vectorizers.word2vec_vectorizer import Word2VecVectorizer
 v = Word2VecVectorizer()
-vectorizer = VectorDatabase("", v, TikTokenTokenizer())
+vectorizer = VectorDatabase("", v, TikTokenTokenizer(), internet_vectorization_chunk_size, internet_vectorization_overlap_size)
 formatted_text = ""
 nb_non_empty = 0
 ise = InternetSearchEnhancer()
-results = ise.search(query)
+results = ise.search(query, num_results=internet_nb_search_pages)
 if len(results)>0:
     for i, result in enumerate(results):
@@ -351,7 +351,7 @@ def internet_search_with_vectorization(query, chromedriver_path=None, internet_n
 brief = result["snippet"]
 href = result["url"]
 if quick_search:
-    vectorizer.add_document({'url':href, 'title':title, 'brief': brief}, brief)
+    vectorizer.add_document(title, brief, href)
 else:
     get_relevant_text_block(href, vectorizer, title)
     nb_non_empty += 1