Sync and update the settings vectorization section

This commit is contained in:
Saifeddine ALOUI 2024-12-08 02:19:47 +01:00
parent b0be10f1c6
commit 9c7d7aeb2e
6 changed files with 250 additions and 412 deletions

View File

@ -281,7 +281,7 @@ audio_silenceTimer: 5000
# Data vectorization # Data vectorization
rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data rag_databases: [] # This is the list of paths to database sources. Each database is a folder containing data
rag_vectorizer: semantic # possible values semantic, tfidf, openai rag_vectorizer: semantic # possible values semantic, tfidf, openai
rag_vectorizer_model: sentence-transformers/bert-base-nli-mean-tokens # The model name if applicable rag_vectorizer_model: "BAAI/bge-m3" # The model name if applicable
rag_vectorizer_parameters: null # Parameters of the model in json format rag_vectorizer_parameters: null # Parameters of the model in json format
rag_chunk_size: 512 # number of tokens per chunk rag_chunk_size: 512 # number of tokens per chunk
rag_overlap: 0 # number of tokens of overlap rag_overlap: 0 # number of tokens of overlap
@ -305,20 +305,9 @@ activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database skills_lib_database_name: "default" # Default skills database
max_summary_size: 512 # in tokens max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
data_vectorization_activate: true # To activate/deactivate data vectorization
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
data_visualization_method: "PCA" #"PCA" or "TSNE"
data_vectorization_sentense_transformer_model: "BAAI/bge-m3" # you can use another model by setting its name here or its path
data_vectorization_save_db: true # For each new session, new files rag_put_chunk_informations_into_context: false # if true then each chunk will be preceded by its information, which may waste some context space but allows the AI to point to where it found the information
data_vectorization_chunk_size: 512 # chunk size rag_build_keys_words: true # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
data_vectorization_overlap_size: 128 # overlap between chunks size
data_vectorization_nb_chunks: 2 # number of chunks to use
data_vectorization_put_chunk_informations_into_context: false # if true then each chunk will be preceded by its information, which may waste some context space but allows the AI to point to where it found the information
data_vectorization_build_keys_words: true # If true, when querying the database, we use keywords generated from the user prompt instead of the prompt itself.
data_vectorization_force_first_chunk: false # If true, the first chunk of the document will systematically be used
data_vectorization_make_persistance: false # If true, the data will be persistent between runs
# Activate internet search # Activate internet search
activate_internet_search: false activate_internet_search: false

@ -1 +1 @@
Subproject commit 1a26259e86396dafe2151b7d5b2d0972b6843127 Subproject commit 1fa9b0b37b2d4d6e710d677776334e00ed1333b8

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

4
web/dist/index.html vendored
View File

@ -6,8 +6,8 @@
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js"></script> <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LoLLMS WebUI</title> <title>LoLLMS WebUI</title>
<script type="module" crossorigin src="/assets/index-BzGsYtS_.js"></script> <script type="module" crossorigin src="/assets/index-98J5TYdO.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-BMbzMYXz.css"> <link rel="stylesheet" crossorigin href="/assets/index-VVi1JRr7.css">
</head> </head>
<body> <body>
<div id="app"></div> <div id="app"></div>

View File

@ -1328,52 +1328,17 @@
</Card> </Card>
<Card title="Data Vectorization" :is_subcard="true" class="pb-2 m-2"> <Card title="Data Vectorization" :is_subcard="true" class="pb-2 m-2">
<table class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500"> <table class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
<tr> <tr>
<td style="min-width: 200px;"> <td style="min-width: 200px;">
<label for="data_vectorization_save_db" class="text-sm font-bold" style="margin-right: 1rem;">Save vectorized database:</label> <label for="rag_build_keys_words" class="text-sm font-bold" style="margin-right: 1rem;">Reformulate prompt before querying database (advised):</label>
</td> </td>
<td> <td>
<div class="flex flex-row"> <div class="flex flex-row">
<input <input
type="checkbox" type="checkbox"
id="data_vectorization_save_db" id="rag_build_keys_words"
required required
v-model="configFile.data_vectorization_save_db" v-model="configFile.rag_build_keys_words"
@change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</div>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_visualize_on_vectorization" class="text-sm font-bold" style="margin-right: 1rem;">show vectorized data:</label>
</td>
<td>
<div class="flex flex-row">
<input
type="checkbox"
id="data_vectorization_visualize_on_vectorization"
required
v-model="configFile.data_vectorization_visualize_on_vectorization"
@change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</div>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_build_keys_words" class="text-sm font-bold" style="margin-right: 1rem;">Reformulate prompt before querying database (advised):</label>
</td>
<td>
<div class="flex flex-row">
<input
type="checkbox"
id="data_vectorization_build_keys_words"
required
v-model="configFile.data_vectorization_build_keys_words"
@change="settingsChanged=true" @change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600" class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
> >
@ -1382,32 +1347,15 @@
</tr> </tr>
<tr> <tr>
<td style="min-width: 200px;"> <td style="min-width: 200px;">
<label for="data_vectorization_force_first_chunk" class="text-sm font-bold" style="margin-right: 1rem;">Force adding the first chunk of the file to the context:</label> <label for="rag_put_chunk_informations_into_context" class="text-sm font-bold" style="margin-right: 1rem;">Put Chunk Information Into Context:</label>
</td> </td>
<td> <td>
<div class="flex flex-row"> <div class="flex flex-row">
<input <input
type="checkbox" type="checkbox"
id="data_vectorization_force_first_chunk" id="rag_put_chunk_informations_into_context"
required required
v-model="configFile.data_vectorization_force_first_chunk" v-model="configFile.rag_put_chunk_informations_into_context"
@change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</div>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_put_chunk_informations_into_context" class="text-sm font-bold" style="margin-right: 1rem;">Put Chunk Information Into Context:</label>
</td>
<td>
<div class="flex flex-row">
<input
type="checkbox"
id="data_vectorization_put_chunk_informations_into_context"
required
v-model="configFile.data_vectorization_put_chunk_informations_into_context"
@change="settingsChanged=true" @change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600" class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
> >
@ -1416,58 +1364,7 @@
</tr> </tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_method" class="text-sm font-bold" style="margin-right: 1rem;">Data vectorization method:</label>
</td>
<td>
<select
id="data_vectorization_method"
required
v-model="configFile.data_vectorization_method"
@change="settingsChanged=true"
class="w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
<option value="tfidf_vectorizer">tfidf Vectorizer</option>
<option value="bm25_vectorizer">bm25 Vectorizer</option>
<option value="model_embedding">Model Embedding</option>
<option value="sentense_transformer">Sentense Transformer</option>
</select>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_sentense_transformer_model" class="text-sm font-bold" style="margin-right: 1rem;">Data vectorization model (for Sentense Transformer):</label>
</td>
<td style="width: 100%;">
<input
type="text"
id="data_vectorization_sentense_transformer_model"
required
v-model="configFile.data_vectorization_sentense_transformer_model"
@change="settingsChanged=true"
class="w-full w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600 dark:bg-gray-600"
>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_visualization_method" class="text-sm font-bold" style="margin-right: 1rem;">Data visualization method:</label>
</td>
<td>
<select
id="data_visualization_method"
required
v-model="configFile.data_visualization_method"
@change="settingsChanged=true"
class="w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
<option value="PCA">PCA</option>
<option value="TSNE">TSNE</option>
</select>
</td>
</tr>
<tr> <tr>
<td style="min-width: 200px;"> <td style="min-width: 200px;">
<label for="data_vectorization_save_db" class="text-sm font-bold" style="margin-right: 1rem;">Save the new files to the database (The database wil always grow and continue to be the same over many sessions):</label> <label for="data_vectorization_save_db" class="text-sm font-bold" style="margin-right: 1rem;">Save the new files to the database (The database wil always grow and continue to be the same over many sessions):</label>
@ -1485,55 +1382,7 @@
</div> </div>
</td> </td>
</tr> </tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_chunk_size" class="text-sm font-bold" style="margin-right: 1rem;">Data vectorization chunk size(tokens):</label>
</td>
<td>
<input id="data_vectorization_chunk_size" v-model="configFile.data_vectorization_chunk_size"
@change="settingsChanged=true"
type="range" min="0" max="64000" step="1"
class="flex-none h-2 mt-14 mb-2 w-full bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 focus:ring-blue-500 focus:border-blue-500 dark:border-gray-600 dark:placeholder-gray-400 dark:focus:ring-blue-500 dark:focus:border-blue-500">
<input v-model="configFile.data_vectorization_chunk_size"
type="number"
@change="settingsChanged=true"
class="w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_overlap_size" class="text-sm font-bold" style="margin-right: 1rem;">Data vectorization overlap size(tokens):</label>
</td>
<td>
<input id="data_vectorization_overlap_size" v-model="configFile.data_vectorization_overlap_size"
@change="settingsChanged=true"
type="range" min="0" max="64000" step="1"
class="flex-none h-2 mt-14 mb-2 w-full bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 focus:ring-blue-500 focus:border-blue-500 dark:border-gray-600 dark:placeholder-gray-400 dark:focus:ring-blue-500 dark:focus:border-blue-500">
<input v-model="configFile.data_vectorization_overlap_size"
type="number"
@change="settingsChanged=true"
class="w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_overlap_size" class="text-sm font-bold" style="margin-right: 1rem;">Number of chunks to use for each message:</label>
</td>
<td>
<input id="data_vectorization_nb_chunks" v-model="configFile.data_vectorization_nb_chunks"
@change="settingsChanged=true"
type="range" min="0" max="1000" step="1"
class="flex-none h-2 mt-14 mb-2 w-full bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700 focus:ring-blue-500 focus:border-blue-500 dark:border-gray-600 dark:placeholder-gray-400 dark:focus:ring-blue-500 dark:focus:border-blue-500">
<input v-model="configFile.data_vectorization_nb_chunks"
type="number"
@change="settingsChanged=true"
class="w-full mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</td>
</tr>
</table> </table>
</Card> </Card>