tts_to_file now tts_to_audio

This commit is contained in:
Saifeddine ALOUI 2024-05-05 01:17:41 +02:00
parent e80c380e9a
commit 72dc903842
8 changed files with 227 additions and 288 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 88
version: 90
binding_name: null
model_name: null
model_variant: null
@ -86,8 +86,8 @@ xtts_base_url: http://localhost:8020
xtts_use_deepspeed: false
xtts_use_streaming_mode: true
auto_read: false
current_voice: null
current_language: en
xtts_current_voice: null
xtts_current_language: en
# Image generation service
enable_sd_service: false
@ -145,11 +145,9 @@ audio_silenceTimer: 5000
# Data vectorization
activate_skills_lib: false # Activate vectorizing previous conversations
skills_lib_database_name: "default" # Default skills database
summerize_discussion: false # activate discussion summary (better but adds computation time)
max_summary_size: 512 # in tokens
data_vectorization_visualize_on_vectorization: false
use_files: true # Activate using files
data_vectorization_activate: true # To activate/deactivate data vectorization
data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
data_visualization_method: "PCA" #"PCA" or "TSNE"

@ -1 +1 @@
Subproject commit ec5428c68feaef544b582f862055d6204eecb7db
Subproject commit 32c7f3ef9f330029e1958fbb964bb233a6d29e9d

View File

@ -444,6 +444,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
use_deep_speed=self.config.xtts_use_deepspeed,
use_streaming_mode=self.config.xtts_use_streaming_mode
)
except:
self.warning(f"Personality {personality.name} request using custom voice but couldn't load XTTS")
except Exception as ex:
@ -1066,12 +1067,12 @@ class LOLLMSWebUI(LOLLMSElfServer):
self.nb_received_tokens = 0
self.start_time = datetime.now()
if self.model is not None:
if self.model.binding_type==BindingType.TEXT_IMAGE and len(self.personality.image_files)>0:
if self.model.binding_type==BindingType.TEXT_IMAGE and len(client.discussion.image_files)>0:
ASCIIColors.info(f"warmup for generating up to {n_predict} tokens")
if self.config["override_personality_model_parameters"]:
output = self.model.generate_with_images(
prompt,
self.personality.image_files,
client.discussion.image_files,
callback=callback,
n_predict=n_predict,
temperature=self.config['temperature'],
@ -1097,7 +1098,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
])
output = self.model.generate_with_images(
prompt,
self.personality.image_files,
client.discussion.image_files,
callback=callback,
n_predict=min(n_predict,self.personality.model_n_predicts),
temperature=self.personality.model_temperature,
@ -1109,7 +1110,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
n_threads=self.config['n_threads']
)
try:
post_processed_output = process_ai_output(output, self.personality.image_files, client.discussion.discussion_folder)
post_processed_output = process_ai_output(output, client.discussion.image_files, client.discussion.discussion_folder)
if len(post_processed_output)!=output:
self.process_chunk(post_processed_output, MSG_TYPE.MSG_TYPE_FULL,client_id=client_id)
except Exception as ex:
@ -1198,48 +1199,23 @@ class LOLLMSWebUI(LOLLMSElfServer):
use_deep_speed=self.config.xtts_use_deepspeed,
use_streaming_mode=self.config.xtts_use_streaming_mode
)
language = convert_language_name(self.personality.language)
self.tts.set_speaker_folder(Path(self.personality.audio_samples[0]).parent)
fn = self.personality.name.lower().replace(' ',"_").replace('.','')
fn = f"{fn}_{message_id}.wav"
url = f"audio/{fn}"
self.tts.tts_to_file(client.generated_text, Path(self.personality.audio_samples[0]).name, f"{fn}", language=language)
fl = f"\n".join([
f"<audio controls>",
f' <source src="{url}" type="audio/wav">',
f' Your browser does not support the audio element.',
f'</audio>'
])
self.process_chunk("Generating voice output", MSG_TYPE.MSG_TYPE_STEP_END, {'status':True},client_id=client_id)
self.process_chunk(fl,MSG_TYPE.MSG_TYPE_UI, client_id=client_id)
"""
self.info("Creating audio output",10)
self.personality.step_start("Creating audio output")
if not PackageManager.check_package_installed("tortoise"):
PackageManager.install_package("tortoise-tts")
from tortoise import utils, api
import sounddevice as sd
if self.tts is None:
self.tts = api.TextToSpeech( kv_cache=True, half=True)
reference_clips = [utils.audio.load_audio(str(p), 22050) for p in self.personality.audio_samples]
tk = self.model.tokenize(client.generated_text)
if len(tk)>100:
chunk_size = 100
for i in range(0, len(tk), chunk_size):
chunk = self.model.detokenize(tk[i:i+chunk_size])
if i==0:
pcm_audio = self.tts.tts_with_preset(chunk, voice_samples=reference_clips, preset='fast').numpy().flatten()
else:
pcm_audio = np.concatenate([pcm_audio, self.tts.tts_with_preset(chunk, voice_samples=reference_clips, preset='ultra_fast').numpy().flatten()])
if self.tts.ready:
language = convert_language_name(self.personality.language)
self.tts.set_speaker_folder(Path(self.personality.audio_samples[0]).parent)
fn = self.personality.name.lower().replace(' ',"_").replace('.','')
fn = f"{fn}_{message_id}.wav"
url = f"audio/{fn}"
self.tts.tts_to_file(client.generated_text, Path(self.personality.audio_samples[0]).name, f"{fn}", language=language)
fl = f"\n".join([
f"<audio controls>",
f' <source src="{url}" type="audio/wav">',
f' Your browser does not support the audio element.',
f'</audio>'
])
self.process_chunk("Generating voice output", MSG_TYPE.MSG_TYPE_STEP_END, {'status':True},client_id=client_id)
self.process_chunk(fl,MSG_TYPE.MSG_TYPE_UI, client_id=client_id)
else:
pcm_audio = self.tts.tts_with_preset(client.generated_text, voice_samples=reference_clips, preset='fast').numpy().flatten()
sd.play(pcm_audio, 22050)
self.personality.step_end("Creating audio output")
"""
self.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
except Exception as ex:
ASCIIColors.error("Couldn't read")

BIN
voice_sample_110.wav Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

4
web/dist/index.html vendored
View File

@ -6,8 +6,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LoLLMS WebUI - Welcome</title>
<script type="module" crossorigin src="/assets/index-a47cf3cb.js"></script>
<link rel="stylesheet" href="/assets/index-ea2b1cac.css">
<script type="module" crossorigin src="/assets/index-9dd9fa43.js"></script>
<link rel="stylesheet" href="/assets/index-69152375.css">
</head>
<body>
<div id="app"></div>

View File

@ -865,23 +865,6 @@
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="current_language" class="text-sm font-bold" style="margin-right: 1rem;">Force AI to answer in this language:</label>
</td>
<td>
<div class="flex flex-row">
<input
type="text"
id="current_language"
required
v-model="configFile.current_language"
@change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</div>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="fun_mode" class="text-sm font-bold" style="margin-right: 1rem;">Fun mode:</label>
</td>
@ -1101,23 +1084,6 @@
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="summerize_discussion" class="text-sm font-bold" style="margin-right: 1rem;">Activate Continuous Learning from discussions:</label>
</td>
<td>
<div class="flex flex-row">
<input
type="checkbox"
id="summerize_discussion"
required
v-model="configFile.summerize_discussion"
@change="settingsChanged=true"
class="mt-1 px-2 py-1 border border-gray-300 rounded dark:bg-gray-600"
>
</div>
</td>
</tr>
<tr>
<td style="min-width: 200px;">
<label for="data_vectorization_visualize_on_vectorization" class="text-sm font-bold" style="margin-right: 1rem;">show vectorized data:</label>
</td>
@ -1983,11 +1949,11 @@
<tr>
<td style="min-width: 200px;">
<label for="current_voice" class="text-sm font-bold" style="margin-right: 1rem;">Current voice:</label>
<label for="xtts_current_voice" class="text-sm font-bold" style="margin-right: 1rem;">Current voice:</label>
</td>
<td>
<div class="flex flex-row">
<select v-model="current_voice" @change="settingsChanged=true" :disabled="!enable_voice_service">
<select v-model="xtts_current_voice" @change="settingsChanged=true" :disabled="!enable_voice_service">
<option v-for="voice in voices" :key="voice" :value="voice">
{{ voice }}
</option>
@ -3460,10 +3426,10 @@ export default {
uploadLogo(event){
const file = event.target.files[0]; // Get the selected file
const formData = new FormData(); // Create a FormData object
formData.append('avatar', file); // Add the file to the form data with the key 'avatar'
formData.append('logo', file); // Add the file to the form data with the key 'logo'
console.log("Uploading avatar")
// Make an API request to upload the logo
axios.post('/upload_avatar', formData)
axios.post('/upload_logo', formData)
.then(response => {
console.log("Logo uploaded successfully")
@ -3472,9 +3438,8 @@ export default {
const fileName = response.data.fileName;
console.log("response",response);
this.app_custom_logo = fileName; // Update the app_custom_logo value with the file name
this.configFile.app_custom_logo=fileName;
this.update_setting("app_custom_logo", fileName, ()=>{}).then(()=>{})
this.$store.state.config.app_custom_logo=fileName;
this.settingsChanged = true
})
.catch(error => {
console.error('Error uploading avatar:', error);
@ -3495,8 +3460,8 @@ export default {
const fileName = response.data.fileName;
console.log("response",response);
this.user_avatar = fileName; // Update the user_avatar value with the file name
this.configFile.user_avatar=fileName;
this.update_setting("user_avatar", fileName, ()=>{}).then(()=>{})
this.$store.state.config.user_avatar=fileName;
this.settingsChanged = true
})
.catch(error => {
console.error('Error uploading avatar:', error);
@ -5532,23 +5497,23 @@ export default {
this.$store.state.config.current_language = value
},
},
current_voice:{
xtts_current_voice:{
get() {
if (this.$store.state.config.current_voice===null || this.$store.state.config.current_voice===undefined){
console.log("current voice", this.$store.state.config.current_voice)
if (this.$store.state.config.xtts_current_voice===null || this.$store.state.config.xtts_current_voice===undefined){
console.log("current voice", this.$store.state.config.xtts_current_voice)
return "main_voice";
}
return this.$store.state.config.current_voice;
return this.$store.state.config.xtts_current_voice;
},
set(value) {
// You should not set the value directly here; use the updateSetting method instead
if(value=="main_voice" || value===undefined){
console.log("Current voice set to None")
this.$store.state.config.current_voice = null
this.$store.state.config.xtts_current_voice = null
}
else{
console.log("Current voice set to ",value)
this.$store.state.config.current_voice = value
this.$store.state.config.xtts_current_voice = value
}
},
},