tts_to_file now tts_to_audio

2025-04-16 23:18:53 +00:00 · 2024-05-05 01:17:41 +02:00 · 2024-05-05 01:17:41 +02:00 · 72dc903842
commit 72dc903842
parent e80c380e9a
8 changed files with 227 additions and 288 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@ -86,8 +86,8 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
-current_language: en
+xtts_current_voice: null
+xtts_current_language: en

 # Image generation service
 enable_sd_service: false
@ -145,11 +145,9 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit ec5428c68feaef544b582f862055d6204eecb7db
+Subproject commit 32c7f3ef9f330029e1958fbb964bb233a6d29e9d
--- a/lollms_webui.py
+++ b/lollms_webui.py
@ -444,6 +444,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
                                                        use_deep_speed=self.config.xtts_use_deepspeed,
                                                        use_streaming_mode=self.config.xtts_use_streaming_mode
                                                        )
+                            
                        except:
                            self.warning(f"Personality {personality.name} request using custom voice but couldn't load XTTS")
                except Exception as ex:
@ -1066,12 +1067,12 @@ class LOLLMSWebUI(LOLLMSElfServer):
        self.nb_received_tokens = 0
        self.start_time = datetime.now()
        if self.model is not None:
-            if self.model.binding_type==BindingType.TEXT_IMAGE and len(self.personality.image_files)>0:
+            if self.model.binding_type==BindingType.TEXT_IMAGE and len(client.discussion.image_files)>0:
                ASCIIColors.info(f"warmup for generating up to {n_predict} tokens")
                if self.config["override_personality_model_parameters"]:
                    output = self.model.generate_with_images(
                        prompt,
-                        self.personality.image_files,
+                        client.discussion.image_files,
                        callback=callback,
                        n_predict=n_predict,
                        temperature=self.config['temperature'],
@ -1097,7 +1098,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
                    ])
                    output = self.model.generate_with_images(
                        prompt,
-                        self.personality.image_files,
+                        client.discussion.image_files,
                        callback=callback,
                        n_predict=min(n_predict,self.personality.model_n_predicts),
                        temperature=self.personality.model_temperature,
@ -1109,7 +1110,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
                        n_threads=self.config['n_threads']
                    )
                    try:
-                        post_processed_output = process_ai_output(output, self.personality.image_files, client.discussion.discussion_folder)
+                        post_processed_output = process_ai_output(output, client.discussion.image_files, client.discussion.discussion_folder)
                        if len(post_processed_output)!=output:
                            self.process_chunk(post_processed_output, MSG_TYPE.MSG_TYPE_FULL,client_id=client_id)
                    except Exception as ex:
@ -1198,48 +1199,23 @@ class LOLLMSWebUI(LOLLMSElfServer):
                                                        use_deep_speed=self.config.xtts_use_deepspeed,
                                                        use_streaming_mode=self.config.xtts_use_streaming_mode                                                        
                                                    )
-                            language = convert_language_name(self.personality.language)
-                            self.tts.set_speaker_folder(Path(self.personality.audio_samples[0]).parent)
-                            fn = self.personality.name.lower().replace(' ',"_").replace('.','')    
-                            fn = f"{fn}_{message_id}.wav"
-                            url = f"audio/{fn}"
-                            self.tts.tts_to_file(client.generated_text, Path(self.personality.audio_samples[0]).name, f"{fn}", language=language)
-                            fl = f"\n".join([
-                            f"<audio controls>",
-                            f'    <source src="{url}" type="audio/wav">',
-                            f'    Your browser does not support the audio element.',
-                            f'</audio>'                        
-                            ])
-                            self.process_chunk("Generating voice output", MSG_TYPE.MSG_TYPE_STEP_END, {'status':True},client_id=client_id)
-                            self.process_chunk(fl,MSG_TYPE.MSG_TYPE_UI, client_id=client_id)
-                            
-                            """
-                            self.info("Creating audio output",10)
-                            self.personality.step_start("Creating audio output")
-                            if not PackageManager.check_package_installed("tortoise"):
-                                PackageManager.install_package("tortoise-tts")
-                            from tortoise import utils, api
-                            import sounddevice as sd
-                            if self.tts is None:
-                                self.tts = api.TextToSpeech( kv_cache=True, half=True)
-                            reference_clips = [utils.audio.load_audio(str(p), 22050) for p in self.personality.audio_samples]
-                            tk = self.model.tokenize(client.generated_text)
-                            if len(tk)>100:
-                                chunk_size = 100
-                                
-                                for i in range(0, len(tk), chunk_size):
-                                    chunk = self.model.detokenize(tk[i:i+chunk_size])
-                                    if i==0:
-                                        pcm_audio = self.tts.tts_with_preset(chunk, voice_samples=reference_clips, preset='fast').numpy().flatten()
-                                    else:
-                                        pcm_audio = np.concatenate([pcm_audio, self.tts.tts_with_preset(chunk, voice_samples=reference_clips, preset='ultra_fast').numpy().flatten()])
+                            if self.tts.ready:
+                                language = convert_language_name(self.personality.language)
+                                self.tts.set_speaker_folder(Path(self.personality.audio_samples[0]).parent)
+                                fn = self.personality.name.lower().replace(' ',"_").replace('.','')    
+                                fn = f"{fn}_{message_id}.wav"
+                                url = f"audio/{fn}"
+                                self.tts.tts_to_file(client.generated_text, Path(self.personality.audio_samples[0]).name, f"{fn}", language=language)
+                                fl = f"\n".join([
+                                f"<audio controls>",
+                                f'    <source src="{url}" type="audio/wav">',
+                                f'    Your browser does not support the audio element.',
+                                f'</audio>'                        
+                                ])
+                                self.process_chunk("Generating voice output", MSG_TYPE.MSG_TYPE_STEP_END, {'status':True},client_id=client_id)
+                                self.process_chunk(fl,MSG_TYPE.MSG_TYPE_UI, client_id=client_id)
                            else:
-                                pcm_audio = self.tts.tts_with_preset(client.generated_text, voice_samples=reference_clips, preset='fast').numpy().flatten()
-                            sd.play(pcm_audio, 22050)
-                            self.personality.step_end("Creating audio output")                        
-                            """
-
-
+                                self.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.") 

                        except Exception as ex:
                            ASCIIColors.error("Couldn't read")
--- a/voice_sample_110.wav
+++ b/voice_sample_110.wav
--- a/web/dist/assets/index-69152375.css
+++ b/web/dist/assets/index-69152375.css
--- a/web/dist/assets/index-9dd9fa43.js
+++ b/web/dist/assets/index-9dd9fa43.js
--- a/web/dist/index.html
+++ b/web/dist/index.html
@ -6,8 +6,8 @@
    
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LoLLMS WebUI - Welcome</title>
-    <script type="module" crossorigin src="/assets/index-a47cf3cb.js"></script>
-    <link rel="stylesheet" href="/assets/index-ea2b1cac.css">
+    <script type="module" crossorigin src="/assets/index-9dd9fa43.js"></script>
+    <link rel="stylesheet" href="/assets/index-69152375.css">
  </head>
  <body>
    <div id="app"></div>
--- a/web/src/views/SettingsView.vue
+++ b/web/src/views/SettingsView.vue
@ -865,23 +865,6 @@
                                        </td>
                                        </tr>   
                                        <tr>
-                                        <td style="min-width: 200px;">
-                                            <label for="current_language" class="text-sm font-bold" style="margin-right: 1rem;">Force AI to answer in this language:</label>
-                                        </td>
-                                        <td>
-                                            <div class="flex flex-row">
-                                            <input
-                                            type="text"
-                                            id="current_language"
-                                            required
-                                            v-model="configFile.current_language"
-                                            @change="settingsChanged=true"
-                                            class="mt-1 px-2 py-1 border border-gray-300 rounded  dark:bg-gray-600"
-                                            >
-                                            </div>
-                                        </td>
-                                        </tr>
-                                        <tr>
                                        <td style="min-width: 200px;">
                                            <label for="fun_mode" class="text-sm font-bold" style="margin-right: 1rem;">Fun mode:</label>
                                        </td>
@ -1101,23 +1084,6 @@
                            </td>
                            </tr>                                  
                            <tr>
-                            <td style="min-width: 200px;">
-                                <label for="summerize_discussion" class="text-sm font-bold" style="margin-right: 1rem;">Activate Continuous Learning from discussions:</label>
-                            </td>
-                            <td>
-                                <div class="flex flex-row">
-                                <input
-                                type="checkbox"
-                                id="summerize_discussion"
-                                required
-                                v-model="configFile.summerize_discussion"
-                                @change="settingsChanged=true"
-                                class="mt-1 px-2 py-1 border border-gray-300 rounded  dark:bg-gray-600"
-                                >
-                                </div>
-                            </td>
-                            </tr>                                         
-                            <tr>
                            <td style="min-width: 200px;">
                                <label for="data_vectorization_visualize_on_vectorization" class="text-sm font-bold" style="margin-right: 1rem;">show vectorized data:</label>
                            </td>
@ -1983,11 +1949,11 @@

                            <tr>
                            <td style="min-width: 200px;">
-                                <label for="current_voice" class="text-sm font-bold" style="margin-right: 1rem;">Current voice:</label>
+                                <label for="xtts_current_voice" class="text-sm font-bold" style="margin-right: 1rem;">Current voice:</label>
                            </td>
                            <td>
                                <div class="flex flex-row">
-                                    <select v-model="current_voice" @change="settingsChanged=true" :disabled="!enable_voice_service">
+                                    <select v-model="xtts_current_voice" @change="settingsChanged=true" :disabled="!enable_voice_service">
                                    <option v-for="voice in voices" :key="voice" :value="voice">
                                        {{ voice }}
                                    </option>
@ -3460,10 +3426,10 @@ export default {
        uploadLogo(event){
            const file = event.target.files[0]; // Get the selected file
            const formData = new FormData(); // Create a FormData object
-            formData.append('avatar', file); // Add the file to the form data with the key 'avatar'
+            formData.append('logo', file); // Add the file to the form data with the key 'logo'
            console.log("Uploading avatar")
            // Make an API request to upload the logo
-            axios.post('/upload_avatar', formData)
+            axios.post('/upload_logo', formData)
                .then(response => {
                    console.log("Logo uploaded successfully")
                    
@ -3472,9 +3438,8 @@ export default {
                    const fileName = response.data.fileName;
                    console.log("response",response);
                    this.app_custom_logo = fileName; // Update the app_custom_logo value with the file name
-                    this.configFile.app_custom_logo=fileName;
-
-                    this.update_setting("app_custom_logo", fileName, ()=>{}).then(()=>{})
+                    this.$store.state.config.app_custom_logo=fileName;
+                    this.settingsChanged = true
                })
                .catch(error => {
                console.error('Error uploading avatar:', error);
@ -3495,8 +3460,8 @@ export default {
                    const fileName = response.data.fileName;
                    console.log("response",response);
                    this.user_avatar = fileName; // Update the user_avatar value with the file name
-                    this.configFile.user_avatar=fileName;
-                    this.update_setting("user_avatar", fileName, ()=>{}).then(()=>{})
+                    this.$store.state.config.user_avatar=fileName;
+                    this.settingsChanged = true
                })
                .catch(error => {
                    console.error('Error uploading avatar:', error);
@ -5532,23 +5497,23 @@ export default {
                this.$store.state.config.current_language = value
            },
        },
-        current_voice:{
+        xtts_current_voice:{
            get() {
-                if (this.$store.state.config.current_voice===null || this.$store.state.config.current_voice===undefined){
-                    console.log("current voice", this.$store.state.config.current_voice)
+                if (this.$store.state.config.xtts_current_voice===null || this.$store.state.config.xtts_current_voice===undefined){
+                    console.log("current voice", this.$store.state.config.xtts_current_voice)
                    return "main_voice";
                }
-                return this.$store.state.config.current_voice;
+                return this.$store.state.config.xtts_current_voice;
            },
            set(value) {
                // You should not set the value directly here; use the updateSetting method instead
                if(value=="main_voice" || value===undefined){
                    console.log("Current voice set to None")
-                    this.$store.state.config.current_voice = null
+                    this.$store.state.config.xtts_current_voice = null
                }
                else{
                    console.log("Current voice set to ",value)
-                    this.$store.state.config.current_voice = value
+                    this.$store.state.config.xtts_current_voice = value
                }
            },
        },