last version

2025-04-15 14:46:34 +00:00 · 2024-05-05 20:57:11 +02:00 · 2024-05-05 20:57:11 +02:00 · d627defd1d
commit d627defd1d
parent 3c35edb0d5
10 changed files with 315 additions and 227 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 91
+version: 92
 binding_name: null
 model_name: null
 model_variant: null
@@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
 xtts_current_language: en
+xtts_stream_chunk_size: 100
+xtts_temperature: 0.75
+xtts_length_penalty: 1.0
+xtts_repetition_penalty: 5.0
+xtts_top_k: 50
+xtts_top_p: 0.85
+xtts_speed: 1
+xtts_enable_text_splitting: true

 # Image generation service
 enable_sd_service: false
--- a/events/lollms_interactive_events.py
+++ b/events/lollms_interactive_events.py
@@ -18,8 +18,8 @@ from ascii_colors import ASCIIColors
 from lollms.personality import MSG_TYPE, AIPersonality
 from lollms.types import MSG_TYPE, SENDER_TYPES
 from lollms.utilities import load_config, trace_exception, gc
-from lollms.utilities import find_first_available_file_index, convert_language_name, PackageManager, run_async
-from lollms.security import forbid_remote_access
+from lollms.utilities import find_first_available_file_index, convert_language_name, PackageManager, run_async, add_period
+from lollms.security import forbid_remote_access, check_access
 from lollms_webui import LOLLMSWebUI
 from pathlib import Path
 from typing import List
@@ -56,6 +56,7 @@ def add_events(sio:socketio):

    @sio.on('start_audio_stream')
    def start_audio_stream(sid):
+        client = check_access(lollmsElfServer, sid)
        if lollmsElfServer.config.headless_server_mode:
            return {"status":False,"error":"Start recording is blocked when in headless mode for obvious security reasons!"}

@@ -68,15 +69,38 @@ def add_events(sio:socketio):
            lollmsElfServer.rec_output_folder = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_rec"
            lollmsElfServer.rec_output_folder.mkdir(exist_ok=True, parents=True)
            lollmsElfServer.summoned = False
-            lollmsElfServer.audio_cap = AudioRecorder(client.discussion.discussion_folder/"audio"/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
+            lollmsElfServer.audio_cap = AudioRecorder( client.discussion.discussion_folder/"audio"/"rt.wav", sio, callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
            lollmsElfServer.audio_cap.start_recording()
-        except:
+        except Exception as ex:
+            trace_exception(ex)
            lollmsElfServer.InfoMessage("Couldn't load media library.\nYou will not be able to perform any of the media linked operations. please verify the logs and install any required installations")



    @sio.on('stop_audio_stream')
    def stop_audio_stream(sid):
+        client = check_access(lollmsElfServer, sid)
        lollmsElfServer.info("Stopping audio capture")
-        lollmsElfServer.audio_cap.stop_recording()
+        text = lollmsElfServer.audio_cap.stop_recording()
+        if lollmsElfServer.config.debug:
+            ASCIIColors.yellow(text)
+        
+        ai_text = lollmsElfServer.receive_and_generate(text["text"], client)
+        
+        if lollmsElfServer.tts and lollmsElfServer.tts.ready:
+            personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
+            voice=lollmsElfServer.config.xtts_current_voice
+            if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
+                voices_folder = personality_audio
+            elif voice!="main_voice":
+                voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+            else:
+                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+            language = lollmsElfServer.config.xtts_current_language# convert_language_name()
+            lollmsElfServer.tts.set_speaker_folder(voices_folder)
+            preprocessed_text= add_period(ai_text)
+            voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
+
+            lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, language=language)
+

--- a/2
+++ b/2
@@ -1 +1 @@
-Subproject commit fd20ec2859333fb93fd576fecbd712015e1c25e1
+Subproject commit 1377c5b953e47c69b1b44e09be0133f8eef8881d
--- a/lollms_webui.py
+++ b/lollms_webui.py
@@ -939,6 +939,9 @@ class LOLLMSWebUI(LOLLMSElfServer):
        Processes a chunk of generated text
        """
        client = self.session.get_client(client_id)
+        if chunk is None:
+            return
+        
        if chunk is not None:
            if not client_id in list(self.session.clients.keys()):
                self.error("Connection lost", client_id=client_id)
@@ -1051,7 +1054,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
                self.personality.text_files = client.discussion.text_files
                self.personality.image_files = client.discussion.image_files
                self.personality.audio_files = client.discussion.audio_files
-                self.personality.processor.run_workflow(prompt, full_prompt, callback, context_details,client=client)
+                output = self.personality.processor.run_workflow(prompt, full_prompt, callback, context_details,client=client)
            except Exception as ex:
                trace_exception(ex)
                # Catch the exception and get the traceback as a list of strings
@@ -1061,13 +1064,15 @@ class LOLLMSWebUI(LOLLMSElfServer):
                ASCIIColors.error(f"Workflow run failed.\nError:{ex}")
                ASCIIColors.error(traceback_text)
                if callback:
-                    callback(f"Workflow run failed\nError:{ex}", MSG_TYPE.MSG_TYPE_EXCEPTION)                   
+                    callback(f"Workflow run failed\nError:{ex}", MSG_TYPE.MSG_TYPE_EXCEPTION)
+                return          
            print("Finished executing the workflow")
-            return
+            return output


-        self._generate(full_prompt, n_predict, client_id, callback)
+        txt = self._generate(full_prompt, n_predict, client_id, callback)
        ASCIIColors.success("\nFinished executing the generation")
+        return txt

    def _generate(self, prompt, n_predict, client_id, callback=None):
        client = self.session.get_client(client_id)
@@ -1307,7 +1312,7 @@ class LOLLMSWebUI(LOLLMSElfServer):
            self.busy=False
            return ""

-    def receive_and_generate(self, text, client:Client, callback):
+    def receive_and_generate(self, text, client:Client, callback=None):
        prompt = text
        try:
            nb_tokens = len(self.model.tokenize(prompt))
@@ -1323,5 +1328,14 @@ class LOLLMSWebUI(LOLLMSElfServer):
            parent_message_id=self.message_id,
            nb_tokens=nb_tokens
        )
-        discussion_messages, current_message, tokens, context_details, internet_search_infos = self.prepare_query(client.client_id, -1, False, n_tokens=self.config.min_n_predict, force_using_internet=False)
-        return self.generate(discussion_messages, current_message, context_details, self.config.ctx_size-len(tokens)-1, client.client_id, callback)
+        discussion_messages, current_message, tokens, context_details, internet_search_infos = self.prepare_query(client.client_id, client.discussion.current_message.id, False, n_tokens=self.config.min_n_predict, force_using_internet=False)
+        self.new_message(
+                        client.client_id, 
+                        self.personality.name,
+                        message_type= MSG_TYPE.MSG_TYPE_FULL,
+                        content=""
+        )
+        client.generated_text = ""
+        self.generate(discussion_messages, current_message, context_details, self.config.ctx_size-len(tokens)-1, client.client_id, callback if callback else partial(self.process_chunk, client_id=client.client_id))
+        self.close_message(client.client_id)        
+        return client.generated_text
--- a/web/dist/assets/index-a07e77b6.css
+++ b/web/dist/assets/index-a07e77b6.css
--- a/web/dist/assets/index-a71ecc82.js
+++ b/web/dist/assets/index-a71ecc82.js
--- a/web/dist/index.html
+++ b/web/dist/index.html
@@ -6,8 +6,8 @@
    
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LoLLMS WebUI - Welcome</title>
-    <script type="module" crossorigin src="/assets/index-73d394ff.js"></script>
-    <link rel="stylesheet" href="/assets/index-58b402c9.css">
+    <script type="module" crossorigin src="/assets/index-a71ecc82.js"></script>
+    <link rel="stylesheet" href="/assets/index-a07e77b6.css">
  </head>
  <body>
    <div id="app"></div>
--- a/web/src/components/Message.vue
+++ b/web/src/components/Message.vue
@@ -520,7 +520,7 @@ export default {
            }
        },
        speak() {
-            if(this.$store.state.config.xtts_enable){
+            if(this.$store.state.config.xtts_enable && this.$store.state.config.xtts_use_streaming_mode){
                this.isSpeaking = true;
                axios.post("./text2Audio",{text:this.message.content}).then(response => {
                    this.isSpeaking = false;
@@ -753,12 +753,13 @@ export default {
            }
        },
        'message.content': function (newContent) {
-            if(this.$store.state.config.auto_speak){
-                if(!this.isSpeaking){
-                    // Watch for changes to this.message.content and call the checkForFullSentence method
-                    this.checkForFullSentence();
+            if(this.$store.state.config.auto_speak)
+                if(!(this.$store.state.config.xtts_enable && this.$store.state.config.xtts_use_streaming_mode)){
+                    if(!this.isSpeaking){
+                        // Watch for changes to this.message.content and call the checkForFullSentence method
+                        this.checkForFullSentence();
+                    }
                }
-            }
        },
        'message.ui': function (newContent) {
            console.log("ui changed")
--- a/web/src/views/DiscussionsView.vue
+++ b/web/src/views/DiscussionsView.vue
@@ -241,7 +241,7 @@
                <div class="container pt-4 pb-50 mb-50 w-full">
                    <TransitionGroup v-if="discussionArr.length > 0" name="list">
                        <Message v-for="(msg, index) in discussionArr" 
-                            :key="msg.id" :message="msg"  :id="'msg-' + msg.id"
+                            :key="msg.id" :message="msg"  :id="'msg-' + msg.id" :ref="'msg-' + msg.id"
                            :host="host"
                            ref="messages"
                            
@@ -1659,12 +1659,12 @@ export default {
            })
        },
        finalMsgEvent(msgObj) {
-            console.log("final", msgObj)
+            let index=0;

            // Last message contains halucination suppression so we need to update the message content too
            this.discussion_id = msgObj.discussion_id
            if (this.currentDiscussion.id == this.discussion_id) {
-                const index = this.discussionArr.findIndex((x) => x.id == msgObj.id)
+                index = this.discussionArr.findIndex((x) => x.id == msgObj.id)
                this.discussionArr[index].content = msgObj.content
                this.discussionArr[index].finished_generating_at = msgObj.finished_generating_at

@@ -1682,9 +1682,16 @@ export default {
            this.isGenerating = false
            this.setDiscussionLoading(this.currentDiscussion.id, this.isGenerating)
            this.chime.play()
-            const index = this.discussionArr.findIndex((x) => x.id == msgObj.id)
+            index = this.discussionArr.findIndex((x) => x.id == msgObj.id)
            const messageItem = this.discussionArr[index]            
            messageItem.status_message = "Done"
+            console.log("final", msgObj)
+            if(this.$store.state.config.auto_speak && (this.$store.state.config.xtts_enable && this.$store.state.config.xtts_use_streaming_mode)){
+                index = this.discussionArr.findIndex((x) => x.id == msgObj.id)
+                let message_component = this.$refs['msg-' + msgObj.id][0]
+                console.log(message_component)
+                message_component.speak()
+            }

        },
        copyToClipBoard(messageEntry) {
--- a/web/src/views/SettingsView.vue
+++ b/web/src/views/SettingsView.vue
@@ -885,6 +885,22 @@
                                </Card>
                                <Card title="Whisper audio transcription" :is_subcard="true" class="pb-2  m-2">
                                    <table class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
+                                        <tr>
+                                        <td style="min-width: 200px;">
+                                            <label for="whisper_activate" class="text-sm font-bold" style="margin-right: 1rem;">Activate Whisper at startup:</label>
+                                        </td>
+                                        <td>
+                                            <div class="flex flex-row">
+                                            <input
+                                            type="checkbox"
+                                            id="whisper_activate"
+                                            required
+                                            v-model="configFile.whisper_activate"
+                                            @change="settingsChanged=true"
+                                            class="mt-1 px-2 py-1 border border-gray-300 rounded  dark:bg-gray-600"
+                                            >
+                                            </div>
+                                        </td>                                        </tr>
                                        <tr>
                                        <td style="min-width: 200px;">
                                            <label for="whisper_model" class="text-sm font-bold" style="margin-right: 1rem;">Whisper model:</label>
@@ -904,7 +920,7 @@
                                            </select>
                                            </div>
                                        </td>
-                                        </tr>
+                                        </tr>                                        

                                    </table>
                                </Card>
@@ -2022,6 +2038,24 @@
                                </div>
                            </td>
                            </tr>                    
+                            <tr>
+                            <td style="min-width: 200px;">
+                                <label for="xtts_stream_chunk_size" class="text-sm font-bold" style="margin-right: 1rem;">xtts stream chunk size:</label>
+                            </td>
+                            <td>
+                                <div class="flex flex-row">
+                                    <input
+                                        type="text"
+                                        id="xtts_stream_chunk_size"
+                                        required
+                                        v-model="configFile.xtts_stream_chunk_size"
+                                        @change="settingsChanged=true"
+                                        class="mt-1 px-2 py-1 border border-gray-300 rounded  dark:bg-gray-600"
+                                    >
+                                </div>
+                            </td>
+                            </tr>
+
                        </table>

                    </Card>