new xtts

2025-04-15 22:56:37 +00:00 · 2024-05-05 17:28:45 +02:00 · 2024-05-05 17:28:45 +02:00 · 3c35edb0d5
commit 3c35edb0d5
parent ab4ce7a857
13 changed files with 287 additions and 230 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -83,7 +83,7 @@ copy_to_clipboard_add_all_details: false
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
-xtts_use_deepspeed: true
+xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
 xtts_current_voice: null
@ -186,4 +186,5 @@ activate_audio_infos: true


 # whisper configuration
+whisper_activate: false
 whisper_model: base
--- a/endpoints/lollms_advanced.py
+++ b/endpoints/lollms_advanced.py
@ -13,10 +13,11 @@ from pydantic import BaseModel, Field
 from starlette.responses import StreamingResponse
 from lollms.types import MSG_TYPE
 from lollms.main_config import BaseConfig
-from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, show_yes_no_dialog
+from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, show_yes_no_dialog, add_period
 from lollms.security import sanitize_path, forbid_remote_access, check_access
 from ascii_colors import ASCIIColors
 from lollms.databases.discussions_database import DiscussionsDB
+from lollms.client_session import Client
 from pathlib import Path
 from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
 import tqdm
@ -54,7 +55,8 @@ from utilities.execution_engines.svg_execution_engine import execute_svg

 router = APIRouter()
 lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
-
+class Identification(BaseModel):  # minimal request body: carries only the caller's client id
+    client_id:str  # id the recording endpoints hand to check_access() to resolve the client

 class CodeRequest(BaseModel):
    client_id: str  = Field(...)
@ -403,8 +405,10 @@ async def open_discussion_folder(request: FolderRequest):
        lollmsElfServer.error(ex)
        return {"status": False, "error": "An error occurred while processing the request"}

-@router.get("/start_recording")
-def start_recording():
+@router.post("/start_recording")
+def start_recording(data:Identification):
+    client = check_access(lollmsElfServer, data.client_id)
+
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Start recording is blocked when in headless mode for obvious security reasons!"}

@ -417,14 +421,16 @@ def start_recording():
        lollmsElfServer.rec_output_folder = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_rec"
        lollmsElfServer.rec_output_folder.mkdir(exist_ok=True, parents=True)
        lollmsElfServer.summoned = False
-        lollmsElfServer.audio_cap = AudioRecorder(lollmsElfServer.sio,lollmsElfServer.rec_output_folder/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
+        lollmsElfServer.audio_cap = AudioRecorder(client.discussion.discussion_folder/"audio"/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
        lollmsElfServer.audio_cap.start_recording()
    except:
        lollmsElfServer.InfoMessage("Couldn't load media library.\nYou will not be able to perform any of the media linked operations. please verify the logs and install any required installations")


-@router.get("/stop_recording")
-def stop_recording():
+@router.post("/stop_recording")
+def stop_recording(data:Identification):
+    client = check_access(lollmsElfServer, data.client_id)
+
    if lollmsElfServer.config.headless_server_mode:
        return {"status":False,"error":"Stop recording is blocked when in headless mode for obvious security reasons!"}

@ -433,5 +439,21 @@ def stop_recording():

    lollmsElfServer.info("Stopping audio capture")
    text = lollmsElfServer.audio_cap.stop_recording()
-    return text
+    ai_text = lollmsElfServer.receive_and_generate(text, client, callback=lollmsElfServer.tasks_library.sink)  # the config object is not a token budget, so no n_predict is passed
+    preprocessed_text = add_period(ai_text)  # computed up front so the return value exists even when TTS is unavailable
+    if lollmsElfServer.tts and lollmsElfServer.tts.ready:
+        personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
+        voice=lollmsElfServer.config.xtts_current_voice
+        if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
+            voices_folder = personality_audio
+        elif voice!="main_voice":
+            voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+        else:
+            voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+        language = lollmsElfServer.config.xtts_current_language# convert_language_name()
+        lollmsElfServer.tts.set_speaker_folder(voices_folder)
+        voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
+        if voice_file:  # xtts_current_voice defaults to null, so there may be no matching .wav to speak with
+            lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, language=language)
+    return preprocessed_text

--- a/events/lollms_interactive_events.py
+++ b/events/lollms_interactive_events.py
@ -56,18 +56,25 @@ def add_events(sio:socketio):

    @sio.on('start_audio_stream')
    def start_audio_stream(sid):
+        if lollmsElfServer.config.headless_server_mode:
+            return {"status":False,"error":"Start recording is blocked when in headless mode for obvious security reasons!"}
+
+        if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
+            return {"status":False,"error":"Start recording is blocked when the server is exposed outside for very obvious reasons!"}
+        client = lollmsElfServer.session.get_client(sid)  # `client` was undefined below; NOTE(review): assumes the socket sid is the client id registered in lollmsElfServer.session - confirm
        lollmsElfServer.info("Starting audio capture")
        try:
            from lollms.media import AudioRecorder
            lollmsElfServer.rec_output_folder = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_rec"
            lollmsElfServer.rec_output_folder.mkdir(exist_ok=True, parents=True)
            lollmsElfServer.summoned = False
-            lollmsElfServer.audio_cap = AudioRecorder(sio,lollmsElfServer.rec_output_folder/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer)
+            lollmsElfServer.audio_cap = AudioRecorder(client.discussion.discussion_folder/"audio"/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
            lollmsElfServer.audio_cap.start_recording()
        except:
            lollmsElfServer.InfoMessage("Couldn't load media library.\nYou will not be able to perform any of the media linked operations. please verify the logs and install any required installations")


+
    @sio.on('stop_audio_stream')
    def stop_audio_stream(sid):
        lollmsElfServer.info("Stopping audio capture")
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit c370b6ecceaa4437de1379ad1e08287b71bb51ca
+Subproject commit fd20ec2859333fb93fd576fecbd712015e1c25e1
--- a/lollms_webui.py
+++ b/lollms_webui.py
@ -21,7 +21,7 @@ from lollms.com import NotificationType, NotificationDisplayType, LoLLMsCom
 from lollms.app import LollmsApplication
 from lollms.utilities import File64BitsManager, PromptReshaper, PackageManager, find_first_available_file_index, run_async, is_asyncio_loop_running, yes_or_no_input, process_ai_output
 from lollms.generation import RECEPTION_MANAGER, ROLE_CHANGE_DECISION, ROLE_CHANGE_OURTPUT
-
+from lollms.client_session import Client
 import git
 import asyncio
 import os
@ -1306,3 +1306,22 @@ class LOLLMSWebUI(LOLLMSElfServer):
            print()
            self.busy=False
            return ""
+
+    def receive_and_generate(self, text, client:Client, callback, n_predict=None):
+        """Store `text` as a user message in `client.discussion`, then return self.generate()'s result; a positive int `n_predict` caps the token budget, any other value is ignored."""
+        try:
+            nb_tokens = len(self.model.tokenize(text))
+        except Exception:  # the token count is optional metadata; store the message without it
+            nb_tokens = None
+        ump = self.config.discussion_prompt_separator +self.config.user_name.strip() if self.config.use_user_name_in_discussions else self.personality.user_message_prefix
+        client.discussion.add_message(
+            message_type    = MSG_TYPE.MSG_TYPE_FULL.value,
+            sender_type     = SENDER_TYPES.SENDER_TYPES_USER.value,
+            sender          = ump.replace(self.config.discussion_prompt_separator,"").replace(":",""),
+            content         = text,
+            metadata        = None,
+            parent_message_id=self.message_id, nb_tokens=nb_tokens
+        )
+        discussion_messages, current_message, tokens, context_details, internet_search_infos = self.prepare_query(client.client_id, -1, False, n_tokens=self.config.min_n_predict, force_using_internet=False)
+        budget = self.config.ctx_size-len(tokens)-1  # same budget as before: ctx_size minus the tokens returned by prepare_query
+        return self.generate(discussion_messages, current_message, context_details, min(budget, n_predict) if isinstance(n_predict, int) and n_predict>0 else budget, client.client_id, callback)
--- a/web/dist/assets/index-58b402c9.css
+++ b/web/dist/assets/index-58b402c9.css
--- a/web/dist/assets/index-73d394ff.js
+++ b/web/dist/assets/index-73d394ff.js
--- a/web/dist/index.html
+++ b/web/dist/index.html
@ -6,8 +6,8 @@
    
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LoLLMS WebUI - Welcome</title>
-    <script type="module" crossorigin src="/assets/index-f1cd900e.js"></script>
-    <link rel="stylesheet" href="/assets/index-f6ab05b8.css">
+    <script type="module" crossorigin src="/assets/index-73d394ff.js"></script>
+    <link rel="stylesheet" href="/assets/index-58b402c9.css">
  </head>
  <body>
    <div id="app"></div>
--- a/web/src/components/Message.vue
+++ b/web/src/components/Message.vue
@ -153,7 +153,7 @@
                                    <i data-feather="volume-2"></i>
                                </div>
                            </div>    
-                            <div class="flex flex-row items-center">
+                            <div v-if="this.$store.state.config.xtts_enable && !this.$store.state.config.xtts_use_streaming_mode" class="flex flex-row items-center">
                                <div v-if="!isSynthesizingVoice" class="text-lg hover:text-red-600 duration-75 active:scale-90 p-2 cursor-pointer" 
                                    title="generate_audio"
                                    @click.stop="read()"
@ -214,7 +214,7 @@
                    </div>

                    <DynamicUIRenderer v-if="message.ui !== null && message.ui !== undefined && message.ui !== ''" class="w-full h-full" :code="message.ui"></DynamicUIRenderer>
-                    <audio controls autoplay v-if="audio_url!=null" :key="audio_url">
+                    <audio controls v-if="audio_url!=null" :key="audio_url">
                        <source :src="audio_url" type="audio/wav"  ref="audio_player" >
                        Your browser does not support the audio element.
                    </audio>  
--- a/web/src/components/Navigation.vue
+++ b/web/src/components/Navigation.vue
@ -44,7 +44,7 @@
              ComfyUI
            </RouterLink>
            <RouterLink 
-              v-if="$store.state.config.xtts_enable" 
+              v-if="$store.state.config.xtts_enable && $store.state.config.xtts_use_streaming_mode" 
              :to="{ name: 'interactive' }" 
              class="inline-block border-l border-t border-r rounded-t py-2 px-4 text-blue-700 font-semibold"
              :class="{ 
--- a/web/src/components/WelcomeComponent.vue
+++ b/web/src/components/WelcomeComponent.vue
@ -23,14 +23,20 @@

 <script>
 import storeLogo from '@/assets/logo.png'
+
 export default {
    name: 'WelcomeComponent',
+    computed:{
+        storeLogo(){
+            // Use the bundled logo until the store config is loaded, or when no custom logo is configured.
+            const config = this.$store.state.config
+            if (!config || !config.app_custom_logo) return storeLogo
+            return '/user_infos/'+config.app_custom_logo
+        },
+    },
    data(){
-        storeLogo:storeLogo
    },
    setup() {
-
-
        return {}
    }
 }
--- a/web/src/views/PlayGroundView.vue
+++ b/web/src/views/PlayGroundView.vue
@ -135,7 +135,7 @@

              <span>Cursor position {{ cursorPosition }}</span>
            </div>
-            <audio controls autoplay v-if="audio_url!=null"  :key="audio_url">
+            <audio controls v-if="audio_url!=null"  :key="audio_url">
                <source :src="audio_url" type="audio/wav"  ref="audio_player">
                Your browser does not support the audio element.
            </audio>  
@ -913,7 +913,7 @@ export default {
    startRecording(){
      this.pending = true;
      if(!this.is_recording){
-        axios.get('/start_recording').then(response => {
+        axios.post('/start_recording', {client_id:this.$store.state.client_id}).then(response => {
          this.is_recording = true;
          this.pending = false;
          console.log(response.data)
--- a/web/src/views/SettingsView.vue
+++ b/web/src/views/SettingsView.vue
@ -1917,6 +1917,8 @@
                                <div class="flex flex-row">
                                <button class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" @click="reinstallAudioService">install xtts service</button>
                                <button class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" @click="startAudioService">start xtts service</button>
+                                <a class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" :href="this.$store.state.config.xtts_base_url+'/docs'" target="_blank">show xtts service entries</a>
+                                <a class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" href="https://github.com/ParisNeo/xtts-api-server/blob/main/LICENSE" target="_blank">licence</a>
                                </div>
                            </td>
                            </tr>                                        
@ -1939,11 +1941,11 @@
                            </tr> 
                            <tr>
                            <td style="min-width: 200px;">
-                                <label for="current_language" class="text-sm font-bold" style="margin-right: 1rem;">Current language:</label>
+                                <label for="xtts_current_language" class="text-sm font-bold" style="margin-right: 1rem;">Current language:</label>
                            </td>
                            <td>
                                <div class="flex flex-row">
-                                    <select v-model="current_language" @change="settingsChanged=true" :disabled="!xtts_enable">
+                                    <select v-model="xtts_current_language" @change="settingsChanged=true" :disabled="!xtts_enable">
                                    <option v-for="(value, key) in voice_languages" :key="key" :value="value">
                                        {{ key }}
                                    </option>
@ -5495,9 +5497,9 @@ export default {
                this.$store.state.config.xtts_enable = value
            },
        },
-        current_language:{
+        xtts_current_language:{
            get() {
-                return this.$store.state.config.current_language;
+                return this.$store.state.config.xtts_current_language;
            },
            set(value) {
                // You should not set the value directly here; use the updateSetting method instead