fixed multimodal models

2025-04-15 22:46:35 +00:00 · 2024-05-05 01:17:38 +02:00 · 2024-05-05 01:17:38 +02:00 · 32c7f3ef9f
commit 32c7f3ef9f
parent 70320699b1
3 changed files with 35 additions and 20 deletions
--- a/lollms/server/endpoints/lollms_xtts.py
+++ b/lollms/server/endpoints/lollms_xtts.py
@ -122,22 +122,25 @@ async def text2Audio(request: LollmsText2AudioRequest):
                                                    use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
                                                    use_streaming_mode=lollmsElfServer.config.xtts_use_streaming_mode                                                    
                                                )
-            language = lollmsElfServer.config.current_language# convert_language_name()
-            if voice!="main_voice":
-                voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+            if lollmsElfServer.tts.ready:
+                language = lollmsElfServer.config.xtts_current_language# convert_language_name()
+                if voice!="main_voice":
+                    voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+                else:
+                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+                lollmsElfServer.tts.set_speaker_folder(voices_folder)
+                url = f"audio/{output_fn}"
+                preprocessed_text= add_period(request.text)
+                voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
+                if len(voice_file)==0:
+                    return {"status":False,"error":"Voice not found"}
+                if not lollmsElfServer.config.xtts_use_streaming_mode:
+                    lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                    lollmsElfServer.info(f"Voice file ready at {url}")
+                else:
+                    lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
            else:
-                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
-            lollmsElfServer.tts.set_speaker_folder(voices_folder)
-            url = f"audio/{output_fn}"
-            preprocessed_text= add_period(request.text)
-            voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice]
-            if len(voice_file)==0:
-                return {"status":False,"error":"Voice not found"}
-            if not lollmsElfServer.config.xtts_use_streaming_mode:
-                lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
-                lollmsElfServer.info(f"Voice file ready at {url}")
-            else:
-                lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.") 
                 
            return {"url": url}
        except Exception as ex:
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -22,6 +22,7 @@ import subprocess
 import time
 import json
 import platform
+import threading
 from dataclasses import dataclass
 from PIL import Image, PngImagePlugin
 from enum import Enum
@ -108,13 +109,14 @@ class LollmsXTTS:
                    app:LollmsApplication, 
                    xtts_base_url=None,
                    share=False,
-                    max_retries=10,
+                    max_retries=20,
                    voice_samples_path="",
                    wait_for_service=True,
                    use_deep_speed=False,
                    use_streaming_mode = True

                    ):
+        self.ready = False
        if xtts_base_url=="" or xtts_base_url=="http://127.0.0.1:8020":
            xtts_base_url = None
        # Get the current directory
@ -157,8 +159,7 @@ class LollmsXTTS:
            self.process = self.run_xtts_api_server()

        # Wait until the service is available at http://127.0.0.1:7860/
-        if wait_for_service:
-            self.wait_for_service(max_retries=max_retries)
+        self.wait_for_service_in_another_thread(max_retries=max_retries)


    def run_xtts_api_server(self):
@ -172,7 +173,12 @@ class LollmsXTTS:
            options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
        process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", wait= False)
        return process
-    
+
+    def wait_for_service_in_another_thread(self, max_retries=150, show_warning=True):
+        thread = threading.Thread(target=self.wait_for_service, args=(max_retries, show_warning))
+        thread.start()
+        return thread
+
    def wait_for_service(self, max_retries = 150, show_warning=True):
        url = f"{self.xtts_base_url}/languages"
        # Adjust this value as needed
@ -186,12 +192,13 @@ class LollmsXTTS:
                    print("Service is available.")
                    if self.app is not None:
                        self.app.success("XTTS Service is now available.")
+                    self.ready = True
                    return True
            except requests.exceptions.RequestException:
                pass

            retries += 1
-            time.sleep(3)
+            time.sleep(5)
            ASCIIColors.yellow("Waiting ...")
        if show_warning:
            print("Service did not become available within the given time.")
@ -265,5 +272,9 @@ class LollmsXTTS:
        if response.status_code == 200:
            print("Request successful")
            # You can access the response data using response.json()
+            # Open a new file in binary write mode
+            with open(self.output_folder/file_name_or_path, 'wb') as file:
+                # Write the binary content to the file
+                file.write(response.content)
        else:
            print("Request failed with status code:", response.status_code)
--- a/lollms/services/xtts/voices/main_voice.json
+++ b/lollms/services/xtts/voices/main_voice.json