fixed xtts

2025-01-21 03:55:01 +00:00 · 2024-05-05 02:23:02 +02:00 · 2024-05-05 02:23:02 +02:00 · e63dcb81ad
commit e63dcb81ad
parent 501911c488
4 changed files with 37 additions and 8 deletions
--- a/lollms/app.py
+++ b/lollms/app.py
@ -269,8 +269,15 @@ class LollmsApplication(LoLLMsCom):
        if self.config.enable_voice_service:
            try:
                from lollms.services.xtts.lollms_xtts import LollmsXTTS
+                voice=self.config.xtts_current_voice
+                if voice!="main_voice":
+                    voices_folder = self.lollms_paths.custom_voices_path
+                else:
+                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+
                self.tts = LollmsXTTS(
                                        self,
+                                        voices_folder=voices_folder,
                                        voice_samples_path=self.lollms_paths.custom_voices_path, 
                                        xtts_base_url=self.config.xtts_base_url,
                                        wait_for_service=False,
--- a/lollms/server/endpoints/lollms_xtts.py
+++ b/lollms/server/endpoints/lollms_xtts.py
@ -95,9 +95,17 @@ async def text2Audio(request: LollmsText2AudioRequest):
        # Get the JSON data from the POST request.
        try:
            from lollms.services.xtts.lollms_xtts import LollmsXTTS
+            voice=lollmsElfServer.config.xtts_current_voice
            if lollmsElfServer.tts is None:
+                voice=lollmsElfServer.config.xtts_current_voice
+                if voice!="main_voice":
+                    voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+                else:
+                    voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+
                lollmsElfServer.tts = LollmsXTTS(
                    lollmsElfServer, 
+                    voices_folder=voices_folder,
                    voice_samples_path=Path(__file__).parent/"voices", 
                    xtts_base_url= lollmsElfServer.config.xtts_base_url,
                    use_deep_speed= lollmsElfServer.config.xtts_use_deep_speed,
@ -142,7 +150,7 @@ async def text2Audio(request: LollmsText2AudioRequest):
                    lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
            else:
                lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.") 
-                 
+                return  {"status":False, "error":"Service not ready yet"} 
            return {"url": url}
        except Exception as ex:
            trace_exception(ex)
@ -177,8 +185,15 @@ def start_xtts():
        lollmsElfServer.ShowBlockingMessage("Starting xTTS api server\nPlease stand by")
        from lollms.services.xtts.lollms_xtts import LollmsXTTS
        if lollmsElfServer.tts is None:
+            voice=lollmsElfServer.config.xtts_current_voice
+            if voice!="main_voice":
+                voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
+            else:
+                voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
+
            lollmsElfServer.tts = LollmsXTTS(
                lollmsElfServer, 
+                voices_folder=voices_folder,
                voice_samples_path=Path(__file__).parent/"voices", 
                xtts_base_url= lollmsElfServer.config.xtts_base_url,
                use_deep_speed=lollmsElfServer.config.xtts_use_deepspeed,
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -161,6 +161,8 @@ class LollmsXTTS:

        # Wait until the service is available at http://127.0.0.1:7860/
        if wait_for_service:
+            self.wait_for_service()
+        else:
            self.wait_for_service_in_another_thread(max_retries=max_retries)


@ -181,28 +183,32 @@ class LollmsXTTS:
        return thread

    def wait_for_service(self, max_retries = 150, show_warning=True):
+        print(f"Waiting for xtts service (max_retries={max_retries})")
        url = f"{self.xtts_base_url}/languages"
        # Adjust this value as needed
        retries = 0

        while retries < max_retries or max_retries<0:
-            
            try:
                response = requests.get(url)
                if response.status_code == 200:
+                    print(f"voices_folder is {self.voices_folder}.")
+                    if self.voices_folder is not None:
+                        print("Senerating sample audio.")
+                        voice_file =  [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
+                        self.tts_to_audio("xtts is ready",voice_file[0].name)
                    print("Service is available.")
                    if self.app is not None:
                        self.app.success("XTTS Service is now available.")
-                    voice_file =  [v for v in self.voices_folder.iterdir() if v.suffix==".wav"]
-                    self.tts_to_audio("xtts is ready",voice_file[0])
                    self.ready = True
                    return True
-            except requests.exceptions.RequestException:
+            except:
                pass

            retries += 1
+            ASCIIColors.yellow("Waiting for xtts...")
            time.sleep(5)
-            ASCIIColors.yellow("Waiting ...")
+
        if show_warning:
            print("Service did not become available within the given time.")
            if self.app is not None:
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@ -178,7 +178,8 @@ def process_ai_output(output, images, output_folder):
            cv2.putText(image, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Save the modified image
-        output_path = Path(output_folder)/f"image_{image_index}.jpg"
+        random_stuff = np.random
+        output_path = Path(output_folder)/f"image_{image_index}_{random_stuff}.jpg"
        cv2.imwrite(str(output_path), image)

    # Remove bounding box text from the output