added fish

2025-04-09 03:44:14 +00:00 · 2024-09-20 01:46:59 +02:00 · 2024-09-20 01:46:59 +02:00 · c4ecf825b6
commit c4ecf825b6
parent 676c80f825
5 changed files with 137 additions and 5 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 136
+version: 137
 binding_name: null
 model_name: null
 model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false

 # -------------------- Services global configurations --------------------------
 # Select the active text to speech, text to image and speech to text services
-active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
+active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts, fish_tts (API key required)
 active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
 active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
 active_ttm_service: "None" # musicgen (offline)
@ -168,6 +168,10 @@ elevenlabs_tts_model_id: "eleven_turbo_v2_5"
 elevenlabs_tts_voice_stability: 0.5
 elevenlabs_tts_voice_boost: 0.5
 elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
+
+fish_tts_key: ""
+fish_tts_voice: "default"
+
 # ***************** TTI *****************

 use_negative_prompt: true
--- a/lollms/app.py
+++ b/lollms/app.py
@ -398,6 +398,9 @@ class LollmsApplication(LoLLMsCom):
            elif self.config.active_tts_service == "openai_tts":
                from lollms.services.tts.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
                self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice,  self.config.openai_tts_key)
+            elif self.config.active_tts_service == "fish_tts":
+                from lollms.services.tts.fish.lollms_fish_tts import LollmsFishAudioTTS
+                self.tts = LollmsFishAudioTTS(self, self.config.fish_tts_voice,  self.config.fish_tts_key)
            elif self.config.active_tts_service == "xtts" and self.xtts:
                self.tts = self.xtts

--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 136
+version: 137
 binding_name: null
 model_name: null
 model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false

 # -------------------- Services global configurations --------------------------
 # Select the active text to speech, text to image and speech to text services
-active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
+active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts, fish_tts (API key required)
 active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
 active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whiosper (API key required)
 active_ttm_service: "None" # musicgen (offline)
@ -164,10 +164,14 @@ openai_tts_voice: "alloy"


 elevenlabs_tts_key: ""
-elevenlabs_tts_model_id: "eleven_monolingual_v2"
+elevenlabs_tts_model_id: "eleven_turbo_v2_5"
 elevenlabs_tts_voice_stability: 0.5
 elevenlabs_tts_voice_boost: 0.5
 elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
+
+fish_tts_key: ""
+fish_tts_voice: "default"
+
 # ***************** TTI *****************

 use_negative_prompt: true
--- a/lollms/services/tts/fish/lollms_fish_tts.py
+++ b/lollms/services/tts/fish/lollms_fish_tts.py
@ -0,0 +1,120 @@
+from pathlib import Path
+from typing import List, Dict, Any
+import httpx
+import ormsgpack
+from pydantic import BaseModel
+from lollms.app import LollmsApplication
+from lollms.paths import LollmsPaths
+from lollms.tts import LollmsTTS
+from lollms.utilities import PackageManager, find_next_available_filename
+
+# Audio playback dependencies are installed on first import when missing,
+# so the `sounddevice` / `soundfile` imports that follow can succeed.
+for _pkg in ("sounddevice", "soundfile"):
+    if not PackageManager.check_package_installed(_pkg):
+        PackageManager.install_package(_pkg)
+
+import sounddevice as sd
+import soundfile as sf
+
+class ServeReferenceAudio(BaseModel):
+    """Reference voice sample attached to a TTS request: audio bytes plus text."""
+    # Raw bytes of the reference audio file (read from "<voice>.mp3" in this module).
+    audio: bytes
+    # Text stored next to the audio sample (read from "<voice>.txt" in this module).
+    text: str
+
+class ServeTTSRequest(BaseModel):
+    """Payload of a text-to-speech request.
+
+    In this module an instance is serialized with ormsgpack and POSTed to the
+    Fish Audio TTS endpoint.
+
+    NOTE(review): the meaning and accepted values of the tuning fields
+    (chunk_length, mp3_bitrate, normalize, latency) are defined by the remote
+    API and cannot be verified from this file -- confirm against its reference.
+    """
+    # Text to synthesize.
+    text: str
+    chunk_length: int = 200
+    # Requested audio format of the response.
+    format: str = "mp3"
+    mp3_bitrate: int = 128
+    # Inline reference samples sent with the request; empty when no sample is used.
+    references: List[ServeReferenceAudio] = []
+    # NOTE(review): presumably the id of a voice stored server-side; never set in this module.
+    reference_id: str | None = None
+    normalize: bool = True
+    latency: str = "normal"
+
+def get_FishAudioTTS(lollms_paths: LollmsPaths):
+    """Return the `LollmsFishAudioTTS` class itself (not an instance).
+
+    `lollms_paths` is accepted but not used by this function.
+    """
+    return LollmsFishAudioTTS
+
+class LollmsFishAudioTTS(LollmsTTS):
+    """Text-to-speech service backed by the Fish Audio HTTP API.
+
+    Speech is requested from ``https://api.fish.audio/v1/tts`` and written to
+    an mp3 file. A voice is either ``"default"`` (no reference sample is sent)
+    or the stem of a ``<name>.mp3`` / ``<name>.txt`` pair found in
+    ``reference_folder``; that pair is sent along with the request.
+
+    NOTE(review): ``self.voice_name``, ``self.api_key`` and ``self.output_path``
+    are read below but never assigned in ``__init__`` here; they are presumably
+    set by ``LollmsTTS.__init__`` -- confirm against the base class.
+    """
+
+    def __init__(
+        self,
+        app: LollmsApplication,
+        voice_name: str = "default",
+        api_key: str = "",
+        output_path: Path | str | None = None,
+        reference_folder: Path | str | None = None
+    ):
+        """Initialize the service and discover the available voices.
+
+        Args:
+            app: The running LoLLMs application.
+            voice_name: Voice to use, forwarded to the base class.
+            api_key: Fish Audio API key, forwarded to the base class.
+            output_path: Folder for generated files, forwarded to the base class.
+            reference_folder: Folder holding ``<voice>.mp3`` / ``<voice>.txt``
+                pairs. When omitted only the ``"default"`` voice is available.
+        """
+        super().__init__("fishaudio_tts", app, "default", voice_name, api_key, output_path)
+        self.reference_folder = Path(reference_folder) if reference_folder else None
+        self.voices = self._load_voices()
+        self.ready = True
+
+    def _load_voices(self) -> List[str]:
+        """Return the voice names available in the reference folder.
+
+        A voice is listed only when both ``<name>.mp3`` and ``<name>.txt``
+        exist. Falls back to ``["default"]`` when the folder is unset, missing
+        or contains no complete pair.
+        """
+        if not self.reference_folder or not self.reference_folder.exists():
+            return ["default"]
+
+        # Sorted so the list does not depend on filesystem enumeration order.
+        voices = sorted(
+            audio_file.stem
+            for audio_file in self.reference_folder.glob("*.mp3")
+            if audio_file.with_suffix(".txt").exists()
+        )
+        return voices or ["default"]
+
+    def set_voice(self, voice_name: str):
+        """Select the active voice.
+
+        Raises:
+            ValueError: If ``voice_name`` is not one of ``self.voices``.
+        """
+        if voice_name in self.voices:
+            self.voice_name = voice_name
+        else:
+            raise ValueError(f"Voice '{voice_name}' not found. Available voices: {', '.join(self.voices)}")
+
+    def _get_reference_audio(self, voice_name: str) -> ServeReferenceAudio | None:
+        """Load the reference sample for ``voice_name``.
+
+        Returns:
+            The audio/text pair, or ``None`` for the ``"default"`` voice, when
+            no reference folder is configured, or when either file is missing.
+        """
+        # The constructor does not validate the configured voice, so a
+        # non-default name can reach this point with no reference folder;
+        # joining a path onto None would raise TypeError.
+        if voice_name == "default" or self.reference_folder is None:
+            return None
+
+        audio_file = self.reference_folder / f"{voice_name}.mp3"
+        text_file = self.reference_folder / f"{voice_name}.txt"
+
+        if audio_file.exists() and text_file.exists():
+            return ServeReferenceAudio(
+                audio=audio_file.read_bytes(),
+                # Explicit encoding: the platform default would garble
+                # non-ASCII text on some systems.
+                text=text_file.read_text(encoding="utf-8")
+            )
+        return None
+
+    def tts_file(self, text, file_name_or_path: Path | str = None, speaker=None, language="en", use_threading=False):
+        """Synthesize ``text`` with the active voice and save it to a file.
+
+        Args:
+            text: Text to synthesize.
+            file_name_or_path: Destination file. When omitted, a new
+                ``output.mp3`` path is obtained from ``_get_output_path``.
+            speaker: Accepted but not used by this implementation; the voice
+                is always ``self.voice_name``.
+            language: Accepted but not used by this implementation.
+            use_threading: Accepted but not used; the call is synchronous.
+
+        Returns:
+            Path of the written audio file.
+
+        Raises:
+            httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
+        """
+        speech_file_path = Path(file_name_or_path) if file_name_or_path else self._get_output_path("mp3")
+
+        reference = self._get_reference_audio(self.voice_name)
+        request = ServeTTSRequest(
+            text=text,
+            references=[reference] if reference else []
+        )
+
+        with httpx.Client() as client:
+            with client.stream(
+                "POST",
+                "https://api.fish.audio/v1/tts",
+                content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
+                headers={
+                    "authorization": f"Bearer {self.api_key}",
+                    "content-type": "application/msgpack",
+                },
+                timeout=None,
+            ) as response:
+                # Fail before touching the disk: otherwise an error payload
+                # (bad key, quota, ...) would be saved as if it were audio.
+                response.raise_for_status()
+                with open(speech_file_path, "wb") as f:
+                    for chunk in response.iter_bytes():
+                        f.write(chunk)
+
+        return speech_file_path
+
+    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
+        """Synthesize ``text`` to a file, then play it and block until done.
+
+        Arguments are forwarded to ``tts_file``. Returns ``None``.
+        """
+        speech_file_path = self.tts_file(text, file_name_or_path, speaker, language, use_threading)
+
+        data, fs = sf.read(speech_file_path, dtype='float32')
+        sd.play(data, fs)
+        # sd.play returns immediately; wait so the call covers the playback.
+        sd.wait()
+
+    def _get_output_path(self, extension: str) -> Path:
+        """Return the next free ``output.<extension>`` path.
+
+        The file is placed in ``self.output_path`` when that is set, otherwise
+        in the current working directory.
+        """
+        return find_next_available_filename(self.output_path or Path.cwd(), f"output.{extension}")
--- a/lollms/services/tts/xtts/lollms_xtts.py
+++ b/lollms/services/tts/xtts/lollms_xtts.py
@ -55,6 +55,7 @@ class LollmsXTTS(LollmsTTS):
        self.stop_event = threading.Event()

        # Show a cool LOGO using ASCIIColors
+        ASCIIColors.red("")
        ASCIIColors.red("   __    ___  __    __          __     __  ___   _        ")
        ASCIIColors.red("  / /   /___\/ /   / /   /\/\  / _\    \ \/ / |_| |_ ___  ")
        ASCIIColors.red(" / /   //  // /   / /   /    \ \ \ _____\  /| __| __/ __| ")