added fish

This commit is contained in:
Saifeddine ALOUI 2024-09-20 01:46:59 +02:00
parent 676c80f825
commit c4ecf825b6
5 changed files with 137 additions and 5 deletions

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 136
version: 137
binding_name: null
model_name: null
model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required), fish_tts (API key required)
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -168,6 +168,10 @@ elevenlabs_tts_model_id: "eleven_turbo_v2_5"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
fish_tts_key: ""
fish_tts_voice: "default"
# ***************** TTI *****************
use_negative_prompt: true

View File

@ -398,6 +398,9 @@ class LollmsApplication(LoLLMsCom):
elif self.config.active_tts_service == "openai_tts":
from lollms.services.tts.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
elif self.config.active_tts_service == "fish_tts":
from lollms.services.tts.fish.lollms_fish_tts import LollmsFishAudioTTS
self.tts = LollmsFishAudioTTS(self, self.config.fish_tts_voice, self.config.fish_tts_key)
elif self.config.active_tts_service == "xtts" and self.xtts:
self.tts = self.xtts

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 136
version: 137
binding_name: null
model_name: null
model_variant: null
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
# -------------------- Services global configurations --------------------------
# Select the active text to speech, text to image and speech to text services
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required), fish_tts (API key required)
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
active_ttm_service: "None" # musicgen (offline)
@ -164,10 +164,14 @@ openai_tts_voice: "alloy"
elevenlabs_tts_key: ""
elevenlabs_tts_model_id: "eleven_monolingual_v2"
elevenlabs_tts_model_id: "eleven_turbo_v2_5"
elevenlabs_tts_voice_stability: 0.5
elevenlabs_tts_voice_boost: 0.5
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
fish_tts_key: ""
fish_tts_voice: "default"
# ***************** TTI *****************
use_negative_prompt: true

View File

@ -0,0 +1,120 @@
from pathlib import Path
from typing import List, Dict, Any
import httpx
import ormsgpack
from pydantic import BaseModel
from lollms.app import LollmsApplication
from lollms.paths import LollmsPaths
from lollms.tts import LollmsTTS
from lollms.utilities import PackageManager, find_next_available_filename
if not PackageManager.check_package_installed("sounddevice"):
PackageManager.install_package("sounddevice")
if not PackageManager.check_package_installed("soundfile"):
PackageManager.install_package("soundfile")
import sounddevice as sd
import soundfile as sf
class ServeReferenceAudio(BaseModel):
    """Reference voice sample attached to a TTS request.

    Instances are built from a ``<voice>.mp3`` / ``<voice>.txt`` pair found in
    the reference folder and placed in ``ServeTTSRequest.references``.
    """
    # Raw bytes of the reference audio file (read with ``Path.read_bytes``).
    audio: bytes
    # Content of the text file that sits next to the audio file
    # (presumably the transcript of the sample -- confirm against the API docs).
    text: str
class ServeTTSRequest(BaseModel):
    """Payload of a text-to-speech request.

    The model is serialized with ``ormsgpack`` (``OPT_SERIALIZE_PYDANTIC``) and
    POSTed to ``https://api.fish.audio/v1/tts``. Field names and defaults are
    part of the wire format; their exact semantics are defined by the remote
    API -- NOTE(review): confirm the per-field notes below against its docs.
    """
    # Text to synthesize.
    text: str
    # Presumably the size of the chunks the text is split into server-side.
    chunk_length: int = 200
    # Requested audio container; the caller in this module saves the result as .mp3.
    format: str = "mp3"
    # Presumably the MP3 bitrate in kbps.
    mp3_bitrate: int = 128
    # Inline reference samples; empty list means no reference is sent.
    references: List[ServeReferenceAudio] = []
    # Optional identifier of a reference known to the server (unused in this module).
    reference_id: str | None = None
    # Presumably toggles text normalization on the server.
    normalize: bool = True
    # Presumably a latency/quality trade-off mode.
    latency: str = "normal"
def get_FishAudioTTS(lollms_paths: LollmsPaths):
    """Return the Fish Audio TTS service class.

    The class itself is returned, not an instance; the caller is expected to
    instantiate it. ``lollms_paths`` is accepted but not used.
    """
    service_class = LollmsFishAudioTTS
    return service_class
class LollmsFishAudioTTS(LollmsTTS):
    """Text-to-speech service backed by the Fish Audio HTTP API.

    Custom voices are discovered in ``reference_folder``: every ``<name>.mp3``
    that has a sibling ``<name>.txt`` is exposed as the voice ``<name>``. The
    special voice ``"default"`` sends no reference audio with the request.

    NOTE(review): ``self.voice_name``, ``self.api_key`` and ``self.output_path``
    are read by the methods below but never assigned here; they are presumably
    set by ``LollmsTTS.__init__`` -- confirm against the base class.
    """

    def __init__(
        self,
        app: LollmsApplication,
        voice_name: str = "default",
        api_key: str = "",
        output_path: Path | str = None,
        reference_folder: Path | str = None
    ):
        """Create the service and scan the reference folder for voices.

        Args:
            app: Owning application, forwarded to the base class.
            voice_name: Initial voice, forwarded to the base class. It is NOT
                validated against the discovered voices.
            api_key: Fish Audio API key, sent as a bearer token by `tts_file`.
            output_path: Folder for generated files, forwarded to the base class.
            reference_folder: Folder holding ``<voice>.mp3`` / ``<voice>.txt``
                pairs. When falsy, only the ``"default"`` voice is available.
        """
        super().__init__("fishaudio_tts", app, "default", voice_name, api_key, output_path)
        self.reference_folder = Path(reference_folder) if reference_folder else None
        self.voices = self._load_voices()
        self.ready = True

    def _load_voices(self) -> List[str]:
        """Return the voice names found in the reference folder.

        A voice is an ``.mp3`` file that has a ``.txt`` file with the same
        stem next to it. Falls back to ``["default"]`` when the folder is
        unset, missing, or contains no complete pair.
        """
        if not self.reference_folder or not self.reference_folder.exists():
            return ["default"]

        voices = [
            audio_file.stem
            for audio_file in self.reference_folder.glob("*.mp3")
            if audio_file.with_suffix(".txt").exists()
        ]
        return voices or ["default"]

    def set_voice(self, voice_name: str):
        """Select the active voice.

        Raises:
            ValueError: If ``voice_name`` is not one of ``self.voices``.
        """
        if voice_name not in self.voices:
            raise ValueError(f"Voice '{voice_name}' not found. Available voices: {', '.join(self.voices)}")
        self.voice_name = voice_name

    def _get_reference_audio(self, voice_name: str) -> ServeReferenceAudio | None:
        """Load the reference sample for ``voice_name``.

        Returns ``None`` (meaning "send no reference") for the ``"default"``
        voice, when no reference folder is configured, or when either file of
        the ``.mp3`` / ``.txt`` pair is missing.
        """
        # The constructor does not validate the voice name, so a non-default
        # voice can be active without a reference folder; guard against
        # evaluating ``None / "<voice>.mp3"``.
        if voice_name == "default" or self.reference_folder is None:
            return None

        audio_file = self.reference_folder / f"{voice_name}.mp3"
        text_file = self.reference_folder / f"{voice_name}.txt"

        if audio_file.exists() and text_file.exists():
            return ServeReferenceAudio(
                audio=audio_file.read_bytes(),
                # Explicit encoding so the text does not depend on the
                # platform's default locale encoding.
                text=text_file.read_text(encoding="utf-8")
            )
        return None

    def tts_file(self, text, file_name_or_path: Path | str = None, speaker=None, language="en", use_threading=False):
        """Synthesize ``text`` with the active voice and save it to a file.

        Args:
            text: Text to synthesize.
            file_name_or_path: Destination file. When falsy, a new file name is
                obtained from `_get_output_path` with the ``mp3`` extension.
            speaker: Accepted for interface compatibility; not used (the voice
                is always ``self.voice_name``).
            language: Accepted for interface compatibility; not used.
            use_threading: Accepted for interface compatibility; not used.

        Returns:
            Path of the written audio file.

        Raises:
            httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        """
        speech_file_path = Path(file_name_or_path) if file_name_or_path else self._get_output_path("mp3")

        reference = self._get_reference_audio(self.voice_name)
        request = ServeTTSRequest(
            text=text,
            references=[reference] if reference else []
        )
        payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
        headers = {
            "authorization": f"Bearer {self.api_key}",
            "content-type": "application/msgpack",
        }

        with httpx.Client() as client:
            # timeout=None disables httpx timeouts for this request.
            with client.stream(
                "POST",
                "https://api.fish.audio/v1/tts",
                content=payload,
                headers=headers,
                timeout=None,
            ) as response:
                # Fail before touching the output file; otherwise an error
                # body (e.g. an authentication failure) would be saved as if
                # it were audio.
                response.raise_for_status()
                with open(speech_file_path, "wb") as f:
                    for chunk in response.iter_bytes():
                        f.write(chunk)

        return speech_file_path

    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Synthesize ``text`` to a file, then play it and block until done.

        Arguments are forwarded to `tts_file`. Playback is synchronous
        (``sd.wait()``) regardless of ``use_threading``. Returns ``None``.
        """
        speech_file_path = self.tts_file(text, file_name_or_path, speaker, language, use_threading)

        data, fs = sf.read(speech_file_path, dtype='float32')
        sd.play(data, fs)
        sd.wait()

    def _get_output_path(self, extension: str) -> Path:
        """Return a fresh output file path with the given extension.

        The file is placed in ``self.output_path`` when set, otherwise in the
        current working directory.
        """
        folder = self.output_path if self.output_path else Path.cwd()
        # NOTE(review): confirm that find_next_available_filename expects a
        # complete file name (with extension) as its second argument.
        return find_next_available_filename(folder, f"output.{extension}")

View File

@ -55,6 +55,7 @@ class LollmsXTTS(LollmsTTS):
self.stop_event = threading.Event()
# Show a cool LOGO using ASCIIColors
ASCIIColors.red("")
ASCIIColors.red(" __ ___ __ __ __ __ ___ _ ")
ASCIIColors.red(" / / /___\/ / / / /\/\ / _\ \ \/ / |_| |_ ___ ")
ASCIIColors.red(" / / // // / / / / \ \ \ _____\ /| __| __/ __| ")