mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
added fish
This commit is contained in:
parent
676c80f825
commit
c4ecf825b6
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 136
|
||||
version: 137
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts, fish_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
@ -168,6 +168,10 @@ elevenlabs_tts_model_id: "eleven_turbo_v2_5"
|
||||
elevenlabs_tts_voice_stability: 0.5
|
||||
elevenlabs_tts_voice_boost: 0.5
|
||||
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||
|
||||
fish_tts_key: ""
|
||||
fish_tts_voice: "default"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
|
@ -398,6 +398,9 @@ class LollmsApplication(LoLLMsCom):
|
||||
elif self.config.active_tts_service == "openai_tts":
|
||||
from lollms.services.tts.open_ai_tts.lollms_openai_tts import LollmsOpenAITTS
|
||||
self.tts = LollmsOpenAITTS(self, self.config.openai_tts_model, self.config.openai_tts_voice, self.config.openai_tts_key)
|
||||
elif self.config.active_tts_service == "fish_tts":
|
||||
from lollms.services.tts.fish.lollms_fish_tts import LollmsFishAudioTTS
|
||||
self.tts = LollmsFishAudioTTS(self, self.config.fish_tts_voice, self.config.fish_tts_key)
|
||||
elif self.config.active_tts_service == "xtts" and self.xtts:
|
||||
self.tts = self.xtts
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 136
|
||||
version: 137
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -101,7 +101,7 @@ copy_to_clipboard_add_all_details: false
|
||||
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts (API key required)
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required), elevenlabs_tts, fish_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), diffusers (offline), diffusers_client (online), dall-e (online), midjourney (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
@ -164,10 +164,14 @@ openai_tts_voice: "alloy"
|
||||
|
||||
|
||||
elevenlabs_tts_key: ""
|
||||
elevenlabs_tts_model_id: "eleven_monolingual_v2"
|
||||
elevenlabs_tts_model_id: "eleven_turbo_v2_5"
|
||||
elevenlabs_tts_voice_stability: 0.5
|
||||
elevenlabs_tts_voice_boost: 0.5
|
||||
elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
|
||||
|
||||
fish_tts_key: ""
|
||||
fish_tts_voice: "default"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
use_negative_prompt: true
|
||||
|
120
lollms/services/tts/fish/lollms_fish_tts.py
Normal file
120
lollms/services/tts/fish/lollms_fish_tts.py
Normal file
@ -0,0 +1,120 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
import httpx
|
||||
import ormsgpack
|
||||
from pydantic import BaseModel
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.paths import LollmsPaths
|
||||
from lollms.tts import LollmsTTS
|
||||
from lollms.utilities import PackageManager, find_next_available_filename
|
||||
|
||||
# The audio playback backends are installed on demand, in this order, before
# the `sounddevice` / `soundfile` imports that follow are executed.
for _required_package in ("sounddevice", "soundfile"):
    if not PackageManager.check_package_installed(_required_package):
        PackageManager.install_package(_required_package)
|
||||
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
|
||||
class ServeReferenceAudio(BaseModel):
    """A single voice reference embedded in a TTS request.

    In this file an instance is built from a ``<voice>.mp3`` / ``<voice>.txt``
    pair found in the reference folder.
    """

    # Raw bytes of the reference recording (read from the ``.mp3`` file).
    audio: bytes
    # Contents of the matching ``.txt`` file -- presumably the transcript of
    # the recording; confirm against the Fish Audio API documentation.
    text: str
|
||||
|
||||
class ServeTTSRequest(BaseModel):
    """Body of a text-to-speech request, serialized with msgpack before sending.

    Within this file only ``text`` and ``references`` are ever set explicitly;
    every other field is sent with the default declared below.
    NOTE(review): the exact meaning and accepted values of the tuning fields
    are defined by the remote API, not by this file -- verify them there.
    """

    # Text to synthesize.
    text: str
    # Tuning fields, always sent with these defaults by the code in this file.
    chunk_length: int = 200
    format: str = "mp3"
    mp3_bitrate: int = 128
    # Inline voice references; empty when the "default" voice is used.
    references: List[ServeReferenceAudio] = []
    # Never set by the code in this file; always sent as None.
    reference_id: str | None = None
    normalize: bool = True
    latency: str = "normal"
|
||||
|
||||
def get_FishAudioTTS(lollms_paths: LollmsPaths):
    """Return the Fish Audio TTS service class (the class itself, not an instance).

    Args:
        lollms_paths: Accepted but not used by this function.

    Returns:
        The ``LollmsFishAudioTTS`` class.
    """
    service_class = LollmsFishAudioTTS
    return service_class
|
||||
|
||||
class LollmsFishAudioTTS(LollmsTTS):
    """Text-to-speech service that synthesizes audio through the Fish Audio HTTP API.

    Voices are optional reference recordings stored in ``reference_folder`` as
    ``<voice>.mp3`` / ``<voice>.txt`` pairs. The built-in ``"default"`` voice
    sends no reference at all.

    NOTE(review): ``self.voice_name``, ``self.api_key`` and ``self.output_path``
    are read by the methods below but never assigned in this class --
    presumably ``LollmsTTS.__init__`` stores them; confirm against the base class.
    """

    def __init__(
        self,
        app: LollmsApplication,
        voice_name: str = "default",
        api_key: str = "",
        output_path: Path | str = None,
        reference_folder: Path | str = None
    ):
        """Initialize the service and discover the available reference voices.

        Args:
            app: The running lollms application, forwarded to the base class.
            voice_name: Name of the voice to use; forwarded to the base class
                without being validated against the discovered voices.
            api_key: Fish Audio API key, forwarded to the base class.
            output_path: Folder for generated audio, forwarded to the base class.
            reference_folder: Folder holding ``<voice>.mp3`` / ``<voice>.txt``
                pairs. When omitted, only the ``"default"`` voice exists.
        """
        super().__init__("fishaudio_tts", app, "default", voice_name, api_key, output_path)
        self.reference_folder = Path(reference_folder) if reference_folder else None
        self.voices = self._load_voices()
        self.ready = True

    def _load_voices(self) -> List[str]:
        """List the voices that have both an ``.mp3`` and a ``.txt`` file.

        Returns:
            The stems of all complete reference pairs, or ``["default"]`` when
            there is no usable reference folder or it holds no complete pair.
        """
        if not self.reference_folder or not self.reference_folder.exists():
            return ["default"]

        voices = [
            audio_file.stem
            for audio_file in self.reference_folder.glob("*.mp3")
            if audio_file.with_suffix(".txt").exists()
        ]
        return voices or ["default"]

    def set_voice(self, voice_name: str):
        """Select the voice used by subsequent synthesis calls.

        Args:
            voice_name: One of the names in ``self.voices``.

        Raises:
            ValueError: If ``voice_name`` is not a known voice.
        """
        if voice_name not in self.voices:
            raise ValueError(f"Voice '{voice_name}' not found. Available voices: {', '.join(self.voices)}")
        self.voice_name = voice_name

    def _get_reference_audio(self, voice_name: str) -> ServeReferenceAudio | None:
        """Load the reference recording and its text for ``voice_name``.

        Args:
            voice_name: Voice whose reference pair should be loaded.

        Returns:
            The reference, or ``None`` for the ``"default"`` voice, when no
            reference folder is configured, or when either file is missing.
        """
        # The configured voice name is stored without validation, so a
        # non-default voice can reach this point while no reference folder is
        # set; guard it instead of failing on ``None / str``.
        if voice_name == "default" or self.reference_folder is None:
            return None

        audio_file = self.reference_folder / f"{voice_name}.mp3"
        text_file = self.reference_folder / f"{voice_name}.txt"
        if not (audio_file.exists() and text_file.exists()):
            return None

        return ServeReferenceAudio(
            audio=audio_file.read_bytes(),
            # Decode explicitly so the text does not depend on the platform's
            # default encoding.
            text=text_file.read_text(encoding="utf-8"),
        )

    def tts_file(self, text, file_name_or_path: Path | str = None, speaker=None, language="en", use_threading=False):
        """Synthesize ``text`` and stream the resulting MP3 to a file.

        Args:
            text: Text to synthesize.
            file_name_or_path: Destination file. When omitted, a new name is
                obtained from ``_get_output_path("mp3")``.
            speaker: Accepted but not used here; the voice is always
                ``self.voice_name``.
            language: Accepted but not used here.
            use_threading: Accepted but not used here; the call is synchronous.

        Returns:
            Path of the written audio file.

        Raises:
            httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        """
        speech_file_path = Path(file_name_or_path) if file_name_or_path else self._get_output_path("mp3")

        reference = self._get_reference_audio(self.voice_name)
        request = ServeTTSRequest(
            text=text,
            references=[reference] if reference else []
        )
        payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
        headers = {
            "authorization": f"Bearer {self.api_key}",
            "content-type": "application/msgpack",
        }

        with httpx.Client() as client:
            # timeout=None disables httpx timeouts for this request.
            with client.stream(
                "POST",
                "https://api.fish.audio/v1/tts",
                content=payload,
                headers=headers,
                timeout=None,
            ) as response:
                # Fail before touching the disk: without this check an error
                # body (bad key, quota, ...) would be saved as if it were audio.
                response.raise_for_status()
                with open(speech_file_path, "wb") as f:
                    for chunk in response.iter_bytes():
                        f.write(chunk)

        return speech_file_path

    def tts_audio(self, text, speaker: str = None, file_name_or_path: Path | str = None, language="en", use_threading=False):
        """Synthesize ``text`` to a file, then play it and wait for playback to end.

        Args:
            text: Text to synthesize.
            speaker: Forwarded to ``tts_file``.
            file_name_or_path: Forwarded to ``tts_file``.
            language: Forwarded to ``tts_file``.
            use_threading: Forwarded to ``tts_file``.

        Returns:
            None.
        """
        speech_file_path = self.tts_file(text, file_name_or_path, speaker, language, use_threading)

        # NOTE(review): the file is MP3; confirm the installed soundfile /
        # libsndfile build can decode that format.
        data, fs = sf.read(speech_file_path, dtype='float32')
        sd.play(data, fs)
        sd.wait()

    def _get_output_path(self, extension: str) -> Path:
        """Pick an unused output file name with the given extension.

        Args:
            extension: File extension without the leading dot.

        Returns:
            Whatever ``find_next_available_filename`` returns for the output
            folder, or for the current working directory when none is set.
        """
        # NOTE(review): confirm that the helper's second argument is meant to
        # be a full file name like "output.mp3" and not a bare prefix.
        target_folder = self.output_path if self.output_path else Path.cwd()
        return find_next_available_filename(target_folder, f"output.{extension}")
|
@ -55,6 +55,7 @@ class LollmsXTTS(LollmsTTS):
|
||||
self.stop_event = threading.Event()
|
||||
|
||||
# Show a cool LOGO using ASCIIColors
|
||||
ASCIIColors.red("")
|
||||
ASCIIColors.red(" __ ___ __ __ __ __ ___ _ ")
|
||||
ASCIIColors.red(" / / /___\/ / / / /\/\ / _\ \ \/ / |_| |_ ___ ")
|
||||
ASCIIColors.red(" / / // // / / / / \ \ \ _____\ /| __| __/ __| ")
|
||||
|
Loading…
Reference in New Issue
Block a user