diff --git a/configs/config.yaml b/configs/config.yaml
index fcb8f5ef..b618038f 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -164,7 +164,7 @@ openai_tts_voice: "alloy"
 
 
 elevenlabs_tts_key: ""
-elevenlabs_tts_model_id: "eleven_monolingual_v2"
+elevenlabs_tts_model_id: "eleven_turbo_v2_5"
 elevenlabs_tts_voice_stability: 0.5
 elevenlabs_tts_voice_boost: 0.5
 elevenlabs_tts_voice_id: EXAVITQu4vr4xnSDxMaL
diff --git a/endpoints/lollms_advanced.py b/endpoints/lollms_advanced.py
index 78630046..efe9eb78 100644
--- a/endpoints/lollms_advanced.py
+++ b/endpoints/lollms_advanced.py
@@ -28,6 +28,11 @@ import re
 import subprocess
 from typing import Optional
 
+from fastapi import FastAPI, Query, HTTPException
+from fastapi.responses import FileResponse, StreamingResponse
+from pydantic import BaseModel
+import io
+
 def validate_file_path(path):
     try:
         sanitized_path = sanitize_path(path, allow_absolute_path=False)
@@ -47,7 +52,11 @@ from utilities.execution_engines.graphviz_execution_engine import execute_graphv
 from utilities.execution_engines.svg_execution_engine import execute_svg
 
 
-
+import os
+from fastapi import FastAPI, UploadFile, File
+from fastapi.responses import JSONResponse
+import tempfile
+import shutil
 # ----------------------- Defining router and main class ------------------------------
 
 
@@ -513,3 +522,112 @@ def stop_recording(data:Identification):
     else:
         return ""
 
+
+@router.post("/transcribe")
+async def transcribe_audio(file: UploadFile = File(...)):
+    """Transcribe an uploaded audio file with the server's STT service.
+
+    Args:
+        file: Uploaded audio; its bytes are copied to a temporary ``.wav``
+            file whose path is passed to ``lollmsElfServer.stt.transcribe``.
+
+    Returns:
+        JSONResponse: ``{"transcription": text}`` on success, or an
+        ``{"error": ...}`` body with status 503 when no STT service is set.
+    """
+    # Check availability first so nothing is written to disk when the
+    # request cannot be served anyway.
+    stt = getattr(lollmsElfServer, "stt", None)
+    if not stt:
+        return JSONResponse(content={"error": "STT service not available"}, status_code=503)
+
+    temp_file_path = None
+    try:
+        # delete=False keeps the file on disk after the ``with`` block closes
+        # it, so the STT service can open it by path.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
+            temp_file_path = temp_file.name
+            # Copy the contents of the uploaded file to the temporary file
+            shutil.copyfileobj(file.file, temp_file)
+
+        text = stt.transcribe(temp_file_path)
+        return JSONResponse(content={"transcription": text})
+    finally:
+        # Clean up the temporary file, including when the copy or the
+        # transcription raised.
+        if temp_file_path is not None:
+            os.unlink(temp_file_path)
+
+
+class TTSRequest(BaseModel):
+    """Request body for the ``/tts/file`` and ``/tts/stream`` endpoints."""
+    # Text to synthesize.
+    text: str
+    # Speaker forwarded to the TTS service; optional, may be null.
+    speaker: Optional[str] = None
+    # Language forwarded to the TTS service.
+    language: str = "en"
+
+
+@router.post("/tts/file")
+async def text_to_speech_file(request: TTSRequest):
+    """Synthesize ``request.text`` to a file and return it as ``speech.wav``.
+
+    Raises:
+        HTTPException: 503 when no TTS service is set, 500 when synthesis fails.
+    """
+    tts = getattr(lollmsElfServer, "tts", None)
+    if not tts:
+        # Raised outside the ``try`` so it is not re-wrapped as a 500.
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    try:
+        # NOTE(review): the output name is fixed, so concurrent requests may
+        # overwrite each other's file -- confirm how tts_file resolves the path.
+        file_path = tts.tts_file(
+            text=request.text,
+            file_name_or_path="output.wav",
+            speaker=request.speaker,
+            language=request.language
+        )
+        return FileResponse(file_path, media_type="audio/wav", filename="speech.wav")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.post("/tts/stream")
+async def text_to_speech_stream(request: TTSRequest):
+    """Synthesize ``request.text`` and stream the result back as ``audio/wav``.
+
+    Raises:
+        HTTPException: 503 when no TTS service is set, 500 when synthesis fails.
+    """
+    tts = getattr(lollmsElfServer, "tts", None)
+    if not tts:
+        # Raised outside the ``try`` so it is not re-wrapped as a 500.
+        raise HTTPException(status_code=503, detail="TTS service not available")
+    try:
+        audio_data = tts.tts_audio(
+            text=request.text,
+            speaker=request.speaker,
+            language=request.language
+        )
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.get("/tts/voices")
+async def get_available_voices():
+    """Return the voices reported by the TTS service as ``{"voices": ...}``.
+
+    Errors are reported as an ``{"error": ...}`` JSON body: status 503 when no
+    TTS service is set, 500 when ``get_voices`` raises.
+    """
+    tts = getattr(lollmsElfServer, "tts", None)
+    if not tts:
+        return JSONResponse(content={"error": "TTS service not available"}, status_code=503)
+    try:
+        voices = tts.get_voices()
+        return JSONResponse(content={"voices": voices})
+    except Exception as e:
+        return JSONResponse(content={"error": str(e)}, status_code=500)
diff --git a/lollms_core b/lollms_core
index a1c63689..fe562543 160000
--- a/lollms_core
+++ b/lollms_core
@@ -1 +1 @@
-Subproject commit a1c63689f79b61ed644a37b124adbed1e6faee69
+Subproject commit fe562543b4e6811f98bc42e58aa06c46dd441c46
diff --git a/zoos/models_zoo b/zoos/models_zoo
index 6323528d..358a64a0 160000
--- a/zoos/models_zoo
+++ b/zoos/models_zoo
@@ -1 +1 @@
-Subproject commit 6323528d9626cbef69f40614f0b5ac773be1ef28
+Subproject commit 358a64a034c7651a8b551cf20bd9bb2059f8835a
diff --git a/zoos/personalities_zoo b/zoos/personalities_zoo
index 128a7e36..29913409 160000
--- a/zoos/personalities_zoo
+++ b/zoos/personalities_zoo
@@ -1 +1 @@
-Subproject commit 128a7e360ba8fb876465a297b2e266fc8e0d1104
+Subproject commit 299134098a0b43b920e2194d43fb70f936c2dd62