good stuff

This commit is contained in:
Saifeddine ALOUI 2024-05-05 20:57:05 +02:00
parent fd20ec2859
commit 1377c5b953
6 changed files with 47 additions and 11 deletions

View File

@@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 91
version: 92
binding_name: null
model_name: null
model_variant: null
@@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
# Image generation service
enable_sd_service: false

View File

@@ -26,6 +26,7 @@ import sys, os
import platform
import gc
import yaml
import time
class LollmsApplication(LoLLMsCom):
def __init__(
self,
@@ -265,7 +266,15 @@ class LollmsApplication(LoLLMsCom):
trace_exception(ex)
self.warning(f"Couldn't load vllm")
if self.config.whisper_activate:
try:
from lollms.media import AudioRecorder
self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
self.rec.start_recording()
time.sleep(1)
self.rec.stop_recording()
except:
pass
if self.config.xtts_enable:
try:
from lollms.services.xtts.lollms_xtts import LollmsXTTS

View File

@@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 91
version: 92
binding_name: null
model_name: null
model_variant: null
@@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
# Image generation service
enable_sd_service: false

View File

@@ -79,11 +79,12 @@ from scipy.io.wavfile import write
from matplotlib import pyplot as plt
import numpy as np
from scipy.signal import spectrogram
from pathlib import Path
class AudioRecorder:
def __init__(self, sio:socketio.Client, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None, build_spectrogram=False, model = "base", transcribe=False):
def __init__(self, filename:Path, sio:socketio.Client=None, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None, build_spectrogram=False, model = "base", transcribe=False):
self.sio = sio
self.filename = filename
self.filename = Path(filename)
self.filename.parent.mkdir(exist_ok=True, parents=True)
self.channels = channels
self.sample_rate = sample_rate
self.chunk_size = chunk_size
@@ -138,7 +139,8 @@ class AudioRecorder:
with open(transcription_fn, "w", encoding="utf-8") as f:
f.write(result["text"])
self.lollmsCom.info(f"File saved to {transcription_fn}")
run_async(partial(self.sio.emit,'transcript', result["text"]))
if self.sio:
run_async(partial(self.sio.emit,'transcript', result["text"]))
return {"text":result["text"], "audio":transcription_fn}
else:
return {"text":""}
@@ -152,7 +154,8 @@ class AudioRecorder:
plt.savefig(img_buffer, format='png')
img_buffer.seek(0)
img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
run_async(partial(self.sio.emit,'update_spectrogram', img_base64))
if self.sio:
run_async(partial(self.sio.emit,'update_spectrogram', img_base64))
self.last_spectrogram_update = time.perf_counter()
plt.clf()
@@ -207,7 +210,8 @@ class WebcamImageSender:
_, buffer = cv2.imencode('.jpg', frame)
image_base64 = base64.b64encode(buffer)
run_async(partial(self.sio.emit,"video_stream_image", image_base64.decode('utf-8')))
if self.sio:
run_async(partial(self.sio.emit,"video_stream_image", image_base64.decode('utf-8')))
cap.release()
except Exception as ex:

View File

@@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 91
version: 92
binding_name: null
model_name: null
model_variant: null
@@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
xtts_current_language: en
xtts_stream_chunk_size: 100
xtts_temperature: 0.75
xtts_length_penalty: 1.0
xtts_repetition_penalty: 5.0
xtts_top_k: 50
xtts_top_p: 0.85
xtts_speed: 1
xtts_enable_text_splitting: true
# Image generation service
enable_sd_service: false

View File

@@ -150,7 +150,6 @@ async def text2Audio(request: LollmsText2AudioRequest):
else:
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
return {"status":False, "error":"Service not ready yet"}
return {"url": url}
except Exception as ex:
trace_exception(ex)
return {"url": None}