mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-18 20:27:58 +00:00
good stuff
This commit is contained in:
parent
fd20ec2859
commit
1377c5b953
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 91
|
||||
version: 92
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
|
@ -26,6 +26,7 @@ import sys, os
|
||||
import platform
|
||||
import gc
|
||||
import yaml
|
||||
import time
|
||||
class LollmsApplication(LoLLMsCom):
|
||||
def __init__(
|
||||
self,
|
||||
@ -265,7 +266,15 @@ class LollmsApplication(LoLLMsCom):
|
||||
trace_exception(ex)
|
||||
self.warning(f"Couldn't load vllm")
|
||||
|
||||
|
||||
if self.config.whisper_activate:
|
||||
try:
|
||||
from lollms.media import AudioRecorder
|
||||
self.rec = AudioRecorder(self.lollms_paths.personal_outputs_path/"test.wav")
|
||||
self.rec.start_recording()
|
||||
time.sleep(1)
|
||||
self.rec.stop_recording()
|
||||
except:
|
||||
pass
|
||||
if self.config.xtts_enable:
|
||||
try:
|
||||
from lollms.services.xtts.lollms_xtts import LollmsXTTS
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 91
|
||||
version: 92
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
|
@ -79,11 +79,12 @@ from scipy.io.wavfile import write
|
||||
from matplotlib import pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.signal import spectrogram
|
||||
|
||||
from pathlib import Path
|
||||
class AudioRecorder:
|
||||
def __init__(self, sio:socketio.Client, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None, build_spectrogram=False, model = "base", transcribe=False):
|
||||
def __init__(self, filename:Path, sio:socketio.Client=None, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None, build_spectrogram=False, model = "base", transcribe=False):
|
||||
self.sio = sio
|
||||
self.filename = filename
|
||||
self.filename = Path(filename)
|
||||
self.filename.parent.mkdir(exist_ok=True, parents=True)
|
||||
self.channels = channels
|
||||
self.sample_rate = sample_rate
|
||||
self.chunk_size = chunk_size
|
||||
@ -138,7 +139,8 @@ class AudioRecorder:
|
||||
with open(transcription_fn, "w", encoding="utf-8") as f:
|
||||
f.write(result["text"])
|
||||
self.lollmsCom.info(f"File saved to {transcription_fn}")
|
||||
run_async(partial(self.sio.emit,'transcript', result["text"]))
|
||||
if self.sio:
|
||||
run_async(partial(self.sio.emit,'transcript', result["text"]))
|
||||
return {"text":result["text"], "audio":transcription_fn}
|
||||
else:
|
||||
return {"text":""}
|
||||
@ -152,7 +154,8 @@ class AudioRecorder:
|
||||
plt.savefig(img_buffer, format='png')
|
||||
img_buffer.seek(0)
|
||||
img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
|
||||
run_async(partial(self.sio.emit,'update_spectrogram', img_base64))
|
||||
if self.sio:
|
||||
run_async(partial(self.sio.emit,'update_spectrogram', img_base64))
|
||||
self.last_spectrogram_update = time.perf_counter()
|
||||
plt.clf()
|
||||
|
||||
@ -207,7 +210,8 @@ class WebcamImageSender:
|
||||
|
||||
_, buffer = cv2.imencode('.jpg', frame)
|
||||
image_base64 = base64.b64encode(buffer)
|
||||
run_async(partial(self.sio.emit,"video_stream_image", image_base64.decode('utf-8')))
|
||||
if self.sio:
|
||||
run_async(partial(self.sio.emit,"video_stream_image", image_base64.decode('utf-8')))
|
||||
|
||||
cap.release()
|
||||
except Exception as ex:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 91
|
||||
version: 92
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -88,6 +88,14 @@ xtts_use_streaming_mode: true
|
||||
auto_read: false
|
||||
xtts_current_voice: null
|
||||
xtts_current_language: en
|
||||
xtts_stream_chunk_size: 100
|
||||
xtts_temperature: 0.75
|
||||
xtts_length_penalty: 1.0
|
||||
xtts_repetition_penalty: 5.0
|
||||
xtts_top_k: 50
|
||||
xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
|
@ -150,7 +150,6 @@ async def text2Audio(request: LollmsText2AudioRequest):
|
||||
else:
|
||||
lollmsElfServer.InfoMessage("xtts is not up yet.\nPlease wait for it to load then try again. This may take some time.")
|
||||
return {"status":False, "error":"Service not ready yet"}
|
||||
return {"url": url}
|
||||
except Exception as ex:
|
||||
trace_exception(ex)
|
||||
return {"url": None}
|
||||
|
Loading…
Reference in New Issue
Block a user