mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-01-18 02:39:46 +00:00
fixed channels problem
This commit is contained in:
parent
f5853c8163
commit
45cc5e39b9
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 99
|
||||
version: 100
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -89,7 +89,11 @@ active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
stt_input_device: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
@ -104,6 +108,8 @@ whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: null
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 99
|
||||
version: 100
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -89,7 +89,11 @@ active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
# STT service
|
||||
stt_input_device: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
@ -104,6 +108,8 @@ whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: null
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
|
@ -109,7 +109,8 @@ class RTCom:
|
||||
channels=1,
|
||||
buffer_size=10,
|
||||
model="small.en",
|
||||
snd_device=None,
|
||||
snd_input_device=None,
|
||||
snd_output_device=None,
|
||||
logs_folder="logs",
|
||||
voice=None,
|
||||
block_while_talking=True,
|
||||
@ -138,11 +139,15 @@ class RTCom:
|
||||
self.block_while_talking = block_while_talking
|
||||
self.image_shot = None
|
||||
|
||||
if snd_device is None:
|
||||
if snd_input_device is None:
|
||||
devices = sd.query_devices()
|
||||
snd_device = [device['name'] for device in devices][0]
|
||||
snd_input_device = [device['name'] for device in devices if device['type'] == 'input'][0]
|
||||
if snd_output_device is None:
|
||||
devices = sd.query_devices()
|
||||
snd_output_device = [device['name'] for device in devices if device['type'] == 'output'][0]
|
||||
|
||||
self.snd_device = snd_device
|
||||
self.snd_input_device = snd_input_device
|
||||
self.snd_output_device = snd_output_device
|
||||
self.logs_folder = logs_folder
|
||||
|
||||
self.frames = []
|
||||
@ -206,8 +211,7 @@ class RTCom:
|
||||
ASCIIColors.green("<<RTCOM off>>")
|
||||
|
||||
def _record(self):
|
||||
sd.default.device = self.snd_device
|
||||
with sd.InputStream(channels=self.channels, samplerate=self.rate, callback=self.callback, dtype='int16'):
|
||||
with sd.InputStream(channels=self.channels, device=self.snd_input_device, samplerate=self.rate, callback=self.callback, dtype='int16'):
|
||||
while not self.stop_flag:
|
||||
time.sleep(0.1)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
|
||||
version: 95
|
||||
version: 100
|
||||
binding_name: null
|
||||
model_name: null
|
||||
model_variant: null
|
||||
@ -80,10 +80,36 @@ auto_show_browser: true
|
||||
# copy to clipboard
|
||||
copy_to_clipboard_add_all_details: false
|
||||
|
||||
# STT service
|
||||
# -------------------- Services global configurations --------------------------
|
||||
# Select the active text to speech, text to image and speech to text services
|
||||
active_tts_service: "None" # xtts (offline), openai_tts (API key required)
|
||||
active_tti_service: "None" # autosd (offline), dall-e (online)
|
||||
active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
|
||||
active_ttm_service: "None" # musicgen (offline)
|
||||
# -------------------- Services --------------------------
|
||||
|
||||
# ***************** STT *****************
|
||||
stt_input_device: null
|
||||
|
||||
|
||||
|
||||
# ASR STT service
|
||||
asr_enable: false
|
||||
asr_base_url: http://localhost:9000
|
||||
|
||||
# openai_whisper configuration
|
||||
openai_whisper_key: ""
|
||||
openai_whisper_model: "whisper-1"
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
||||
|
||||
|
||||
# ***************** TTS *****************
|
||||
tts_output_device: null
|
||||
|
||||
# Voice service
|
||||
xtts_enable: false
|
||||
xtts_base_url: http://localhost:8020
|
||||
@ -101,10 +127,24 @@ xtts_top_p: 0.85
|
||||
xtts_speed: 1
|
||||
xtts_enable_text_splitting: true
|
||||
|
||||
# openai_tts configuration
|
||||
openai_tts_key: ""
|
||||
openai_tts_model: "tts-1"
|
||||
openai_tts_voice: "alloy"
|
||||
|
||||
# ***************** TTI *****************
|
||||
|
||||
# Image generation service
|
||||
enable_sd_service: false
|
||||
sd_base_url: http://localhost:7860
|
||||
|
||||
# Dall e service key
|
||||
dall_e_key: ""
|
||||
dall_e_generation_engine: "dall-e-3"
|
||||
|
||||
# Midjourney service key
|
||||
midjourney_key: ""
|
||||
|
||||
# Image generation service comfyui
|
||||
enable_comfyui_service: false
|
||||
comfyui_base_url: http://127.0.0.1:8188/
|
||||
@ -113,6 +153,8 @@ comfyui_base_url: http://127.0.0.1:8188/
|
||||
enable_motion_ctrl_service: false
|
||||
motion_ctrl_base_url: http://localhost:7861
|
||||
|
||||
# ***************** TTT *****************
|
||||
|
||||
# ollama service
|
||||
enable_ollama_service: false
|
||||
ollama_base_url: http://localhost:11434
|
||||
@ -197,6 +239,3 @@ show_code_of_conduct: true
|
||||
activate_audio_infos: true
|
||||
|
||||
|
||||
# whisper configuration
|
||||
whisper_activate: false
|
||||
whisper_model: base
|
@ -19,6 +19,7 @@ from ascii_colors import ASCIIColors
|
||||
from lollms.utilities import load_config, trace_exception, gc
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from lollms.security import sanitize_svg
|
||||
import os
|
||||
import re
|
||||
|
||||
@ -274,6 +275,19 @@ async def serve_discussions(path: str):
|
||||
if not Path(file_path).exists():
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
# Check if the file is an SVG
|
||||
if file_path.suffix.lower() == '.svg':
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
svg_content = file.read()
|
||||
sanitized_svg_content = sanitize_svg(svg_content)
|
||||
|
||||
# Save the sanitized SVG content to a temporary file
|
||||
temp_svg_path = file_path.with_suffix('.sanitized.svg')
|
||||
with open(temp_svg_path, 'w', encoding='utf-8') as file:
|
||||
file.write(sanitized_svg_content)
|
||||
|
||||
return FileResponse(str(temp_svg_path))
|
||||
|
||||
return FileResponse(str(file_path))
|
||||
|
||||
|
||||
|
@ -304,3 +304,13 @@ def tts_is_ready():
|
||||
if lollmsElfServer.tts.ready:
|
||||
return {"status":True}
|
||||
return {"status":False}
|
||||
|
||||
|
||||
@router.get("/get_snd_input_devices")
def get_snd_input_devices():
    """
    List the sound input devices known to the server's STT service.

    Returns:
        Whatever ``lollmsElfServer.stt.get_devices()`` produces.
    """
    # The result has to be returned explicitly; without it the route
    # always responds with null regardless of the devices found.
    return lollmsElfServer.stt.get_devices()
|
||||
|
||||
@router.get("/get_snd_output_devices")
def get_snd_output_devices():
    """
    List the sound output devices known to the server's TTS service.

    Returns:
        Whatever ``lollmsElfServer.tts.get_devices()`` produces.
    """
    # The result has to be returned explicitly; without it the route
    # always responds with null regardless of the devices found.
    return lollmsElfServer.tts.get_devices()
|
||||
|
||||
|
@ -8,7 +8,24 @@ Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.utilities import PackageManager
|
||||
from pathlib import Path
|
||||
from ascii_colors import ASCIIColors
|
||||
|
||||
try:
|
||||
if not PackageManager.check_package_installed("sounddevice"):
|
||||
# os.system("sudo apt-get install portaudio19-dev")
|
||||
PackageManager.install_package("sounddevice")
|
||||
PackageManager.install_package("wave")
|
||||
except:
|
||||
# os.system("sudo apt-get install portaudio19-dev -y")
|
||||
PackageManager.install_package("sounddevice")
|
||||
PackageManager.install_package("wave")
|
||||
try:
|
||||
import sounddevice as sd
|
||||
import wave
|
||||
except:
|
||||
ASCIIColors.error("Couldn't load sound tools")
|
||||
|
||||
class LollmsSTT:
|
||||
"""
|
||||
@ -96,3 +113,11 @@ class LollmsSTT:
|
||||
LollmsSTT: The LollmsSTT class.
|
||||
"""
|
||||
return LollmsSTT
|
||||
|
||||
def get_devices(self):
    """
    List the sound devices that expose at least one input channel.

    Returns:
        dict: ``{"status": True, "device_names": [...]}`` where
        ``device_names`` holds the ``name`` of every entry returned by
        ``sd.query_devices()`` whose ``max_input_channels`` is above zero.
    """
    devices = sd.query_devices()
    # No debug dump of the raw device table here: this is called from an
    # HTTP route and would spam stdout on every request.
    return {
        "status": True,
        "device_names": [device['name'] for device in devices if device["max_input_channels"] > 0]
    }
|
@ -7,8 +7,23 @@ This module is part of the Lollms library, designed to provide Text-to-Speech (T
|
||||
Author: ParisNeo, a computer geek passionate about AI
|
||||
"""
|
||||
from lollms.app import LollmsApplication
|
||||
from lollms.utilities import PackageManager
|
||||
from pathlib import Path
|
||||
|
||||
from ascii_colors import ASCIIColors
|
||||
try:
|
||||
if not PackageManager.check_package_installed("sounddevice"):
|
||||
# os.system("sudo apt-get install portaudio19-dev")
|
||||
PackageManager.install_package("sounddevice")
|
||||
PackageManager.install_package("wave")
|
||||
except:
|
||||
# os.system("sudo apt-get install portaudio19-dev -y")
|
||||
PackageManager.install_package("sounddevice")
|
||||
PackageManager.install_package("wave")
|
||||
try:
|
||||
import sounddevice as sd
|
||||
import wave
|
||||
except:
|
||||
ASCIIColors.error("Couldn't load sound tools")
|
||||
class LollmsTTS:
|
||||
"""
|
||||
LollmsTTS is a base class for implementing Text-to-Speech (TTS) functionalities within the LollmsApplication.
|
||||
@ -119,4 +134,12 @@ class LollmsTTS:
|
||||
Returns:
|
||||
list: A list of available voices.
|
||||
"""
|
||||
return self.voices
|
||||
return self.voices
|
||||
|
||||
def get_devices(self):
    """
    List the sound devices that expose at least one output channel.

    Returns:
        dict: ``{"status": True, "device_names": [...]}`` where
        ``device_names`` holds the ``name`` of every entry returned by
        ``sd.query_devices()`` whose ``max_output_channels`` is above zero.
    """
    output_names = []
    for entry in sd.query_devices():
        # Keep only devices that can actually play sound.
        if entry["max_output_channels"] > 0:
            output_names.append(entry['name'])

    return {"status": True, "device_names": output_names}
|
Loading…
Reference in New Issue
Block a user