fixed channels problem

2025-04-15 14:36:34 +00:00 · 2024-05-20 15:08:44 +02:00 · 2024-05-20 15:08:44 +02:00 · 45cc5e39b9
commit 45cc5e39b9
parent f5853c8163
8 changed files with 144 additions and 17 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 99
+version: 100
 binding_name: null
 model_name: null
 model_variant: null
@ -89,7 +89,11 @@ active_ttm_service: "None" # musicgen (offline)
 # -------------------- Services --------------------------

 # ***************** STT *****************
-# STT service 
+stt_input_device: null
+
+
+
+# ASR STT service 
 asr_enable: false
 asr_base_url: http://localhost:9000

@ -104,6 +108,8 @@ whisper_model: base


 # ***************** TTS *****************
+tts_output_device: null
+
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 99
+version: 100
 binding_name: null
 model_name: null
 model_variant: null
@ -89,7 +89,11 @@ active_ttm_service: "None" # musicgen (offline)
 # -------------------- Services --------------------------

 # ***************** STT *****************
-# STT service 
+stt_input_device: null
+
+
+
+# ASR STT service 
 asr_enable: false
 asr_base_url: http://localhost:9000

@ -104,6 +108,8 @@ whisper_model: base


 # ***************** TTS *****************
+tts_output_device: null
+
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
--- a/lollms/media.py
+++ b/lollms/media.py
@ -109,7 +109,8 @@ class RTCom:
                        channels=1, 
                        buffer_size=10, 
                        model="small.en", 
-                        snd_device=None, 
+                        snd_input_device=None,
+                        snd_output_device=None,
                        logs_folder="logs", 
                        voice=None, 
                        block_while_talking=True, 
@ -138,11 +139,15 @@ class RTCom:
        self.block_while_talking = block_while_talking
        self.image_shot = None

-        if snd_device is None:
+        if snd_input_device is None:
            devices = sd.query_devices()
-            snd_device = [device['name'] for device in devices][0]
+            snd_input_device = [device['name'] for device in devices if device['type'] == 'input'][0]
+        if snd_output_device is None:
+            devices = sd.query_devices()
+            snd_output_device = [device['name'] for device in devices if device['type'] == 'output'][0]

-        self.snd_device = snd_device
+        self.snd_input_device = snd_input_device
+        self.snd_output_device = snd_output_device
        self.logs_folder = logs_folder

        self.frames = []
@ -206,8 +211,7 @@ class RTCom:
        ASCIIColors.green("<<RTCOM off>>")

    def _record(self):
-        sd.default.device = self.snd_device
-        with sd.InputStream(channels=self.channels, samplerate=self.rate, callback=self.callback, dtype='int16'):
+        with sd.InputStream(channels=self.channels, device=self.snd_input_device, samplerate=self.rate, callback=self.callback, dtype='int16'):
            while not self.stop_flag:
                time.sleep(0.1)

--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 95
+version: 100
 binding_name: null
 model_name: null
 model_variant: null
@ -80,10 +80,36 @@ auto_show_browser: true
 # copy to clipboard 
 copy_to_clipboard_add_all_details: false

-# STT service 
+# -------------------- Services global configurations --------------------------
+# Select the active text to speech, text to image and speech to text services
+active_tts_service: "None" # xtts (offline), openai_tts (API key required)
+active_tti_service: "None" # autosd (offline), dall-e (online)
+active_stt_service: "None" # whisper (offline), asr (offline or online), openai_whisper (API key required)
+active_ttm_service: "None" # musicgen (offline)
+# -------------------- Services --------------------------
+
+# ***************** STT *****************
+stt_input_device: null
+
+
+
+# ASR STT service 
 asr_enable: false
 asr_base_url: http://localhost:9000

+# openai_whisper configuration
+openai_whisper_key: ""
+openai_whisper_model: "whisper-1"
+
+
+# whisper configuration
+whisper_activate: false
+whisper_model: base
+
+
+# ***************** TTS *****************
+tts_output_device: null
+
 # Voice service
 xtts_enable: false
 xtts_base_url: http://localhost:8020
@ -101,10 +127,24 @@ xtts_top_p: 0.85
 xtts_speed: 1
 xtts_enable_text_splitting: true

+# openai_tts configuration
+openai_tts_key: ""
+openai_tts_model: "tts-1"
+openai_tts_voice: "alloy"
+
+# ***************** TTI *****************
+
 # Image generation service
 enable_sd_service: false
 sd_base_url: http://localhost:7860

+# Dall e service key
+dall_e_key: ""
+dall_e_generation_engine: "dall-e-3"
+
+# Midjourney service key
+midjourney_key: ""
+
 # Image generation service comfyui
 enable_comfyui_service: false
 comfyui_base_url: http://127.0.0.1:8188/
@ -113,6 +153,8 @@ comfyui_base_url: http://127.0.0.1:8188/
 enable_motion_ctrl_service: false
 motion_ctrl_base_url: http://localhost:7861

+# ***************** TTT *****************
+
 # ollama service
 enable_ollama_service: false
 ollama_base_url: http://localhost:11434
@ -197,6 +239,3 @@ show_code_of_conduct: true
 activate_audio_infos: true


-# whisper configuration
-whisper_activate: false
-whisper_model: base
--- a/lollms/server/endpoints/lollms_binding_files_server.py
+++ b/lollms/server/endpoints/lollms_binding_files_server.py
@ -19,6 +19,7 @@ from ascii_colors import ASCIIColors
 from lollms.utilities import load_config, trace_exception, gc
 from pathlib import Path
 from typing import List
+from lollms.security import sanitize_svg
 import os
 import re

@ -274,6 +275,19 @@ async def serve_discussions(path: str):
    if not Path(file_path).exists():
        raise HTTPException(status_code=404, detail="File not found")

+    # Check if the file is an SVG
+    if file_path.suffix.lower() == '.svg':
+        with open(file_path, 'r', encoding='utf-8') as file:
+            svg_content = file.read()
+        sanitized_svg_content = sanitize_svg(svg_content)
+        
+        # Save the sanitized SVG content to a temporary file
+        temp_svg_path = file_path.with_suffix('.sanitized.svg')
+        with open(temp_svg_path, 'w', encoding='utf-8') as file:
+            file.write(sanitized_svg_content)
+        
+        return FileResponse(str(temp_svg_path))
+
    return FileResponse(str(file_path))


--- a/lollms/server/endpoints/lollms_tts.py
+++ b/lollms/server/endpoints/lollms_tts.py
@ -304,3 +304,13 @@ def tts_is_ready():
        if lollmsElfServer.tts.ready:
            return {"status":True}
    return {"status":False}
+
+
+@router.get("/get_snd_input_devices")
+def get_snd_input_devices():  # Expose the STT service's device listing over HTTP.
+    return lollmsElfServer.stt.get_devices()  # Return the payload; without `return` the route always answers null.
+
+@router.get("/get_snd_output_devices")
+def get_snd_output_devices():  # Expose the TTS service's device listing over HTTP.
+    return lollmsElfServer.tts.get_devices()  # Return the payload; without `return` the route always answers null.
+
--- a/lollms/stt.py
+++ b/lollms/stt.py
@ -8,7 +8,24 @@ Author: ParisNeo, a computer geek passionate about AI
 """

 from lollms.app import LollmsApplication
+from lollms.utilities import PackageManager
 from pathlib import Path
+from ascii_colors import ASCIIColors
+
+try:
+    if not PackageManager.check_package_installed("sounddevice"):
+        # NOTE(review): on Debian/Ubuntu PortAudio may be needed first (apt-get install portaudio19-dev) - confirm.
+        PackageManager.install_package("sounddevice")
+        PackageManager.install_package("wave")  # NOTE(review): wave ships with the standard library - this install looks unnecessary.
+except Exception:  # Narrowed from a bare except so Ctrl-C / SystemExit are not swallowed.
+    # The installation check itself failed: fall back to installing unconditionally.
+    PackageManager.install_package("sounddevice")
+    PackageManager.install_package("wave")
+try:
+    import sounddevice as sd
+    import wave
+except Exception:  # Best effort: log and keep loading; `sd` stays undefined in that case.
+    ASCIIColors.error("Couldn't load sound tools")

 class LollmsSTT:
    """
@ -96,3 +113,11 @@ class LollmsSTT:
            LollmsSTT: The LollmsSTT class.
        """
        return LollmsSTT
+
+    def get_devices(self):
+        devices =  sd.query_devices()
+        # Report the names of devices exposing at least one input channel (debug print of the raw table removed).
+        return {
+            "status": True,
+            "device_names": [device['name'] for device in devices if device["max_input_channels"]>0]
+        }
--- a/lollms/tts.py
+++ b/lollms/tts.py
@ -7,8 +7,23 @@ This module is part of the Lollms library, designed to provide Text-to-Speech (T
 Author: ParisNeo, a computer geek passionate about AI
 """
 from lollms.app import LollmsApplication
+from lollms.utilities import PackageManager
 from pathlib import Path
-
+from ascii_colors import ASCIIColors
+try:
+    if not PackageManager.check_package_installed("sounddevice"):
+        # NOTE(review): on Debian/Ubuntu PortAudio may be needed first (apt-get install portaudio19-dev) - confirm.
+        PackageManager.install_package("sounddevice")
+        PackageManager.install_package("wave")  # NOTE(review): wave ships with the standard library - this install looks unnecessary.
+except Exception:  # Narrowed from a bare except so Ctrl-C / SystemExit are not swallowed.
+    # The installation check itself failed: fall back to installing unconditionally.
+    PackageManager.install_package("sounddevice")
+    PackageManager.install_package("wave")
+try:
+    import sounddevice as sd
+    import wave
+except Exception:  # Best effort: log and keep loading; `sd` stays undefined in that case.
+    ASCIIColors.error("Couldn't load sound tools")
 class LollmsTTS:
    """
    LollmsTTS is a base class for implementing Text-to-Speech (TTS) functionalities within the LollmsApplication.
@ -119,4 +134,12 @@ class LollmsTTS:
        Returns:
            list: A list of available voices.
        """
-        return self.voices
+        return self.voices
+    
+    def get_devices(self):
+        # Collect the names of devices exposing at least one output channel.
+        output_names = [entry['name'] for entry in sd.query_devices() if entry["max_output_channels"]>0]
+        return {
+            "status": True,
+            "device_names": output_names
+        }