upgraded lollms core

2024-12-19 04:37:54 +00:00 · 2024-01-15 01:10:29 +01:00 · 2024-01-15 01:10:29 +01:00 · 6c2199b5f3
commit 6c2199b5f3
parent d970f80325
4 changed files with 93 additions and 160 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file =========================== 
-version: 45
+version: 46
 binding_name: null
 model_name: null
@ -42,6 +42,13 @@ db_path: database.db
 # Automatic updates
 debug: False
 auto_update: true
 auto_sync_personalities: true
 auto_sync_extensions: true
 auto_sync_bindings: true
 auto_sync_models: true
 auto_save: true
 auto_title: false
 # Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Models Configuration file =========================== 
-version: 45
+version: 46
 binding_name: null
 model_name: null
@ -42,6 +42,13 @@ db_path: database.db
 # Automatic updates
 debug: False
 auto_update: true
 auto_sync_personalities: true
 auto_sync_extensions: true
 auto_sync_bindings: true
 auto_sync_models: true
 auto_save: true
 auto_title: false
 # Install mode (cpu, cpu-noavx, nvidia-tensorcores, nvidia, amd-noavx, amd, apple-intel, apple-silicon)
--- a/lollms/media.py
+++ b/lollms/media.py
@ -8,8 +8,10 @@ License: Apache 2.0
 """
 from lollms.utilities import PackageManager
 from lollms.com import LoLLMsCom
 from lollms.utilities import trace_exception, run_async
 from ascii_colors import ASCIIColors
 import platform
 from functools import partial
 import subprocess
 import os
@ -46,12 +48,8 @@ if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("openai-whisper")
 import whisper
 import socketio
 from lollms.com import LoLLMsCom
 import time
 import json
 import base64
 import io
 import numpy as np
 try:
    if not PackageManager.check_package_installed("sounddevice"):
        # os.system("sudo apt-get install portaudio19-dev")
@ -66,38 +64,45 @@ try:
    import wave
 except:
    ASCIIColors.error("Couldn't load sound tools")
 import time
 import base64
 import io
 import socketio
 from scipy.io.wavfile import write
 from matplotlib import pyplot as plt
 import numpy as np
 from scipy.signal import spectrogram
 class AudioRecorder:
-    def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom=None):
+    def __init__(self, sio:socketio.Client, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None):
-        try:
+        self.sio = sio
-            self.socketio = socketio
+        self.sio = sio
-            self.filename = filename
+        self.filename = filename
-            self.channels = channels
+        self.channels = channels
-            self.sample_rate = sample_rate
+        self.sample_rate = sample_rate
-            self.chunk_size = chunk_size
+        self.chunk_size = chunk_size
-            self.audio_stream = None
+        self.silence_threshold = silence_threshold
-            self.audio_frames = []
+        self.silence_duration = silence_duration
-            self.is_recording = False
+        self.callback = callback
-            self.silence_threshold = silence_threshold
+        self.lollmsCom = lollmsCom
-            self.silence_duration = silence_duration
+        self.buffer = []
-            self.last_sound_time = time.time()
+        self.is_recording = False
-            self.callback = callback
+        self.start_time = time.time()
-            self.lollmsCom = lollmsCom
+        self.last_sound_time = time.time()
-            self.whisper_model = None
+        self.whisper_model = None
-        except:
+
-            self.socketio = socketio
+    def audio_callback(self, indata, frames, time_, status):
-            self.filename = filename
+        volume_norm = np.linalg.norm(indata)*10
-            self.channels = channels
+        # if volume_norm > self.silence_threshold:
-            self.sample_rate = sample_rate
+        #     self.last_sound_time = time.time()
-            self.chunk_size = chunk_size
+        #     if not self.is_recording:
-            self.audio_stream = None
+        #         self.is_recording = True
-            self.audio_frames = []
+        #         self.start_time = time.time()
-            self.is_recording = False
+        if self.is_recording:
-            self.silence_threshold = silence_threshold
+            self.buffer = np.append(self.buffer, indata.copy())
-            self.silence_duration = silence_duration
+            # if time.time() - self.last_sound_time > self.silence_duration:
-            self.last_sound_time = time.time()
+            #     self.stop_recording()
            self.callback = callback
            self.lollmsCom = lollmsCom
            self.whisper_model = None
    def start_recording(self):
        if self.whisper_model is None:
@ -105,138 +110,50 @@ class AudioRecorder:
            self.whisper_model=whisper.load_model("base.en")
        try:
            self.is_recording = True
-            self.audio_stream = sd.InputStream(
+            self.buffer = np.array([], dtype=np.float32)
-                channels=self.channels,
+            self.audio_stream = sd.InputStream(callback=self.audio_callback, channels=self.channels, samplerate=self.sample_rate)
                samplerate=self.sample_rate,
                callback=self._record,
                blocksize=self.chunk_size
            )
            self.audio_stream.start()
-
+        except Exception as ex:
-            self.lollmsCom.info("Recording started...")
+            self.lollmsCom.InfoMessage("Couldn't start recording.\nMake sure your input device is connected and operational")
-        except:
+            trace_exception(ex)
            self.lollmsCom.error("No audio input found!")
    def _record(self, indata, frames, time_, status):
        first_recording = True  # Flag to track the first recording
        silence_duration = 5
        non_silent_start = None
        non_silent_end = None
        last_spectrogram_update = time.time()
        self.audio_frames = None
        buffered = np.array(indata)
        if self.audio_frames is not None:
            self.audio_frames = np.concatenate([self.audio_frames, buffered])
        else:
            self.audio_frames = buffered
        # Remove audio frames that are older than 30 seconds
        if len(self.audio_frames) > self.sample_rate * 30:
            self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
        # Update spectrogram once at least 1 second has elapsed since the last update
        if time.time() - last_spectrogram_update >= 1:
            self._update_spectrogram()
            last_spectrogram_update = time.time()
        # Check for silence
        rms = self._calculate_rms(buffered)
        if rms < self.silence_threshold:
            current_time = time.time()
            if current_time - self.last_sound_time >= silence_duration:
                if first_recording:
                    first_recording = False
                    silence_duration = self.silence_duration
                if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1:
                    self.lollmsCom.info("Analyzing")
                    # Convert to float
                    audio_data = self.audio_frames.astype(np.float32)
                    audio = wave.open(str(self.filename), 'wb')
                    audio.setnchannels(self.channels)
                    audio.setsampwidth(self.audio_stream.dtype.itemsize)
                    audio.setframerate(self.sample_rate)
                    audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
                    audio.close()
                    # Transcribe the audio using the whisper model
                    text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
                    self.callback(text)
                    print(text["text"])
                self.last_sound_time = time.time()
                non_silent_start = None
        else:
            self.last_sound_time = time.time()
            if non_silent_start is None:
                non_silent_start = len(self.audio_frames) - 1
            non_silent_end = len(self.audio_frames)
    def _update_spectrogram(self):
        audio_data = self.audio_frames[-self.sample_rate*30:]
        frequencies, _, spectrogram = signal.spectrogram(audio_data, self.sample_rate)
        # Generate a new times array that only spans the last 30 seconds
        times = np.linspace(0, 30, spectrogram.shape[1])
        # Plot spectrogram
        plt.figure(figsize=(10, 4))
        plt.imshow(np.log(spectrogram), aspect='auto', origin='lower', cmap='inferno', extent=[times.min(), times.max(), frequencies.min(), frequencies.max()])
        plt.xlabel('Time')
        plt.ylabel('Frequency')
        plt.title('Spectrogram')
        plt.colorbar(format='%+2.0f dB')
        # Convert plot to base64 image
        img_buffer = io.BytesIO()
        plt.savefig(img_buffer, format='png')
        img_buffer.seek(0)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
        # Send base64 image using socketio
        self.socketio.emit('update_spectrogram', img_base64)
        self.socketio.sleep(0.0)
        plt.close()
    def _calculate_rms(self, data):
        try:
            squared_sum = sum([sample ** 2 for sample in data])
            rms = np.sqrt(squared_sum / len(data))
        except:
            rms = 0
        return rms
    def stop_recording(self):
        self.is_recording = False
-        if self.audio_stream:
+        self.audio_stream.stop()
-            self.audio_stream.stop()
+        self.audio_stream.close()
-            import wave
+        write(self.filename, self.sample_rate, self.buffer)
-            audio = wave.open(str(self.filename), 'wb')
+        self.lollmsCom.info(f"Saved to {self.filename}")
-            audio.setnchannels(self.channels)
+        time.sleep(2)
-            audio.setsampwidth(self.audio_stream.dtype.itemsize)
+        self.lollmsCom.info(f"Transcribing ... ")
-            audio.setframerate(self.sample_rate)
+        result = self.whisper_model.transcribe(str(self.filename))
-            audio.writeframes(b''.join(self.audio_frames))
+        with open(self.filename.replace("wav","txt"), "w") as f:
-            audio.close()
+            f.write(result["text"])
        self.lollmsCom.info(f"Saved to {self.filename}")
-            self.lollmsCom.info(f"Recording saved to {self.filename}")
+    def update_spectrogram(self):
-        else:
+        f, t, Sxx = spectrogram(self.buffer[-30*self.sample_rate:], self.sample_rate)
-            self.warning("No recording available")
+        plt.pcolormesh(t, f, 10 * np.log10(Sxx))
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        image_base64 = base64.b64encode(buf.read())
        self.sio.emit('update_spectrogram', {'image': image_base64.decode('utf-8')})
        self.last_spectrogram_update = time.perf_counter()
        buf.close()
        plt.clf()
 class WebcamImageSender:
    """
    Class for capturing images from the webcam and sending them to a SocketIO client.
    """
-    def __init__(self, socketio, lollmsCom:LoLLMsCom=None):
+    def __init__(self, sio:socketio, lollmsCom:LoLLMsCom=None):
        """
        Initializes the WebcamImageSender class.
        Args:
            sio (socketio.Client): The SocketIO client object.
        """
-        self.socketio = socketio
+        self.socketio = sio
        self.last_image = None
        self.last_change_time = None
        self.capture_thread = None
@ -278,8 +195,9 @@ class WebcamImageSender:
                self.socketio.emit("video_stream_image", image_base64.decode('utf-8'))
            cap.release()
-        except:
+        except Exception as ex:
            self.lollmsCom.error("Couldn't start webcam")
            trace_exception(ex)
    def image_difference(self, image):
        """
@ -349,5 +267,6 @@ class MusicPlayer(threading.Thread):
        """
        Stops the music.
        """
        import pygame
        self.stopped = True
        pygame.mixer.music.stop()
--- a/lollms/server/endpoints/lollms_personalities_infos.py
+++ b/lollms/server/endpoints/lollms_personalities_infos.py
@ -17,7 +17,7 @@ from lollms.personality import AIPersonality, InstallOption
 from ascii_colors import ASCIIColors
 from lollms.utilities import load_config, trace_exception, gc
 from pathlib import Path
-from typing import List
+from typing import List, Optional
 import psutil
 import yaml
@ -26,10 +26,6 @@ import yaml
 class PersonalityListingInfos(BaseModel):
    category:str
 class PersonalityMountingInfos(BaseModel):
    category:str
    folder:str
    language:str
 class PersonalitySelectionInfos(BaseModel):
    id:int
@ -237,6 +233,10 @@ def clear_personality_files_list():
 # ------------------------------------------- Mounting/Unmounting/Remounting ------------------------------------------------
 class PersonalityMountingInfos(BaseModel):
    category:str
    folder:str
    language:Optional[str] = None
@router.post("/mount_personality")
 def mount_personality(data:PersonalityMountingInfos):