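enhanced: reject ".." in install_model paths, drop a duplicated assignment in AudioRecorder, add RealTimeTranscription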

2025-04-16 23:08:51 +00:00 · 2024-02-17 00:09:31 +01:00 · 2024-02-17 00:09:31 +01:00 · 61c2f0ed2a
commit 61c2f0ed2a
parent 56a53bb35e
2 changed files with 54 additions and 1 deletions
--- a/lollms/binding.py
+++ b/lollms/binding.py
@ -144,6 +144,8 @@ class LLMBinding:

    def install_model(self, model_type:str, model_path:str, variant_name:str, client_id:int=None):
        print("Install model triggered")
+        if(".." in model_path):
+            raise "Detected an attempt of path traversal. Are you kidding me?"
        model_path = model_path.replace("\\","/")

        if model_type.lower() in model_path.lower():
--- a/lollms/media.py
+++ b/lollms/media.py
@ -82,7 +82,6 @@ from scipy.signal import spectrogram

 class AudioRecorder:
    def __init__(self, sio:socketio.Client, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None, build_spectrogram=False, model = "base", transcribe=False):
-        self.sio = sio
        self.sio = sio
        self.filename = filename
        self.channels = channels
@ -286,3 +285,55 @@ class MusicPlayer(threading.Thread):
        import pygame
        self.stopped = True
        pygame.mixer.music.stop()
+
+
+class RealTimeTranscription:
+    """Capture microphone audio with PyAudio and transcribe it with Whisper.
+
+    Audio is read as 16-bit mono PCM in fixed-size chunks. Each chunk is
+    passed to the Whisper "base" model and any non-empty text is handed to
+    the user-supplied callback.
+    """
+
+    SAMPLE_RATE = 16000       # Hz, requested from PyAudio when opening the stream
+    FRAMES_PER_BUFFER = 1024  # frames read (and transcribed) per loop iteration
+
+    def __init__(self, callback):
+        """Load the Whisper model and open the microphone input stream.
+
+        Args:
+            callback: Callable invoked with the transcribed text of a chunk.
+
+        Raises:
+            ImportError: If pyaudio is not available and could not be installed.
+        """
+        if not PackageManager.check_package_installed('pyaudio'):
+            try:
+                import conda.cli
+                conda.cli.main("install", "anaconda::pyaudio", "-y")
+            except (Exception, SystemExit) as ex:
+                # Best effort only. SystemExit is caught as well because a CLI
+                # entry point may exit instead of raising; KeyboardInterrupt is
+                # deliberately left to propagate.
+                ASCIIColors.bright_red("Couldn't install pyaudio. whisper won't work. Please install it manually")
+                ASCIIColors.bright_red(f"Reason: {ex}")
+        import pyaudio
+        # Initialize Whisper ASR
+        print("Loading whisper ...", end="")
+        self.whisper = whisper.load_model("base")
+        print("ok")
+
+        # Set up PyAudio
+        self.p = pyaudio.PyAudio()
+        self.stream = self.p.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=self.SAMPLE_RATE,
+            input=True,
+            frames_per_buffer=self.FRAMES_PER_BUFFER,
+        )
+        self._stopped = False
+
+        # Set the callback
+        self.callback = callback
+
+    def start(self):
+        """Transcribe microphone audio chunk by chunk until interrupted.
+
+        This call blocks. Ctrl+C ends the loop quietly; any other exception
+        propagates. In both cases the audio stream is released first.
+        """
+        import torch
+        # Start the stream
+        self.stream.start_stream()
+
+        try:
+            while True:
+                # Transcription can take longer than one chunk lasts, so do not
+                # raise when the input buffer overflowed in the meantime.
+                data = self.stream.read(self.FRAMES_PER_BUFFER, exception_on_overflow=False)
+
+                # int16 PCM -> float32 scaled to [-1, 1), the range Whisper's
+                # own audio loader produces.
+                samples = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
+                result = self.whisper.transcribe(torch.from_numpy(samples))
+
+                # The result is a dict and therefore always truthy; only
+                # forward chunks that actually produced text.
+                text = result.get("text", "") if result else ""
+                if text.strip():
+                    self.callback(text)
+        except KeyboardInterrupt:
+            # If the user hits Ctrl+C, fall through to the cleanup below
+            pass
+        finally:
+            self.stop()
+
+    def stop(self):
+        """Stop the stream and release PyAudio; safe to call more than once."""
+        if getattr(self, "_stopped", False):
+            return
+        self._stopped = True
+        self.stream.stop_stream()
+        self.stream.close()
+        self.p.terminate()