moved to sounddevice instead of pyaudio

This commit is contained in:
Saifeddine ALOUI 2023-12-22 20:16:14 +01:00
parent e0e99c5a74
commit be2283cf34

View File

@ -14,8 +14,8 @@ import subprocess
import os import os
import threading import threading
if not PackageManager.check_package_installed("cv2"): if not PackageManager.check_package_installed("cv2"):
os.system('sudo apt-get update')
os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y') os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y')
PackageManager.install_package("opencv-python")
import cv2 import cv2
@ -41,28 +41,24 @@ import json
import base64 import base64
import io import io
import numpy as np import numpy as np
if not PackageManager.check_package_installed("sounddevice"):
if platform.system() == "Windows":
PackageManager.install_package("sounddevice")
elif platform.system() == "Linux":
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-sounddevice"])
elif platform.system() == "Darwin":
subprocess.check_call(["brew", "install", "portaudio19-dev python3-sounddevice"])
PackageManager.install_package("wave")
import sounddevice as sd
class AudioRecorder: class AudioRecorder:
def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None): def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom=None):
try: try:
if not PackageManager.check_package_installed("pyaudio"):
if platform.system() == "Windows":
PackageManager.install_package("pyaudio")
elif platform.system() == "Linux":
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-pyaudio"])
elif platform.system() == "Darwin":
subprocess.check_call(["brew", "install", "portaudio19-dev python3-pyaudio"])
PackageManager.install_package("wave")
import pyaudio
import wave
self.socketio = socketio self.socketio = socketio
self.filename = filename self.filename = filename
self.channels = channels self.channels = channels
self.sample_rate = sample_rate self.sample_rate = sample_rate
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.audio_format = pyaudio.paInt16
self.audio_stream = None self.audio_stream = None
self.audio_frames = [] self.audio_frames = []
self.is_recording = False self.is_recording = False
@ -78,7 +74,6 @@ class AudioRecorder:
self.channels = channels self.channels = channels
self.sample_rate = sample_rate self.sample_rate = sample_rate
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.audio_format = None
self.audio_stream = None self.audio_stream = None
self.audio_frames = [] self.audio_frames = []
self.is_recording = False self.is_recording = False
@ -88,91 +83,81 @@ class AudioRecorder:
self.callback = callback self.callback = callback
self.lollmsCom = lollmsCom self.lollmsCom = lollmsCom
self.whisper_model = None self.whisper_model = None
def start_recording(self): def start_recording(self):
if self.whisper_model is None: if self.whisper_model is None:
self.lollmsCom.info("Loading whisper model") self.lollmsCom.info("Loading whisper model")
self.whisper_model=whisper.load_model("base.en") self.whisper_model=whisper.load_model("base.en")
try: try:
import pyaudio
self.is_recording = True self.is_recording = True
self.audio_stream = pyaudio.PyAudio().open( self.audio_stream = sd.InputStream(
format=self.audio_format,
channels=self.channels, channels=self.channels,
rate=self.sample_rate, samplerate=self.sample_rate,
input=True, callback=self._record,
frames_per_buffer=self.chunk_size blocksize=self.chunk_size
) )
self.audio_stream.start()
self.lollmsCom.info("Recording started...") self.lollmsCom.info("Recording started...")
threading.Thread(target=self._record).start()
except: except:
self.lollmsCom.error("No audio input found!") self.lollmsCom.error("No audio input found!")
def _record(self, indata, frames, time, status):
def _record(self):
first_recording = True # Flag to track the first recording first_recording = True # Flag to track the first recording
silence_duration = 5 silence_duration = 5
non_silent_start = None non_silent_start = None
non_silent_end = None non_silent_end = None
last_spectrogram_update = time.time() last_spectrogram_update = time.time()
self.audio_frames = None self.audio_frames = None
while self.is_recording: buffered = np.array(indata)
data = self.audio_stream.read(self.chunk_size) if self.audio_frames is not None:
buffered = np.frombuffer(data, dtype=np.int16) self.audio_frames = np.concatenate([self.audio_frames, buffered])
if self.audio_frames is not None: else:
self.audio_frames = np.concatenate([self.audio_frames,buffered]) self.audio_frames = buffered
else:
self.audio_frames = buffered
# Remove audio frames that are older than 30 seconds
if len(self.audio_frames) > self.sample_rate * 30:
self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
# Update spectrogram every 3 seconds # Remove audio frames that are older than 30 seconds
if time.time() - last_spectrogram_update >= 1: if len(self.audio_frames) > self.sample_rate * 30:
self._update_spectrogram() self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
last_spectrogram_update = time.time()
# Check for silence # Update spectrogram every 3 seconds
rms = self._calculate_rms(buffered) if time.time() - last_spectrogram_update >= 1:
if rms < self.silence_threshold: self._update_spectrogram()
current_time = time.time() last_spectrogram_update = time.time()
if current_time - self.last_sound_time >= silence_duration:
if first_recording:
first_recording = False
silence_duration = self.silence_duration
if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1: # Check for silence
self.lollmsCom.info("Analyzing") rms = self._calculate_rms(buffered)
# Convert to float if rms < self.silence_threshold:
import pyaudio current_time = time.time()
import wave if current_time - self.last_sound_time >= silence_duration:
audio_data = self.audio_frames.astype(np.float32) if first_recording:
audio = wave.open(str(self.filename), 'wb') first_recording = False
audio.setnchannels(self.channels) silence_duration = self.silence_duration
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format))
audio.setframerate(self.sample_rate)
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
audio.close()
# Transcribe the audio using the whisper model
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
self.callback(text) if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1:
print(text["text"]) self.lollmsCom.info("Analyzing")
# Convert to float
audio_data = self.audio_frames.astype(np.float32)
audio = wave.open(str(self.filename), 'wb')
audio.setnchannels(self.channels)
audio.setsampwidth(audio_stream.dtype.itemsize)
audio.setframerate(self.sample_rate)
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
audio.close()
self.last_sound_time = time.time() # Transcribe the audio using the whisper model
non_silent_start = None text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
self.callback(text)
print(text["text"])
else:
self.last_sound_time = time.time() self.last_sound_time = time.time()
if non_silent_start is None: non_silent_start = None
non_silent_start = len(self.audio_frames) - 1
non_silent_end = len(self.audio_frames) else:
self.last_sound_time = time.time()
if non_silent_start is None:
non_silent_start = len(self.audio_frames) - 1
non_silent_end = len(self.audio_frames)
def _update_spectrogram(self): def _update_spectrogram(self):
audio_data = self.audio_frames[-self.sample_rate*30:] audio_data = self.audio_frames[-self.sample_rate*30:]
@ -208,17 +193,14 @@ class AudioRecorder:
rms = 0 rms = 0
return rms return rms
def stop_recording(self): def stop_recording(self):
self.is_recording = False self.is_recording = False
if self.audio_stream: if self.audio_stream:
self.audio_stream.stop_stream() self.audio_stream.stop()
self.audio_stream.close()
import pyaudio
import wave import wave
audio = wave.open(str(self.filename), 'wb') audio = wave.open(str(self.filename), 'wb')
audio.setnchannels(self.channels) audio.setnchannels(self.channels)
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format)) audio.setsampwidth(self.audio_stream.dtype.itemsize)
audio.setframerate(self.sample_rate) audio.setframerate(self.sample_rate)
audio.writeframes(b''.join(self.audio_frames)) audio.writeframes(b''.join(self.audio_frames))
audio.close() audio.close()