Moved from pyaudio to the sounddevice library for audio capture.

This commit is contained in:
Saifeddine ALOUI 2023-12-22 20:16:14 +01:00
parent e0e99c5a74
commit be2283cf34

View File

@ -14,8 +14,8 @@ import subprocess
import os import os
import threading import threading
if not PackageManager.check_package_installed("cv2"): if not PackageManager.check_package_installed("cv2"):
os.system('sudo apt-get update')
os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y') os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y')
PackageManager.install_package("opencv-python")
import cv2 import cv2
@ -41,28 +41,24 @@ import json
import base64 import base64
import io import io
import numpy as np import numpy as np
if not PackageManager.check_package_installed("sounddevice"):
if platform.system() == "Windows":
PackageManager.install_package("sounddevice")
elif platform.system() == "Linux":
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-sounddevice"])
elif platform.system() == "Darwin":
subprocess.check_call(["brew", "install", "portaudio19-dev python3-sounddevice"])
PackageManager.install_package("wave")
import sounddevice as sd
class AudioRecorder: class AudioRecorder:
def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None): def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom=None):
try: try:
if not PackageManager.check_package_installed("pyaudio"):
if platform.system() == "Windows":
PackageManager.install_package("pyaudio")
elif platform.system() == "Linux":
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-pyaudio"])
elif platform.system() == "Darwin":
subprocess.check_call(["brew", "install", "portaudio19-dev python3-pyaudio"])
PackageManager.install_package("wave")
import pyaudio
import wave
self.socketio = socketio self.socketio = socketio
self.filename = filename self.filename = filename
self.channels = channels self.channels = channels
self.sample_rate = sample_rate self.sample_rate = sample_rate
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.audio_format = pyaudio.paInt16
self.audio_stream = None self.audio_stream = None
self.audio_frames = [] self.audio_frames = []
self.is_recording = False self.is_recording = False
@ -78,7 +74,6 @@ class AudioRecorder:
self.channels = channels self.channels = channels
self.sample_rate = sample_rate self.sample_rate = sample_rate
self.chunk_size = chunk_size self.chunk_size = chunk_size
self.audio_format = None
self.audio_stream = None self.audio_stream = None
self.audio_frames = [] self.audio_frames = []
self.is_recording = False self.is_recording = False
@ -89,44 +84,37 @@ class AudioRecorder:
self.lollmsCom = lollmsCom self.lollmsCom = lollmsCom
self.whisper_model = None self.whisper_model = None
def start_recording(self): def start_recording(self):
if self.whisper_model is None: if self.whisper_model is None:
self.lollmsCom.info("Loading whisper model") self.lollmsCom.info("Loading whisper model")
self.whisper_model=whisper.load_model("base.en") self.whisper_model=whisper.load_model("base.en")
try: try:
import pyaudio
self.is_recording = True self.is_recording = True
self.audio_stream = pyaudio.PyAudio().open( self.audio_stream = sd.InputStream(
format=self.audio_format,
channels=self.channels, channels=self.channels,
rate=self.sample_rate, samplerate=self.sample_rate,
input=True, callback=self._record,
frames_per_buffer=self.chunk_size blocksize=self.chunk_size
) )
self.audio_stream.start()
self.lollmsCom.info("Recording started...") self.lollmsCom.info("Recording started...")
threading.Thread(target=self._record).start()
except: except:
self.lollmsCom.error("No audio input found!") self.lollmsCom.error("No audio input found!")
def _record(self, indata, frames, time, status):
def _record(self):
first_recording = True # Flag to track the first recording first_recording = True # Flag to track the first recording
silence_duration = 5 silence_duration = 5
non_silent_start = None non_silent_start = None
non_silent_end = None non_silent_end = None
last_spectrogram_update = time.time() last_spectrogram_update = time.time()
self.audio_frames = None self.audio_frames = None
while self.is_recording: buffered = np.array(indata)
data = self.audio_stream.read(self.chunk_size)
buffered = np.frombuffer(data, dtype=np.int16)
if self.audio_frames is not None: if self.audio_frames is not None:
self.audio_frames = np.concatenate([self.audio_frames,buffered]) self.audio_frames = np.concatenate([self.audio_frames, buffered])
else: else:
self.audio_frames = buffered self.audio_frames = buffered
# Remove audio frames that are older than 30 seconds # Remove audio frames that are older than 30 seconds
if len(self.audio_frames) > self.sample_rate * 30: if len(self.audio_frames) > self.sample_rate * 30:
self.audio_frames=self.audio_frames[-self.sample_rate * 30:] self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
@ -148,17 +136,14 @@ class AudioRecorder:
if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1: if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1:
self.lollmsCom.info("Analyzing") self.lollmsCom.info("Analyzing")
# Convert to float # Convert to float
import pyaudio
import wave
audio_data = self.audio_frames.astype(np.float32) audio_data = self.audio_frames.astype(np.float32)
audio = wave.open(str(self.filename), 'wb') audio = wave.open(str(self.filename), 'wb')
audio.setnchannels(self.channels) audio.setnchannels(self.channels)
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format)) audio.setsampwidth(audio_stream.dtype.itemsize)
audio.setframerate(self.sample_rate) audio.setframerate(self.sample_rate)
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end])) audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
audio.close() audio.close()
# Transcribe the audio using the whisper model # Transcribe the audio using the whisper model
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end]) text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
@ -208,17 +193,14 @@ class AudioRecorder:
rms = 0 rms = 0
return rms return rms
def stop_recording(self): def stop_recording(self):
self.is_recording = False self.is_recording = False
if self.audio_stream: if self.audio_stream:
self.audio_stream.stop_stream() self.audio_stream.stop()
self.audio_stream.close()
import pyaudio
import wave import wave
audio = wave.open(str(self.filename), 'wb') audio = wave.open(str(self.filename), 'wb')
audio.setnchannels(self.channels) audio.setnchannels(self.channels)
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format)) audio.setsampwidth(self.audio_stream.dtype.itemsize)
audio.setframerate(self.sample_rate) audio.setframerate(self.sample_rate)
audio.writeframes(b''.join(self.audio_frames)) audio.writeframes(b''.join(self.audio_frames))
audio.close() audio.close()