mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-02-23 10:30:15 +00:00
moved to sounddevice instead of pyaudio
This commit is contained in:
parent
e0e99c5a74
commit
be2283cf34
@ -14,8 +14,8 @@ import subprocess
|
|||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
if not PackageManager.check_package_installed("cv2"):
|
if not PackageManager.check_package_installed("cv2"):
|
||||||
|
os.system('sudo apt-get update')
|
||||||
os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y')
|
os.system('sudo apt-get install libgl1-mesa-glx python3-opencv -y')
|
||||||
PackageManager.install_package("opencv-python")
|
|
||||||
import cv2
|
import cv2
|
||||||
|
|
||||||
|
|
||||||
@ -41,28 +41,24 @@ import json
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
if not PackageManager.check_package_installed("sounddevice"):
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
PackageManager.install_package("sounddevice")
|
||||||
|
elif platform.system() == "Linux":
|
||||||
|
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-sounddevice"])
|
||||||
|
elif platform.system() == "Darwin":
|
||||||
|
subprocess.check_call(["brew", "install", "portaudio19-dev python3-sounddevice"])
|
||||||
|
PackageManager.install_package("wave")
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
class AudioRecorder:
|
class AudioRecorder:
|
||||||
def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom:LoLLMsCom=None):
|
def __init__(self, socketio, filename, channels=1, sample_rate=16000, chunk_size=24678, silence_threshold=150.0, silence_duration=2, callback=None, lollmsCom=None):
|
||||||
try:
|
try:
|
||||||
if not PackageManager.check_package_installed("pyaudio"):
|
|
||||||
if platform.system() == "Windows":
|
|
||||||
PackageManager.install_package("pyaudio")
|
|
||||||
elif platform.system() == "Linux":
|
|
||||||
subprocess.check_call(["sudo", "apt", "install", "-y", "portaudio19-dev python3-pyaudio"])
|
|
||||||
elif platform.system() == "Darwin":
|
|
||||||
subprocess.check_call(["brew", "install", "portaudio19-dev python3-pyaudio"])
|
|
||||||
PackageManager.install_package("wave")
|
|
||||||
|
|
||||||
import pyaudio
|
|
||||||
import wave
|
|
||||||
|
|
||||||
self.socketio = socketio
|
self.socketio = socketio
|
||||||
self.filename = filename
|
self.filename = filename
|
||||||
self.channels = channels
|
self.channels = channels
|
||||||
self.sample_rate = sample_rate
|
self.sample_rate = sample_rate
|
||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self.audio_format = pyaudio.paInt16
|
|
||||||
self.audio_stream = None
|
self.audio_stream = None
|
||||||
self.audio_frames = []
|
self.audio_frames = []
|
||||||
self.is_recording = False
|
self.is_recording = False
|
||||||
@ -78,7 +74,6 @@ class AudioRecorder:
|
|||||||
self.channels = channels
|
self.channels = channels
|
||||||
self.sample_rate = sample_rate
|
self.sample_rate = sample_rate
|
||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self.audio_format = None
|
|
||||||
self.audio_stream = None
|
self.audio_stream = None
|
||||||
self.audio_frames = []
|
self.audio_frames = []
|
||||||
self.is_recording = False
|
self.is_recording = False
|
||||||
@ -89,44 +84,37 @@ class AudioRecorder:
|
|||||||
self.lollmsCom = lollmsCom
|
self.lollmsCom = lollmsCom
|
||||||
self.whisper_model = None
|
self.whisper_model = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def start_recording(self):
|
def start_recording(self):
|
||||||
if self.whisper_model is None:
|
if self.whisper_model is None:
|
||||||
self.lollmsCom.info("Loading whisper model")
|
self.lollmsCom.info("Loading whisper model")
|
||||||
self.whisper_model=whisper.load_model("base.en")
|
self.whisper_model=whisper.load_model("base.en")
|
||||||
try:
|
try:
|
||||||
import pyaudio
|
|
||||||
self.is_recording = True
|
self.is_recording = True
|
||||||
self.audio_stream = pyaudio.PyAudio().open(
|
self.audio_stream = sd.InputStream(
|
||||||
format=self.audio_format,
|
|
||||||
channels=self.channels,
|
channels=self.channels,
|
||||||
rate=self.sample_rate,
|
samplerate=self.sample_rate,
|
||||||
input=True,
|
callback=self._record,
|
||||||
frames_per_buffer=self.chunk_size
|
blocksize=self.chunk_size
|
||||||
)
|
)
|
||||||
|
self.audio_stream.start()
|
||||||
|
|
||||||
self.lollmsCom.info("Recording started...")
|
self.lollmsCom.info("Recording started...")
|
||||||
|
|
||||||
threading.Thread(target=self._record).start()
|
|
||||||
except:
|
except:
|
||||||
self.lollmsCom.error("No audio input found!")
|
self.lollmsCom.error("No audio input found!")
|
||||||
|
|
||||||
|
def _record(self, indata, frames, time, status):
|
||||||
def _record(self):
|
|
||||||
first_recording = True # Flag to track the first recording
|
first_recording = True # Flag to track the first recording
|
||||||
silence_duration = 5
|
silence_duration = 5
|
||||||
non_silent_start = None
|
non_silent_start = None
|
||||||
non_silent_end = None
|
non_silent_end = None
|
||||||
last_spectrogram_update = time.time()
|
last_spectrogram_update = time.time()
|
||||||
self.audio_frames = None
|
self.audio_frames = None
|
||||||
while self.is_recording:
|
buffered = np.array(indata)
|
||||||
data = self.audio_stream.read(self.chunk_size)
|
|
||||||
buffered = np.frombuffer(data, dtype=np.int16)
|
|
||||||
if self.audio_frames is not None:
|
if self.audio_frames is not None:
|
||||||
self.audio_frames = np.concatenate([self.audio_frames,buffered])
|
self.audio_frames = np.concatenate([self.audio_frames, buffered])
|
||||||
else:
|
else:
|
||||||
self.audio_frames = buffered
|
self.audio_frames = buffered
|
||||||
|
|
||||||
# Remove audio frames that are older than 30 seconds
|
# Remove audio frames that are older than 30 seconds
|
||||||
if len(self.audio_frames) > self.sample_rate * 30:
|
if len(self.audio_frames) > self.sample_rate * 30:
|
||||||
self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
|
self.audio_frames=self.audio_frames[-self.sample_rate * 30:]
|
||||||
@ -148,17 +136,14 @@ class AudioRecorder:
|
|||||||
if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1:
|
if self.callback and non_silent_start is not None and non_silent_end - non_silent_start >= 1:
|
||||||
self.lollmsCom.info("Analyzing")
|
self.lollmsCom.info("Analyzing")
|
||||||
# Convert to float
|
# Convert to float
|
||||||
import pyaudio
|
|
||||||
import wave
|
|
||||||
audio_data = self.audio_frames.astype(np.float32)
|
audio_data = self.audio_frames.astype(np.float32)
|
||||||
audio = wave.open(str(self.filename), 'wb')
|
audio = wave.open(str(self.filename), 'wb')
|
||||||
audio.setnchannels(self.channels)
|
audio.setnchannels(self.channels)
|
||||||
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format))
|
audio.setsampwidth(audio_stream.dtype.itemsize)
|
||||||
audio.setframerate(self.sample_rate)
|
audio.setframerate(self.sample_rate)
|
||||||
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
|
audio.writeframes(b''.join(self.audio_frames[non_silent_start:non_silent_end]))
|
||||||
audio.close()
|
audio.close()
|
||||||
|
|
||||||
|
|
||||||
# Transcribe the audio using the whisper model
|
# Transcribe the audio using the whisper model
|
||||||
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
|
text = self.whisper_model.transcribe(audio_data[non_silent_start:non_silent_end])
|
||||||
|
|
||||||
@ -208,17 +193,14 @@ class AudioRecorder:
|
|||||||
rms = 0
|
rms = 0
|
||||||
return rms
|
return rms
|
||||||
|
|
||||||
|
|
||||||
def stop_recording(self):
|
def stop_recording(self):
|
||||||
self.is_recording = False
|
self.is_recording = False
|
||||||
if self.audio_stream:
|
if self.audio_stream:
|
||||||
self.audio_stream.stop_stream()
|
self.audio_stream.stop()
|
||||||
self.audio_stream.close()
|
|
||||||
import pyaudio
|
|
||||||
import wave
|
import wave
|
||||||
audio = wave.open(str(self.filename), 'wb')
|
audio = wave.open(str(self.filename), 'wb')
|
||||||
audio.setnchannels(self.channels)
|
audio.setnchannels(self.channels)
|
||||||
audio.setsampwidth(pyaudio.PyAudio().get_sample_size(self.audio_format))
|
audio.setsampwidth(self.audio_stream.dtype.itemsize)
|
||||||
audio.setframerate(self.sample_rate)
|
audio.setframerate(self.sample_rate)
|
||||||
audio.writeframes(b''.join(self.audio_frames))
|
audio.writeframes(b''.join(self.audio_frames))
|
||||||
audio.close()
|
audio.close()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user