refactor audio recording

This commit is contained in:
SevaSk 2023-05-13 10:36:00 -04:00
parent 9e3aa3685d
commit 654464c8d9
3 changed files with 23 additions and 42 deletions

View File

@@ -7,12 +7,11 @@ ENERGY_THRESHOLD = 1000
DYNAMIC_ENERGY_THRESHOLD = False
class BaseRecorder:
def __init__(self, source, num_channels, source_name):
def __init__(self, source, source_name):
self.recorder = sr.Recognizer()
self.recorder.energy_threshold = ENERGY_THRESHOLD
self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
self.source = source
self.num_channels = num_channels
self.source_name = source_name
def adjust_for_noise(self):
@@ -30,7 +29,7 @@ class BaseRecorder:
class DefaultMicRecorder(BaseRecorder):
def __init__(self):
super().__init__(source=sr.Microphone(sample_rate=16000), num_channels=1, source_name="You")
super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
self.adjust_for_noise()
class DefaultSpeakerRecorder(BaseRecorder):
@@ -47,8 +46,10 @@ class DefaultSpeakerRecorder(BaseRecorder):
else:
print("[ERROR] No loopback device found.")
source = sr.Microphone(sample_rate=int(default_speakers["defaultSampleRate"]),
speaker=True,
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16))
super().__init__(source=source, num_channels=default_speakers["maxInputChannels"], source_name="Speaker")
source = sr.Microphone(speaker=True,
device_index= default_speakers["index"],
sample_rate=int(default_speakers["defaultSampleRate"]),
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
channels=default_speakers["maxInputChannels"])
super().__init__(source=source, source_name="Speaker")
self.adjust_for_noise()

View File

@@ -24,7 +24,7 @@ class AudioTranscriber:
"You": {
"sample_rate": default_mic.source.SAMPLE_RATE,
"sample_width": default_mic.source.SAMPLE_WIDTH,
"channels": default_mic.num_channels,
"channels": default_mic.source.channels,
"last_sample": bytes(),
"last_spoken": None,
"new_phrase": True,
@@ -33,7 +33,7 @@ class AudioTranscriber:
"Speaker": {
"sample_rate": default_speaker.source.SAMPLE_RATE,
"sample_width": default_speaker.source.SAMPLE_WIDTH,
"channels": default_speaker.num_channels,
"channels": default_speaker.source.channels,
"last_sample": bytes(),
"last_spoken": None,
"new_phrase": True,
@@ -44,9 +44,8 @@ class AudioTranscriber:
def transcribe_audio_queue(self, audio_queue):
while True:
who_spoke, data, time_spoken = audio_queue.get()
source_info = self.audio_sources[who_spoke]
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
source_info = self.audio_sources[who_spoke]
temp_file = source_info["process_data_func"](source_info["last_sample"])
text = self.get_transcription(temp_file)
@@ -107,5 +106,4 @@ class AudioTranscriber:
def clear_transcript_data(self):
self.transcript_data["You"].clear()
self.transcript_data["Speaker"].clear()
self.transcript_data["Speaker"].clear()

View File

@@ -71,7 +71,7 @@ class Microphone(AudioSource):
Higher ``chunk_size`` values help avoid triggering on rapidly changing ambient noise, but also makes detection less sensitive. This value, generally, should be left at its default.
"""
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False):
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False, channels = 1):
assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
assert sample_rate is None or (isinstance(sample_rate, int) and sample_rate > 0), "Sample rate must be None or a positive integer"
assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
@@ -96,6 +96,7 @@ class Microphone(AudioSource):
self.SAMPLE_WIDTH = self.pyaudio_module.get_sample_size(self.format) # size of each sample
self.SAMPLE_RATE = sample_rate # sampling rate in Hertz
self.CHUNK = chunk_size # number of frames stored in each buffer
self.channels = channels
self.audio = None
self.stream = None
@@ -178,35 +179,16 @@
try:
if self.speaker:
p = self.audio
pyaudio = self.pyaudio_module
try:
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
except:
pass
default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
if not default_speakers["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
"""
Try to find loopback device with same name(and [Loopback suffix]).
Unfortunately, this is the most adequate way at the moment.
"""
if default_speakers["name"] in loopback["name"]:
default_speakers = loopback
break
else:
exit()
self.stream = Microphone.MicrophoneStream(
p.open(
input_device_index=default_speakers["index"],
channels=default_speakers["maxInputChannels"],
format=self.format,
rate=int(default_speakers["defaultSampleRate"]),
frames_per_buffer=pyaudio.get_sample_size(pyaudio.paInt16),
input=True,
)
self.stream = Microphone.MicrophoneStream(
p.open(
input_device_index=self.device_index,
channels=self.channels,
format=self.format,
rate=self.SAMPLE_RATE,
frames_per_buffer=self.CHUNK,
input=True
)
)
else:
self.stream = Microphone.MicrophoneStream(
self.audio.open(