mirror of
https://github.com/SevaSk/ecoute.git
synced 2024-12-18 20:27:56 +00:00
refactor audio recording
This commit is contained in:
parent
9e3aa3685d
commit
654464c8d9
@ -7,12 +7,11 @@ ENERGY_THRESHOLD = 1000
|
||||
DYNAMIC_ENERGY_THRESHOLD = False
|
||||
|
||||
class BaseRecorder:
|
||||
def __init__(self, source, num_channels, source_name):
|
||||
def __init__(self, source, source_name):
|
||||
self.recorder = sr.Recognizer()
|
||||
self.recorder.energy_threshold = ENERGY_THRESHOLD
|
||||
self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
|
||||
self.source = source
|
||||
self.num_channels = num_channels
|
||||
self.source_name = source_name
|
||||
|
||||
def adjust_for_noise(self):
|
||||
@ -30,7 +29,7 @@ class BaseRecorder:
|
||||
|
||||
class DefaultMicRecorder(BaseRecorder):
|
||||
def __init__(self):
|
||||
super().__init__(source=sr.Microphone(sample_rate=16000), num_channels=1, source_name="You")
|
||||
super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
|
||||
self.adjust_for_noise()
|
||||
|
||||
class DefaultSpeakerRecorder(BaseRecorder):
|
||||
@ -47,8 +46,10 @@ class DefaultSpeakerRecorder(BaseRecorder):
|
||||
else:
|
||||
print("[ERROR] No loopback device found.")
|
||||
|
||||
source = sr.Microphone(sample_rate=int(default_speakers["defaultSampleRate"]),
|
||||
speaker=True,
|
||||
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16))
|
||||
super().__init__(source=source, num_channels=default_speakers["maxInputChannels"], source_name="Speaker")
|
||||
source = sr.Microphone(speaker=True,
|
||||
device_index= default_speakers["index"],
|
||||
sample_rate=int(default_speakers["defaultSampleRate"]),
|
||||
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
|
||||
channels=default_speakers["maxInputChannels"])
|
||||
super().__init__(source=source, source_name="Speaker")
|
||||
self.adjust_for_noise()
|
@ -24,7 +24,7 @@ class AudioTranscriber:
|
||||
"You": {
|
||||
"sample_rate": default_mic.source.SAMPLE_RATE,
|
||||
"sample_width": default_mic.source.SAMPLE_WIDTH,
|
||||
"channels": default_mic.num_channels,
|
||||
"channels": default_mic.source.channels,
|
||||
"last_sample": bytes(),
|
||||
"last_spoken": None,
|
||||
"new_phrase": True,
|
||||
@ -33,7 +33,7 @@ class AudioTranscriber:
|
||||
"Speaker": {
|
||||
"sample_rate": default_speaker.source.SAMPLE_RATE,
|
||||
"sample_width": default_speaker.source.SAMPLE_WIDTH,
|
||||
"channels": default_speaker.num_channels,
|
||||
"channels": default_speaker.source.channels,
|
||||
"last_sample": bytes(),
|
||||
"last_spoken": None,
|
||||
"new_phrase": True,
|
||||
@ -44,9 +44,8 @@ class AudioTranscriber:
|
||||
def transcribe_audio_queue(self, audio_queue):
|
||||
while True:
|
||||
who_spoke, data, time_spoken = audio_queue.get()
|
||||
source_info = self.audio_sources[who_spoke]
|
||||
|
||||
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
|
||||
source_info = self.audio_sources[who_spoke]
|
||||
temp_file = source_info["process_data_func"](source_info["last_sample"])
|
||||
text = self.get_transcription(temp_file)
|
||||
|
||||
@ -107,5 +106,4 @@ class AudioTranscriber:
|
||||
|
||||
def clear_transcript_data(self):
|
||||
self.transcript_data["You"].clear()
|
||||
self.transcript_data["Speaker"].clear()
|
||||
|
||||
self.transcript_data["Speaker"].clear()
|
@ -71,7 +71,7 @@ class Microphone(AudioSource):
|
||||
|
||||
Higher ``chunk_size`` values help avoid triggering on rapidly changing ambient noise, but also make detection less sensitive. This value, generally, should be left at its default.
|
||||
"""
|
||||
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False):
|
||||
def __init__(self, device_index=None, sample_rate=None, chunk_size=1024, speaker=False, channels = 1):
|
||||
assert device_index is None or isinstance(device_index, int), "Device index must be None or an integer"
|
||||
assert sample_rate is None or (isinstance(sample_rate, int) and sample_rate > 0), "Sample rate must be None or a positive integer"
|
||||
assert isinstance(chunk_size, int) and chunk_size > 0, "Chunk size must be a positive integer"
|
||||
@ -96,6 +96,7 @@ class Microphone(AudioSource):
|
||||
self.SAMPLE_WIDTH = self.pyaudio_module.get_sample_size(self.format) # size of each sample
|
||||
self.SAMPLE_RATE = sample_rate # sampling rate in Hertz
|
||||
self.CHUNK = chunk_size # number of frames stored in each buffer
|
||||
self.channels = channels
|
||||
|
||||
self.audio = None
|
||||
self.stream = None
|
||||
@ -178,35 +179,16 @@ class Microphone(AudioSource):
|
||||
try:
|
||||
if self.speaker:
|
||||
p = self.audio
|
||||
pyaudio = self.pyaudio_module
|
||||
try:
|
||||
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
|
||||
except:
|
||||
pass
|
||||
|
||||
default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
|
||||
if not default_speakers["isLoopbackDevice"]:
|
||||
for loopback in p.get_loopback_device_info_generator():
|
||||
"""
|
||||
Try to find loopback device with same name(and [Loopback suffix]).
|
||||
Unfortunately, this is the most adequate way at the moment.
|
||||
"""
|
||||
if default_speakers["name"] in loopback["name"]:
|
||||
default_speakers = loopback
|
||||
break
|
||||
else:
|
||||
exit()
|
||||
|
||||
self.stream = Microphone.MicrophoneStream(
|
||||
p.open(
|
||||
input_device_index=default_speakers["index"],
|
||||
channels=default_speakers["maxInputChannels"],
|
||||
format=self.format,
|
||||
rate=int(default_speakers["defaultSampleRate"]),
|
||||
frames_per_buffer=pyaudio.get_sample_size(pyaudio.paInt16),
|
||||
input=True,
|
||||
)
|
||||
self.stream = Microphone.MicrophoneStream(
|
||||
p.open(
|
||||
input_device_index=self.device_index,
|
||||
channels=self.channels,
|
||||
format=self.format,
|
||||
rate=self.SAMPLE_RATE,
|
||||
frames_per_buffer=self.CHUNK,
|
||||
input=True
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.stream = Microphone.MicrophoneStream(
|
||||
self.audio.open(
|
||||
|
Loading…
Reference in New Issue
Block a user