diff --git a/AudioRecorder.py b/AudioRecorder.py index 83394ee..b0a0031 100644 --- a/AudioRecorder.py +++ b/AudioRecorder.py @@ -14,8 +14,10 @@ class DefaultMicRecorder: self.source = sr.Microphone(sample_rate=16000) self.num_channels = 1 + print("Adjusting for ambient microphone noise. Say something through the microphone") with self.source: self.recorder.adjust_for_ambient_noise(self.source) + print("Done adjusting for ambient microphone noise") def record_into_queue(self, audio_queue): def record_callback(_, audio:sr.AudioData) -> None: @@ -44,11 +46,13 @@ class DefaultSpeakerRecorder: self.source = sr.Microphone(sample_rate=int(self.default_speakers["defaultSampleRate"]), speaker=True, - chunk_size= pyaudio.get_sample_size(pyaudio.paInt16)) + chunk_size=pyaudio.get_sample_size(pyaudio.paInt16)) self.num_channels = self.default_speakers["maxInputChannels"] + print("Adjusting for ambient speaker noise. Play something through the speaker") with self.source: self.recorder.adjust_for_ambient_noise(self.source) + print("Done adjusting for ambient speaker noise") def record_into_queue(self, audio_queue): def record_callback(_, audio:sr.AudioData) -> None: diff --git a/AudioTranscriber.py b/AudioTranscriber.py index 3021571..681031d 100644 --- a/AudioTranscriber.py +++ b/AudioTranscriber.py @@ -7,12 +7,13 @@ from tempfile import NamedTemporaryFile import custom_speech_recognition as sr import io from datetime import timedelta -from time import sleep import pyaudiowpatch as pyaudio from AudioRecorder import DefaultMicRecorder, DefaultSpeakerRecorder from heapq import merge -PHRASE_TIMEOUT = 4 +PHRASE_TIMEOUT = 3.01 + +MAX_PHRASES = 10 class AudioTranscriber: def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSpeakerRecorder): @@ -26,7 +27,7 @@ class AudioTranscriber: self.mic_channels = default_mic.num_channels self.speaker_sample_rate = default_speaker.source.SAMPLE_RATE - self.speaker_sample_rate = default_speaker.source.SAMPLE_RATE + 
self.speaker_sample_width = default_speaker.source.SAMPLE_WIDTH self.speaker_channels = default_speaker.num_channels def create_transcription_from_queue(self, audio_queue): @@ -66,6 +67,8 @@ class AudioTranscriber: if text != '' and text.lower() != 'you': if mic_start_new_phrase or len(self.mic_transcript_data) == 0: + if len(self.mic_transcript_data) > MAX_PHRASES: + self.mic_transcript_data.pop() self.mic_transcript_data = [(who_spoke + ": [" + text + ']\n\n', time_spoken)] + self.mic_transcript_data self.transcript_changed_event.set() else: @@ -96,6 +99,8 @@ class AudioTranscriber: if text != '' and text.lower() != 'you': if speaker_start_new_phrase or len(self.speaker_transcript_data) == 0: + if len(self.speaker_transcript_data) > MAX_PHRASES: + self.speaker_transcript_data.pop() self.speaker_transcript_data = [(who_spoke + ": [" + text + ']\n\n', time_spoken)] + self.speaker_transcript_data self.transcript_changed_event.set() @@ -107,5 +112,6 @@ class AudioTranscriber: def get_transcript(self): key = lambda x : x[1] transcript_tuple = list(merge(self.mic_transcript_data, self.speaker_transcript_data, key=key, reverse=True)) + transcript_tuple = transcript_tuple[0:MAX_PHRASES] return "".join([t[0] for t in transcript_tuple]) \ No newline at end of file diff --git a/main.py b/main.py index 7997a58..2d8265a 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,6 @@ from GPTResponder import GPTResponder import customtkinter as ctk import AudioRecorder import queue -import os import time def write_in_textbox(textbox, text): diff --git a/requirements.txt b/requirements.txt index f740aa4..1c9b605 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -numpy -openai-whisper -torch -wave -openai -customtkinter -pyaudiowpatch \ No newline at end of file +numpy==1.24.3 +openai-whisper==20230314 +torch==2.0.1 +Wave==0.0.2 +openai==0.27.6 +customtkinter==5.1.3 +PyAudioWPatch==0.2.12.5 \ No newline at end of file