added transcript limit

SevaSk 2023-05-12 21:08:31 -04:00
parent e07a090f4b
commit 22ea6acb93
4 changed files with 21 additions and 12 deletions

@@ -14,8 +14,10 @@ class DefaultMicRecorder:
         self.source = sr.Microphone(sample_rate=16000)
         self.num_channels = 1
+        print("Adjusting for ambient microphone noise. Say something through the microphone")
         with self.source:
             self.recorder.adjust_for_ambient_noise(self.source)
+        print("Done adjusting for ambient microphone noise")
 
     def record_into_queue(self, audio_queue):
         def record_callback(_, audio:sr.AudioData) -> None:
@@ -47,8 +49,10 @@ class DefaultSpeakerRecorder:
                                  chunk_size=pyaudio.get_sample_size(pyaudio.paInt16))
         self.num_channels = self.default_speakers["maxInputChannels"]
+        print("Adjusting for ambient speaker noise. Play something through the speaker")
         with self.source:
             self.recorder.adjust_for_ambient_noise(self.source)
+        print("Done adjusting for ambient speaker noise")
 
     def record_into_queue(self, audio_queue):
         def record_callback(_, audio:sr.AudioData) -> None:
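For context, the calibrate-then-listen pattern these prints wrap looks roughly like the sketch below. It is a hedged reconstruction, not code from this commit: it assumes the vendored custom_speech_recognition package mirrors the upstream speech_recognition API (Recognizer, Microphone, AudioData, listen_in_background), and the queue wiring is illustrative.

    import queue
    import custom_speech_recognition as sr  # assumed to mirror the speech_recognition API

    audio_queue = queue.Queue()
    recorder = sr.Recognizer()
    source = sr.Microphone(sample_rate=16000)

    print("Adjusting for ambient microphone noise. Say something through the microphone")
    with source:
        # Samples a short stretch of background audio to set the energy threshold.
        recorder.adjust_for_ambient_noise(source)
    print("Done adjusting for ambient microphone noise")

    def record_callback(_, audio: sr.AudioData) -> None:
        # Runs on a background thread once per captured phrase.
        audio_queue.put(audio.get_raw_data())

    # Returns a function that stops the background listener when called.
    stop_listening = recorder.listen_in_background(source, record_callback)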

@@ -7,12 +7,13 @@ from tempfile import NamedTemporaryFile
 import custom_speech_recognition as sr
 import io
 from datetime import timedelta
-from time import sleep
 import pyaudiowpatch as pyaudio
 from AudioRecorder import DefaultMicRecorder, DefaultSpeakerRecorder
 from heapq import merge
 
-PHRASE_TIMEOUT = 4
+PHRASE_TIMEOUT = 3.01
+MAX_PHRASES = 10
 
 class AudioTranscriber:
     def __init__(self, default_mic : DefaultMicRecorder, default_speaker : DefaultSpeakerRecorder):
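PHRASE_TIMEOUT controls when a new phrase is started instead of extending the current one, and MAX_PHRASES (new in this commit) caps how many phrases are kept. A rough sketch of how such a timeout check is typically written, assuming timestamps are compared with timedelta as elsewhere in this file (the variable names here are illustrative, not from the repository):

    from datetime import datetime, timedelta

    PHRASE_TIMEOUT = 3.01

    last_spoken = datetime.utcnow() - timedelta(seconds=5)  # illustrative timestamps
    time_spoken = datetime.utcnow()

    # If the gap since the previous audio chunk exceeds the timeout, start a new
    # phrase; otherwise the text replaces the phrase currently being built.
    start_new_phrase = (time_spoken - last_spoken) > timedelta(seconds=PHRASE_TIMEOUT)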
@@ -26,7 +27,7 @@ class AudioTranscriber:
         self.mic_channels = default_mic.num_channels
         self.speaker_sample_rate = default_speaker.source.SAMPLE_RATE
-        self.speaker_sample_rate = default_speaker.source.SAMPLE_RATE
+        self.speaker_sample_width = default_speaker.source.SAMPLE_WIDTH
         self.speaker_channels = default_speaker.num_channels
 
     def create_transcription_from_queue(self, audio_queue):
@@ -66,6 +67,8 @@ class AudioTranscriber:
         if text != '' and text.lower() != 'you':
             if mic_start_new_phrase or len(self.mic_transcript_data) == 0:
+                if len(self.mic_transcript_data) > MAX_PHRASES:
+                    self.mic_transcript_data.pop()
                 self.mic_transcript_data = [(who_spoke + ": [" + text + ']\n\n', time_spoken)] + self.mic_transcript_data
                 self.transcript_changed_event.set()
             else:
@@ -96,6 +99,8 @@ class AudioTranscriber:
         if text != '' and text.lower() != 'you':
             if speaker_start_new_phrase or len(self.speaker_transcript_data) == 0:
+                if len(self.mic_transcript_data) > MAX_PHRASES:
+                    self.mic_transcript_data.pop()
                 self.speaker_transcript_data = [(who_spoke + ": [" + text + ']\n\n', time_spoken)] + self.speaker_transcript_data
                 self.transcript_changed_event.set()
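Both hunks above apply the same capping pattern: phrases are stored newest-first, so once a list grows past MAX_PHRASES the oldest entry sits at the end and is popped before the new phrase is prepended. A self-contained sketch of that pattern (the list and helper names are illustrative):

    from datetime import timedelta

    MAX_PHRASES = 10

    def add_phrase(transcript_data, who_spoke, text, time_spoken):
        if len(transcript_data) > MAX_PHRASES:
            transcript_data.pop()  # drop the oldest phrase, which sits at the end
        # Prepend the new phrase so index 0 is always the most recent.
        return [(who_spoke + ": [" + text + ']\n\n', time_spoken)] + transcript_data

    transcript = add_phrase([], "You", "hello there", timedelta(seconds=1))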
@@ -107,5 +112,6 @@ class AudioTranscriber:
     def get_transcript(self):
         key = lambda x : x[1]
         transcript_tuple = list(merge(self.mic_transcript_data, self.speaker_transcript_data, key=key, reverse=True))
+        transcript_tuple = transcript_tuple[0:MAX_PHRASES]
         return "".join([t[0] for t in transcript_tuple])

@@ -4,7 +4,6 @@ from GPTResponder import GPTResponder
 import customtkinter as ctk
 import AudioRecorder
 import queue
-import os
 import time
 
 def write_in_textbox(textbox, text):

@@ -1,7 +1,7 @@
-numpy
-openai-whisper
-torch
-wave
-openai
-customtkinter
-pyaudiowpatch
+numpy==1.24.3
+openai-whisper==20230314
+torch==2.0.1
+Wave==0.0.2
+openai==0.27.6
+customtkinter==5.1.3
+PyAudioWPatch==0.2.12.5