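"""Queue-driven audio transcription built on OpenAI Whisper.

AudioTranscriber converts queued audio chunks to temporary WAV files, runs
them through a local Whisper model, and keeps a newest-first transcript.
"""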
import numpy as np
import whisper
import torch
import wave
import os


class AudioTranscriber:

    def __init__(self):
        # Transcript entries, newest first (see create_transcription_from_queue).
        self.transcript = []
        # Load the local Whisper checkpoint; os.path.join keeps the path
        # portable instead of hard-coding a Windows backslash. Passing the
        # name 'tiny.en' instead would let whisper download the model itself.
        self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'tiny.en.pt'))

    def transcribe(self, audio_data):
        # Dump the float audio to a 16-bit PCM WAV file that Whisper can read.
        temp_wav = f'temp_{id(self)}.wav'
        with wave.open(temp_wav, 'wb') as wav_file:
            wav_file.setnchannels(audio_data.shape[1])
            wav_file.setsampwidth(2)      # 16-bit samples
            wav_file.setframerate(16000)  # Whisper expects 16 kHz audio
            # Scale floats in [-1.0, 1.0] into the int16 range.
            audio_data = (audio_data * (2**15 - 1)).astype(np.int16)
            wav_file.writeframes(audio_data.tobytes())
        # Transcribe after the file is closed so every frame is flushed to disk.
        result = self.audio_model.transcribe(temp_wav, fp16=torch.cuda.is_available())
        text = result['text'].strip()
        return text

    def create_transcription_from_queue(self, audio_queue):
        # Consume (source, audio_data) pairs from the queue indefinitely.
        while True:
            source, audio_data = audio_queue.get()
            audio_data_transcription = self.transcribe(audio_data)
            # Whisper often emits "you" for silent or empty input, so drop
            # blank results and that artifact.
            if audio_data_transcription != '' and audio_data_transcription.lower() != 'you':
                # Prepend so the newest utterance appears first.
                self.transcript = [source + ": [" + audio_data_transcription + ']\n\n'] + self.transcript

    def get_transcript(self):
        return "".join(self.transcript)
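

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module). It assumes a
# tiny.en.pt checkpoint sits in the working directory, and that producers put
# (source_name, float32 array of shape (n_frames, n_channels) scaled to
# [-1.0, 1.0]) tuples onto the queue, matching what transcribe() expects.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import queue
    import threading
    import time

    transcriber = AudioTranscriber()
    audio_queue = queue.Queue()

    # The consumer loop never returns, so run it on a daemon thread.
    worker = threading.Thread(
        target=transcriber.create_transcription_from_queue,
        args=(audio_queue,),
        daemon=True,
    )
    worker.start()

    # One second of silence stands in for captured microphone audio.
    audio_queue.put(('Mic', np.zeros((16000, 1), dtype=np.float32)))

    time.sleep(5)  # Give the worker time to pick up and transcribe the chunk.
    print(transcriber.get_transcript())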