diff --git a/AudioTranscriber.py b/AudioTranscriber.py
index e439e9f..37cb21e 100644
--- a/AudioTranscriber.py
+++ b/AudioTranscriber.py
@@ -50,8 +50,14 @@ class AudioTranscriber:
             who_spoke, data, time_spoken = audio_queue.get()
             self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
             source_info = self.audio_sources[who_spoke]
-            temp_file = source_info["process_data_func"](source_info["last_sample"])
-            text = self.audio_model.get_transcription(temp_file)
+
+            text = ''
+            temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
+            source_info["process_data_func"](source_info["last_sample"], temp_file.name)
+            text = self.audio_model.get_transcription(temp_file.name)
+
+            temp_file.close()
+            os.unlink(temp_file.name)
 
             if text != '' and text.lower() != 'you':
                 self.update_transcript(who_spoke, text, time_spoken)
@@ -68,23 +74,19 @@ class AudioTranscriber:
             source_info["last_sample"] += data
         source_info["last_spoken"] = time_spoken
 
-    def process_mic_data(self, data):
-        temp_file = NamedTemporaryFile().name
+    def process_mic_data(self, data, temp_file_name):
         audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
         wav_data = io.BytesIO(audio_data.get_wav_data())
-        with open(temp_file, 'w+b') as f:
+        with open(temp_file_name, 'w+b') as f:
             f.write(wav_data.read())
-        return temp_file
 
-    def process_speaker_data(self, data):
-        temp_file = NamedTemporaryFile().name
-        with wave.open(temp_file, 'wb') as wf:
+    def process_speaker_data(self, data, temp_file_name):
+        with wave.open(temp_file_name, 'wb') as wf:
             wf.setnchannels(self.audio_sources["Speaker"]["channels"])
             p = pyaudio.PyAudio()
             wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
             wf.setframerate(self.audio_sources["Speaker"]["sample_rate"])
             wf.writeframes(data)
-        return temp_file
 
     def update_transcript(self, who_spoke, text, time_spoken):
         source_info = self.audio_sources[who_spoke]
diff --git a/README.md b/README.md
index 9b40977..3a238e6 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Follow these steps to set up and run Ecoute on your local machine.
 
 ### 📋 Prerequisites
 
-- Python 3.x
+- Python >=3.8.0
 - An OpenAI API key
 - Windows OS (Not tested on others)
 - FFmpeg
@@ -73,11 +73,18 @@ If FFmpeg is not installed in your system, you can follow the steps below to install it.
 
 4. Create a `keys.py` file in the ecoute directory and add your OpenAI API key:
 
-   ```
-   python -c "with open('keys.py', 'w', encoding='utf-8') as f: f.write('OPENAI_API_KEY=\"API KEY\"')"
-   ```
+   - Option 1: Run the following command in your command prompt, making sure to replace "API KEY" with your actual OpenAI API key:
 
-   Replace `API KEY` with your actual OpenAI API key.
+      ```
+      python -c "with open('keys.py', 'w', encoding='utf-8') as f: f.write('OPENAI_API_KEY=\"API KEY\"')"
+      ```
+
+   - Option 2: Create the `keys.py` file manually. Open your text editor of choice and enter the following content:
+
+      ```
+      OPENAI_API_KEY="API KEY"
+      ```
+      Replace "API KEY" with your actual OpenAI API key, then save the file as `keys.py` in the ecoute directory.
 
 
 ### 🎬 Running Ecoute
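A note on the `AudioTranscriber.py` hunks above: the temp-file lifecycle now lives entirely in the caller, which creates the file with `delete=False` and a `.wav` suffix, passes only the path into `process_data_func`, and unlinks it once transcription finishes. Below is a minimal sketch of that pattern, not part of the diff; `transcribe_once`, `write_wav`, and `transcribe` are hypothetical stand-ins for the real call sites (`process_data_func` and `audio_model.get_transcription`), and the `try/finally` is an extra safeguard the diff itself does not add:

```python
import os
from tempfile import NamedTemporaryFile

def transcribe_once(data, write_wav, transcribe):
    # delete=False keeps the file on disk after close. Per the tempfile docs,
    # whether a still-open NamedTemporaryFile can be reopened by name varies
    # by platform (it cannot on Windows, Ecoute's target OS), so an explicit
    # close + os.unlink is the portable way to manage the file.
    temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        write_wav(data, temp_file.name)    # e.g. process_mic_data(data, name)
        return transcribe(temp_file.name)  # e.g. audio_model.get_transcription(name)
    finally:
        temp_file.close()
        os.unlink(temp_file.name)          # manual cleanup, since delete=False
```

The `suffix=".wav"` is also what lets `TranscriberModels.py` below drop its rename workaround.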
diff --git a/TranscriberModels.py b/TranscriberModels.py
index 843d18b..60a3dd8 100644
--- a/TranscriberModels.py
+++ b/TranscriberModels.py
@@ -24,9 +24,7 @@ class WhisperTranscriber:
 
 class APIWhisperTranscriber:
     def get_transcription(self, wav_file_path):
-        new_file_path = wav_file_path + '.wav'
-        os.rename(wav_file_path, new_file_path)
-        audio_file= open(new_file_path, "rb")
+        audio_file = open(wav_file_path, "rb")
         try:
             result = openai.Audio.translate("whisper-1", audio_file)
         except Exception as e:
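For context on the `APIWhisperTranscriber` change: the removed `os.rename` existed only to give the temp file a `.wav` extension, which the OpenAI endpoint appears to use to infer the audio format; since the file is now created with `suffix=".wav"`, that workaround is dead code. Here is a minimal sketch of the resulting call path, assuming the pre-1.0 `openai` SDK this file already uses (`openai.Audio.translate`); the `with` block, the `''` fallback, and `.strip()` are illustrative choices, not taken from the diff:

```python
import openai

def get_transcription(wav_file_path):
    # The path already ends in .wav (NamedTemporaryFile(suffix=".wav") at the
    # call site), so it can be handed to the API with no rename step.
    with open(wav_file_path, "rb") as audio_file:
        try:
            # whisper-1 via the translate endpoint returns English text.
            result = openai.Audio.translate("whisper-1", audio_file)
        except Exception as e:
            print(e)
            return ''
    return result['text'].strip()
```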