mirror of
https://github.com/SevaSk/ecoute.git
synced 2025-01-11 23:42:42 +00:00
Merge branch 'main' into add-mac-os-support
This commit is contained in:
commit
c425cedcd1
@ -50,8 +50,14 @@ class AudioTranscriber:
|
|||||||
who_spoke, data, time_spoken = audio_queue.get()
|
who_spoke, data, time_spoken = audio_queue.get()
|
||||||
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
|
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
|
||||||
source_info = self.audio_sources[who_spoke]
|
source_info = self.audio_sources[who_spoke]
|
||||||
temp_file = source_info["process_data_func"](source_info["last_sample"])
|
|
||||||
text = self.audio_model.get_transcription(temp_file)
|
text = ''
|
||||||
|
temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
|
||||||
|
source_info["process_data_func"](source_info["last_sample"], temp_file.name)
|
||||||
|
text = self.audio_model.get_transcription(temp_file.name)
|
||||||
|
|
||||||
|
temp_file.close()
|
||||||
|
os.unlink(temp_file.name)
|
||||||
|
|
||||||
if text != '' and text.lower() != 'you':
|
if text != '' and text.lower() != 'you':
|
||||||
self.update_transcript(who_spoke, text, time_spoken)
|
self.update_transcript(who_spoke, text, time_spoken)
|
||||||
@ -68,23 +74,19 @@ class AudioTranscriber:
|
|||||||
source_info["last_sample"] += data
|
source_info["last_sample"] += data
|
||||||
source_info["last_spoken"] = time_spoken
|
source_info["last_spoken"] = time_spoken
|
||||||
|
|
||||||
def process_mic_data(self, data):
|
def process_mic_data(self, data, temp_file_name):
|
||||||
temp_file = NamedTemporaryFile().name
|
|
||||||
audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
|
audio_data = sr.AudioData(data, self.audio_sources["You"]["sample_rate"], self.audio_sources["You"]["sample_width"])
|
||||||
wav_data = io.BytesIO(audio_data.get_wav_data())
|
wav_data = io.BytesIO(audio_data.get_wav_data())
|
||||||
with open(temp_file, 'w+b') as f:
|
with open(temp_file_name, 'w+b') as f:
|
||||||
f.write(wav_data.read())
|
f.write(wav_data.read())
|
||||||
return temp_file
|
|
||||||
|
|
||||||
def process_speaker_data(self, data):
|
def process_speaker_data(self, data, temp_file_name):
|
||||||
temp_file = NamedTemporaryFile().name
|
with wave.open(temp_file_name, 'wb') as wf:
|
||||||
with wave.open(temp_file, 'wb') as wf:
|
|
||||||
wf.setnchannels(self.audio_sources["Speaker"]["channels"])
|
wf.setnchannels(self.audio_sources["Speaker"]["channels"])
|
||||||
p = pyaudio.PyAudio()
|
p = pyaudio.PyAudio()
|
||||||
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
|
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
|
||||||
wf.setframerate(self.audio_sources["Speaker"]["sample_rate"])
|
wf.setframerate(self.audio_sources["Speaker"]["sample_rate"])
|
||||||
wf.writeframes(data)
|
wf.writeframes(data)
|
||||||
return temp_file
|
|
||||||
|
|
||||||
def update_transcript(self, who_spoke, text, time_spoken):
|
def update_transcript(self, who_spoke, text, time_spoken):
|
||||||
source_info = self.audio_sources[who_spoke]
|
source_info = self.audio_sources[who_spoke]
|
||||||
|
17
README.md
17
README.md
@ -15,7 +15,7 @@ Follow these steps to set up and run Ecoute on your local machine.
|
|||||||
|
|
||||||
### 📋 Prerequisites
|
### 📋 Prerequisites
|
||||||
|
|
||||||
- Python 3.x
|
- Python >=3.8.0
|
||||||
- An OpenAI API key
|
- An OpenAI API key
|
||||||
- Windows OS (Not tested on others)
|
- Windows OS (Not tested on others)
|
||||||
- FFmpeg
|
- FFmpeg
|
||||||
@ -73,11 +73,18 @@ If FFmpeg is not installed in your system, you can follow the steps below to ins
|
|||||||
|
|
||||||
4. Create a `keys.py` file in the ecoute directory and add your OpenAI API key:
|
4. Create a `keys.py` file in the ecoute directory and add your OpenAI API key:
|
||||||
|
|
||||||
```
|
- Option 1: Create the file from your command prompt. Run the following command, replacing "API KEY" with your actual OpenAI API key:
|
||||||
python -c "with open('keys.py', 'w', encoding='utf-8') as f: f.write('OPENAI_API_KEY=\"API KEY\"')"
|
|
||||||
```
|
|
||||||
|
|
||||||
Replace `API KEY` with your actual OpenAI API key.
|
```
|
||||||
|
python -c "with open('keys.py', 'w', encoding='utf-8') as f: f.write('OPENAI_API_KEY=\"API KEY\"')"
|
||||||
|
```
|
||||||
|
|
||||||
|
- Option 2: Create the `keys.py` file manually. Open your text editor of choice and enter the following content:
|
||||||
|
|
||||||
|
```
|
||||||
|
OPENAI_API_KEY="API KEY"
|
||||||
|
```
|
||||||
|
Replace "API KEY" with your actual OpenAI API key, then save the file as `keys.py` in the ecoute directory.
|
||||||
|
|
||||||
|
|
||||||
### 🎬 Running Ecoute
|
### 🎬 Running Ecoute
|
||||||
|
@ -24,9 +24,7 @@ class WhisperTranscriber:
|
|||||||
|
|
||||||
class APIWhisperTranscriber:
|
class APIWhisperTranscriber:
|
||||||
def get_transcription(self, wav_file_path):
|
def get_transcription(self, wav_file_path):
|
||||||
new_file_path = wav_file_path + '.wav'
|
audio_file= open(wav_file_path, "rb")
|
||||||
os.rename(wav_file_path, new_file_path)
|
|
||||||
audio_file= open(new_file_path, "rb")
|
|
||||||
try:
|
try:
|
||||||
result = openai.Audio.translate("whisper-1", audio_file)
|
result = openai.Audio.translate("whisper-1", audio_file)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
Reference in New Issue
Block a user