From 704cdf0b9f96cd4051ba79290e2e0f07b7dc86ac Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Mon, 3 Jun 2024 01:48:16 +0200 Subject: [PATCH] enhanced --- lollms/media.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/lollms/media.py b/lollms/media.py index b7ad546..3f90ff0 100644 --- a/lollms/media.py +++ b/lollms/media.py @@ -158,7 +158,7 @@ class RTCom: self.summoned = False self.sample_mfccs = None if self.use_keyword_audio and self.keyword_audio_path: - self.sample_features = self.load_and_extract_features() + self.sample_features = self.load_and_extract_features(self.keyword_audio_path) @@ -203,12 +203,29 @@ class RTCom: mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) return np.mean(mfccs.T, axis=0) - def extract_features(self, buffer): + def extract_features(self, frames): if not PackageManager.check_package_installed("librosa"): PackageManager.install_package("librosa") + + filename = f"recording_{self.file_index}.wav" + self.file_index += 1 + + amplified_frames = self._apply_gain(frames) + trimmed_frames = self._trim_silence([amplified_frames]) + logs_file = Path(self.logs_folder)/filename + logs_file.parent.mkdir(exist_ok=True, parents=True) + + wf = wave.open(str(logs_file), 'wb') + wf.setnchannels(self.channels) + wf.setsampwidth(2) + wf.setframerate(self.rate) + wf.writeframes(trimmed_frames) + wf.close() + import librosa - y, sr = librosa.load(buffer, sr=self.rate) + y, sr = librosa.load(logs_file, sr=self.rate) mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) + return np.mean(mfccs.T, axis=0) def compare_voices(self, sample_features, realtime_features, th = 20):