diff --git a/lollms/media.py b/lollms/media.py index 7dda7b9..d2787ba 100644 --- a/lollms/media.py +++ b/lollms/media.py @@ -46,6 +46,12 @@ matplotlib.use('Agg') if not PackageManager.check_package_installed("whisper"): PackageManager.install_package("openai-whisper") + try: + import conda.cli + conda.cli.main("install", "conda-forge::ffmpeg", "-y") + except: + ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually") + import whisper import socketio @@ -90,6 +96,7 @@ class AudioRecorder: self.is_recording = False self.start_time = time.time() self.last_time = time.time() + self.whisper = whisper.load_model("base") def audio_callback(self, indata, frames, time_, status): volume_norm = np.linalg.norm(indata)*10 @@ -120,6 +127,11 @@ class AudioRecorder: write(self.filename, self.sample_rate, self.buffer) self.lollmsCom.info(f"Saved to {self.filename}") self.lollmsCom.info(f"Transcribing ... ") + result = self.whisper.transcribe(str(self.filename)) + transcription_fn = str(self.filename)+".txt" + with open(transcription_fn, "w", encoding="utf-8") as f: + f.write(result["text"]) + self.lollmsCom.info(f"File saved to {transcription_fn}") def update_spectrogram(self): f, t, Sxx = spectrogram(self.buffer[-30*self.sample_rate:], self.sample_rate) diff --git a/lollms/personality.py b/lollms/personality.py index 0ccce71..771a0a5 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -131,6 +131,9 @@ class AIPersonality: self.installation_option = installation_option + # Whisper to transcribe audio + self.whisper = None + # First setup a default personality # Version self._version = pkg_resources.get_distribution('lollms').version @@ -762,7 +765,30 @@ Date: {{date}} db_path = self.lollms_paths.personal_databases_path / "personalities" / self.name / "db.json" db_path.parent.mkdir(parents=True, exist_ok=True) path = Path(path) - if path.suffix in [".png",".jpg",".gif",".bmp",".webp"]: + if path.suffix in [".wav",".mp3"]: + if 
self.whisper is None: + if not PackageManager.check_package_installed("whisper"): + PackageManager.install_package("openai-whisper") + try: + import conda.cli + conda.cli.main("install", "conda-forge::ffmpeg", "-y") + except: + ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually") + + import whisper + self.whisper = whisper.load_model("base") + + + self.info(f"Transcribing ... ") + self.step_start("Transcribing ... ") + result = self.whisper.transcribe(str(path)) + transcription_fn = str(path)+".txt" + with open(transcription_fn, "w", encoding="utf-8") as f: + f.write(result["text"]) + self.info(f"File saved to {transcription_fn}") + self.full(result["text"]) + self.step_end("Transcribing ... ") + elif path.suffix in [".png",".jpg",".gif",".bmp",".webp"]: if self.callback: try: pth = str(path).replace("\\","/").split('/')