added whisper

2025-04-05 01:49:08 +00:00 · 2024-02-01 23:31:14 +01:00 · 2024-02-01 23:31:14 +01:00 · 86f8fab38f
commit 86f8fab38f
parent ec63c7be86
2 changed files with 39 additions and 1 deletions
--- a/lollms/media.py
+++ b/lollms/media.py
@@ -46,6 +46,12 @@ matplotlib.use('Agg')

 if not PackageManager.check_package_installed("whisper"):
    PackageManager.install_package("openai-whisper")
+    try:
+        import conda.cli
+        conda.cli.main("install", "conda-forge::ffmpeg", "-y")
+    except:
+        ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually")
+
 import whisper

 import socketio
@@ -90,6 +96,7 @@ class AudioRecorder:
        self.is_recording = False
        self.start_time = time.time()
        self.last_time = time.time()
+        self.whisper = whisper.load_model("base")

    def audio_callback(self, indata, frames, time_, status):
        volume_norm = np.linalg.norm(indata)*10
@@ -120,6 +127,11 @@ class AudioRecorder:
        write(self.filename, self.sample_rate, self.buffer)
        self.lollmsCom.info(f"Saved to {self.filename}")
        self.lollmsCom.info(f"Transcribing ... ")
+        result = self.whisper.transcribe(str(self.filename))
+        transcription_fn = str(self.filename)+".txt"
+        with open(transcription_fn, "w", encoding="utf-8") as f:
+            f.write(result["text"])
+        self.lollmsCom.info(f"File saved to {transcription_fn}")

    def update_spectrogram(self):
        f, t, Sxx = spectrogram(self.buffer[-30*self.sample_rate:], self.sample_rate)
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -131,6 +131,9 @@ class AIPersonality:

        self.installation_option = installation_option

+        # Whisper to transcribe audio
+        self.whisper = None
+
        # First setup a default personality
        # Version
        self._version = pkg_resources.get_distribution('lollms').version
@@ -762,7 +765,30 @@ Date: {{date}}
        db_path = self.lollms_paths.personal_databases_path / "personalities" / self.name / "db.json"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        path = Path(path)
-        if path.suffix in [".png",".jpg",".gif",".bmp",".webp"]:
+        if path.suffix in [".wav",".mp3"]:
+            if self.whisper is None:
+                if not PackageManager.check_package_installed("whisper"):
+                    PackageManager.install_package("openai-whisper")
+                    try:
+                        import conda.cli
+                        conda.cli.main("install", "conda-forge::ffmpeg", "-y")
+                    except:
+                        ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually")
+
+                import whisper
+                self.whisper = whisper.load_model("base")
+
+
+            self.info(f"Transcribing ... ")
+            self.step_start("Transcribing ... ")
+            result = self.whisper.transcribe(str(path))
+            transcription_fn = str(path)+".txt"
+            with open(transcription_fn, "w", encoding="utf-8") as f:
+                f.write(result["text"])
+            self.info(f"File saved to {transcription_fn}")
+            self.full(result["text"])
+            self.step_end("Transcribing ... ")
+        elif path.suffix in [".png",".jpg",".gif",".bmp",".webp"]:
            if self.callback:
                try:
                    pth = str(path).replace("\\","/").split('/')