diff --git a/lollms/com.py b/lollms/com.py index 3d643f2..d689c1b 100644 --- a/lollms/com.py +++ b/lollms/com.py @@ -167,6 +167,10 @@ class LoLLMsCom: """ pass + + def emit_socket_io_info(self, name, data, client_id): + pass + def notify( self, content:str, diff --git a/lollms/functions/search.py b/lollms/functions/search.py index 96d3fc7..5a6c796 100644 --- a/lollms/functions/search.py +++ b/lollms/functions/search.py @@ -7,7 +7,7 @@ from lollms.utilities import PackageManager from ascii_colors import trace_exception # Ensure necessary packages are installed -if not PackageManager.check_package_installed("beautifulsoup4"): +if not PackageManager.check_package_installed("bs4"): PackageManager.install_package("beautifulsoup4") if not PackageManager.check_package_installed("html2text"): PackageManager.install_package("html2text") diff --git a/lollms/media.py b/lollms/media.py index 97dac4b..9b83977 100644 --- a/lollms/media.py +++ b/lollms/media.py @@ -8,7 +8,7 @@ License: Apache 2.0 """ from lollms.utilities import PackageManager from lollms.com import LoLLMsCom -from lollms.utilities import trace_exception, run_async +from lollms.utilities import trace_exception, run_async, install_conda_package from lollms.types import MSG_TYPE, SENDER_TYPES from lollms.client_session import Session from ascii_colors import ASCIIColors @@ -16,6 +16,7 @@ import platform from functools import partial import subprocess from collections import deque +from scipy.signal import butter, lfilter import os import threading @@ -47,15 +48,7 @@ import matplotlib.pyplot as plt import matplotlib matplotlib.use('Agg') -if not PackageManager.check_package_installed("whisper"): - PackageManager.install_package("openai-whisper") - try: - import conda.cli - conda.cli.main("install", "conda-forge::ffmpeg", "-y") - except: - ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually") -import whisper import socketio from lollms.com import LoLLMsCom @@ -92,7 +85,37 @@ from lollms.function_call import FunctionCalling_Library from lollms.client_session import Client from datetime import datetime -import math +import sys + +def update_progress_bar(silence_counter, max_silence): + bar_length = 40 # Length of the progress bar + progress = silence_counter / max_silence + block = int(round(bar_length * progress)) + + # Determine the color based on progress + if progress < 0.5: + color = ASCIIColors.color_bright_green + else: + color = ASCIIColors.color_bright_red + + bar = "#" * block + "-" * (bar_length - block) + + sys.stdout.write("\r") + ASCIIColors.print(f"silence_counter: {silence_counter} |{bar}| {round(progress * 100, 2)}%", color=color, end="") + sys.stdout.flush() + +# Step 1: Define your high-pass and low-pass filters (they’re like the bouncers for your audio club) +def butter_bandpass(lowcut, highcut, fs, order=5): + nyquist = 0.5 * fs + low = lowcut / nyquist + high = highcut / nyquist + b, a = butter(order, [low, high], btype='band') + return b, a + +def bandpass_filter(data, lowcut, highcut, fs, order=5): + b, a = butter_bandpass(lowcut, highcut, fs, order=order) + y = lfilter(b, a, data) + return y class RTCom: def __init__( @@ -108,25 +131,16 @@ class RTCom: rate=44100, channels=1, buffer_size=10, - model="small.en", snd_input_device=None, snd_output_device=None, logs_folder="logs", - voice=None, block_while_talking=True, context_size=4096 ): self.sio = sio self.lc = lc self.client = client - self.tts = LollmsTTS(self.lc) - self.tl = TasksLibrary(self.lc) - self.block_listening = False - if not voice: - voices = self.get_voices() - voice = voices[0] - self.voice = voice self.context_size = context_size self.personality = personality self.rate = rate @@ -171,32 +185,6 @@ class RTCom: self.transcribed_files = deque() self.buffer_lock = threading.Condition() self.transcribed_lock = threading.Condition() - self.lc.ShowBlockingMessage("Loading whisper...") - ASCIIColors.info("Loading whisper...", end="",flush=True) - - self.model = model - self.whisper = whisper.load_model(model) - self.lc.HideBlockingMessage() - ASCIIColors.success("OK") - - def get_date_time(self): - now = datetime.now() - return now.strftime("%Y-%m-%d %H:%M:%S") - - def calculator_function(self, expression: str) -> float: - try: - # Add the math module functions to the local namespace - allowed_names = {k: v for k, v in math.__dict__.items() if not k.startswith("__")} - - # Evaluate the expression safely using the allowed names - result = eval(expression, {"__builtins__": None}, allowed_names) - return result - except Exception as e: - return str(e) - - def take_a_photo(self): - return "Couldn't take a photo" - def start_recording(self): self.recording = True @@ -224,8 +212,12 @@ class RTCom: # self._save_histogram(self.audio_values) def callback(self, indata, frames, time, status): + max_scilence = int((self.rate / frames) * self.silence_duration) if not self.block_listening: + # Transform the buffer into a numpy array (like turning a frog into a prince) audio_data = np.frombuffer(indata, dtype=np.int16) + # Apply the bandpass filter to the incoming audio data + audio_data = bandpass_filter(audio_data, lowcut=300, highcut=3000, fs=self.rate) max_value = np.max(audio_data) min_value = np.min(audio_data) @@ -237,6 +229,7 @@ class RTCom: self.audio_values.extend(audio_data) self.total_frames += frames + ASCIIColors.red(f" max_value: {max_value}", end="") if max_value < self.threshold: self.silence_counter += 1 self.current_silence_duration += frames @@ -248,21 +241,19 @@ class RTCom: if self.current_silence_duration > self.longest_silence_duration: self.longest_silence_duration = self.current_silence_duration - if self.silence_counter > (self.rate / frames) * self.silence_duration: - ASCIIColors.red("Silence counter reached threshold") + if self.silence_counter > max_scilence: trimmed_frames = self._trim_silence(self.frames) sound_percentage = self._calculate_sound_percentage(trimmed_frames) - ASCIIColors.red(f"Sound percentage {sound_percentage}") if sound_percentage >= self.sound_threshold_percentage: + ASCIIColors.red(f"Sound percentage {sound_percentage}") + ASCIIColors.red("\nSilence counter reached threshold") self._save_wav(self.frames) self.frames = [] self.silence_counter = 0 self.total_frames = 0 self.sound_frames = 0 else: - ASCIIColors.red(f"Appending data") - ASCIIColors.yellow(f"silence_counter: {self.silence_counter}") - print(f"silence duration: {(self.rate / frames) * self.silence_duration}") + update_progress_bar(self.silence_counter, max_scilence) self.frames.append(indata.copy()) else: self.frames = [] @@ -385,7 +376,7 @@ class RTCom: ASCIIColors.red(" -------------------------------------------------") self.lc.info("Talking") ASCIIColors.green("<>") - self.lc.tts.tts_audio(lollms_text, speaker=self.voice, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wav") + self.lc.tts.tts_audio(lollms_text, file_name_or_path=str(Path(self.logs_folder)/filename)+"_answer.wav") except Exception as ex: trace_exception(ex) self.block_listening = False @@ -400,6 +391,81 @@ class RTCom: return voices return [] +from pathlib import Path +import os +import sounddevice as sd +import threading +import datetime +import wave + +class AudioNinja: + def __init__(self, lc:LollmsApplication, logs_folder='logs', device=None): + """ + Initialize the AudioNinja with a LollmsApplication object, + a log folder, and an optional recording device. + + Args: + lc (LollmsApplication): The LollmsApplication object for communication. + logs_folder (str): The folder to save recordings. Default is 'logs'. + device (int or str): The recording device index or name. Default is None. + """ + self.lc = lc + self.logs_folder = Path(logs_folder) + self.device = device + self.recording_thread = None + self.is_recording = False + self.frames = [] + if not self.logs_folder.exists(): + self.logs_folder.mkdir(parents=True, exist_ok=True) + self.lc.info(f"AudioNinja is ready to strike from the shadows! Logging to '{self.logs_folder}' with device '{self.device}'") + + def _record_audio(self): + """ + Internal method to handle audio recording callback. + """ + def callback(indata, frames, time, status): + if self.is_recording: + self.frames.append(indata.copy()) + self.lc.info("Ninja is capturing sounds... Shhh!") + + with sd.InputStream(callback=callback, device=self.device): + while self.is_recording: + sd.sleep(1000) + + def start_recording(self): + """ + Start the audio recording. + """ + if not self.is_recording: + self.is_recording = True + self.frames = [] + self.recording_thread = threading.Thread(target=self._record_audio) + self.recording_thread.start() + self.lc.info("Ninja recording started! 🥷🔴") + + def stop_recording(self): + """ + Stop the audio recording. + """ + if self.is_recording: + self.is_recording = False + self.recording_thread.join() + self._save_recording() + self.lc.info("Ninja recording stopped! 🥷⚪️") + + def _save_recording(self): + """ + Save the recorded audio to a .wav file. + """ + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + filename = self.logs_folder / f"recording_{timestamp}.wav" + with wave.open(filename, 'wb') as wf: + wf.setnchannels(1) + wf.setsampwidth(sd.default.dtype[0].itemsize) + wf.setframerate(44100) + wf.writeframes(b''.join(self.frames)) + self.lc.info(f"Ninja stored the audio file at '{filename}'! 🥷📂") + class WebcamImageSender: """ @@ -535,17 +601,6 @@ class MusicPlayer(threading.Thread): class RealTimeTranscription: def __init__(self, callback): - if not PackageManager.check_package_installed('pyaudio'): - try: - import conda.cli - conda.cli.main("install", "anaconda::pyaudio", "-y") - except: - ASCIIColors.bright_red("Couldn't install pyaudio. whisper won't work. Please install it manually") - import pyaudio - # Initialize Whisper ASR - print("Loading whisper ...", end="") - self.whisper = whisper.load_model("base") - print("ok") # Set up PyAudio self.p = pyaudio.PyAudio() diff --git a/lollms/personality.py b/lollms/personality.py index 34fd7cc..a42212b 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -995,24 +995,16 @@ class AIPersonality: self.new_message("") self.info(f"Transcribing ... ") self.step_start("Transcribing ... ") - if self.whisper is None: - if not PackageManager.check_package_installed("whisper"): - PackageManager.install_package("openai-whisper") - try: - import conda.cli - conda.cli.main("install", "conda-forge::ffmpeg", "-y") - except: - ASCIIColors.bright_red("Couldn't install ffmpeg. whisper won't work. Please install it manually") - - import whisper - self.whisper = whisper.load_model("base") - result = self.whisper.transcribe(str(path)) + if self.app.stt is None: + self.InfoMessage("No STT service is up.\nPlease configure your default STT service in the settings page.") + return + text = self.app.stt.transcribe(str(path)) transcription_fn = str(path)+".txt" with open(transcription_fn, "w", encoding="utf-8") as f: - f.write(result["text"]) + f.write(text) self.info(f"File saved to {transcription_fn}") - self.full(result["text"]) + self.full(text) self.step_end("Transcribing ... ") elif path.suffix in [".png",".jpg",".jpeg",".gif",".bmp",".svg",".webp"]: self.image_files.append(path) diff --git a/lollms/services/open_ai_tts/lollms_openai_tts.py b/lollms/services/open_ai_tts/lollms_openai_tts.py index 7c11628..c0ff706 100644 --- a/lollms/services/open_ai_tts/lollms_openai_tts.py +++ b/lollms/services/open_ai_tts/lollms_openai_tts.py @@ -85,11 +85,11 @@ class LollmsOpenAITTS(LollmsTTS): response.write_to_file(speech_file_path) - def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False): + def tts_audio(self, text, speaker:str=None, file_name_or_path:Path|str=None, language="en", use_threading=False): speech_file_path = file_name_or_path response = self.client.audio.speech.create( model=self.model, - voice=self.voice, + voice=self.voice if speaker is None else speaker, input=text, response_format="wav" diff --git a/lollms/services/whisper/lollms_whisper.py b/lollms/services/whisper/lollms_whisper.py index 63e12bb..c4ed67c 100644 --- a/lollms/services/whisper/lollms_whisper.py +++ b/lollms/services/whisper/lollms_whisper.py @@ -18,9 +18,18 @@ from ascii_colors import ASCIIColors, trace_exception from lollms.paths import LollmsPaths import subprocess -if not PackageManager.check_package_installed("whisper"): - install_conda_package("ffmpeg") - PackageManager.install_package("whisper") +try: + if not PackageManager.check_package_installed("whisper"): + PackageManager.install_package("openai-whisper") + try: + install_conda_package("conda-forge::ffmpeg") + except Exception as ex: + trace_exception(ex) + ASCIIColors.red("Couldn't install ffmpeg") +except: + PackageManager.install_package("git+https://github.com/openai/whisper.git") + + import whisper diff --git a/lollms/services/xtts/lollms_xtts.py b/lollms/services/xtts/lollms_xtts.py index 5a6397f..12fbe15 100644 --- a/lollms/services/xtts/lollms_xtts.py +++ b/lollms/services/xtts/lollms_xtts.py @@ -291,7 +291,7 @@ class LollmsXTTS(LollmsTTS): return file_name_or_path - def tts_audio(self, text, speaker, file_name_or_path:Path|str=None, language="en", use_threading=False): + def tts_audio(self, text, speaker=None, file_name_or_path:Path|str=None, language="en", use_threading=False): voice=self.app.config.xtts_current_voice if speaker is None else speaker index = find_first_available_file_index(self.output_folder, "voice_sample_",".wav") output_fn=f"voice_sample_{index}.wav" if file_name_or_path is None else file_name_or_path diff --git a/lollms/tts.py b/lollms/tts.py index 94c92e5..858d97b 100644 --- a/lollms/tts.py +++ b/lollms/tts.py @@ -75,7 +75,7 @@ class LollmsTTS: """ pass - def tts_audio(self, text, speaker, file_name_or_path: Path | str = None, language="en", use_threading=False): + def tts_audio(self, text, speaker=None, file_name_or_path: Path | str = None, language="en", use_threading=False): """ Converts the given text to speech and returns the audio data.