"""Mono PCM audio container with converters to RAW, WAV, AIFF-C and FLAC."""

import io
import os
import platform
import stat
import subprocess
import sys
import wave

# ``aifc`` and ``audioop`` were removed from the standard library in Python
# 3.13. Import them defensively so the parts of this module that do not need
# them (segmenting, conversion-free WAV/FLAC export) keep working there.
try:
    import aifc
except ImportError:
    aifc = None

try:
    import audioop
except ImportError:
    audioop = None


def _require_module(module, name):
    """Return ``module``, or raise ``ImportError`` if the optional import of ``name`` failed."""
    if module is None:
        raise ImportError(
            "the ``{0}`` module is required for this operation but is not "
            "available in this Python installation".format(name)
        )
    return module


class AudioData(object):
    """
    Creates a new ``AudioData`` instance, which represents mono audio data.

    The raw audio data is specified by ``frame_data``, which is a sequence of bytes representing audio samples. This is the frame data structure used by the PCM WAV format (little-endian; 8-bit samples are unsigned, wider samples are signed).

    The width of each sample, in bytes, is specified by ``sample_width``. Each group of ``sample_width`` bytes represents a single audio sample.

    The audio data is assumed to have a sample rate of ``sample_rate`` samples per second (Hertz).

    Usually, instances of this class are obtained from ``recognizer_instance.record`` or ``recognizer_instance.listen``, or in the callback for ``recognizer_instance.listen_in_background``, rather than instantiating them directly.
    """

    def __init__(self, frame_data, sample_rate, sample_width):
        assert sample_rate > 0, "Sample rate must be a positive integer"
        assert (
            sample_width % 1 == 0 and 1 <= sample_width <= 4
        ), "Sample width must be between 1 and 4 inclusive"
        self.frame_data = frame_data
        self.sample_rate = sample_rate
        self.sample_width = int(sample_width)

    def get_segment(self, start_ms=None, end_ms=None):
        """
        Returns a new ``AudioData`` instance, trimmed to a given time interval. In other words, an ``AudioData`` instance with the same audio data except starting at ``start_ms`` milliseconds in and ending ``end_ms`` milliseconds in.

        If not specified, ``start_ms`` defaults to the beginning of the audio, and ``end_ms`` defaults to the end.

        Both boundaries are rounded down to a whole sample, so the returned frame data always starts and ends on a sample boundary.
        """
        assert (
            start_ms is None or start_ms >= 0
        ), "``start_ms`` must be a non-negative number"
        assert end_ms is None or end_ms >= (
            0 if start_ms is None else start_ms
        ), "``end_ms`` must be a non-negative number greater or equal to ``start_ms``"

        # Work in whole samples first and only then scale to bytes; computing
        # the byte offset directly can land in the middle of a multi-byte
        # sample and corrupt every sample that follows.
        if start_ms is None:
            start_byte = 0
        else:
            start_byte = (
                int((start_ms * self.sample_rate) // 1000) * self.sample_width
            )
        if end_ms is None:
            end_byte = len(self.frame_data)
        else:
            end_byte = (
                int((end_ms * self.sample_rate) // 1000) * self.sample_width
            )
        return AudioData(
            self.frame_data[start_byte:end_byte],
            self.sample_rate,
            self.sample_width,
        )

    def get_raw_data(self, convert_rate=None, convert_width=None):
        """
        Returns a byte string representing the raw frame data for the audio represented by the ``AudioData`` instance.

        If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.

        If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

        Writing these bytes directly to a file results in a valid RAW/PCM audio file. 8-bit output is always unsigned, whether or not a conversion took place.

        Raises ``ImportError`` if a conversion is requested and the ``audioop`` module is unavailable.
        """
        assert (
            convert_rate is None or convert_rate > 0
        ), "Sample rate to convert to must be a positive integer"
        assert convert_width is None or (
            convert_width % 1 == 0 and 1 <= convert_width <= 4
        ), "Sample width to convert to must be between 1 and 4 inclusive"

        needs_resample = (
            convert_rate is not None and self.sample_rate != convert_rate
        )
        needs_rewidth = (
            convert_width is not None and self.sample_width != convert_width
        )
        if not (needs_resample or needs_rewidth):
            # nothing to convert: the stored frames already are the answer
            return self.frame_data

        codec = _require_module(audioop, "audioop")
        raw_data = self.frame_data

        # make sure unsigned 8-bit audio is handled like higher sample width audio (which uses signed samples)
        if self.sample_width == 1:
            raw_data = codec.bias(raw_data, 1, -128)

        # resample audio at the desired rate if specified
        if needs_resample:
            raw_data, _ = codec.ratecv(
                raw_data,
                self.sample_width,
                1,
                self.sample_rate,
                convert_rate,
                None,
            )

        # convert samples to desired sample width if specified
        output_width = self.sample_width
        if needs_rewidth:
            output_width = convert_width
            supports_target = True
            if convert_width == 3:
                # workaround for https://bugs.python.org/issue12866: ``audioop`` in
                # Python 3.3 and below does not support a sample width of 3
                try:
                    codec.bias(b"", 3, 0)
                except codec.error:
                    supports_target = False
            if supports_target:
                raw_data = codec.lin2lin(
                    raw_data, self.sample_width, convert_width
                )
            else:
                # convert to 32-bit (always supported), then, since the data is
                # little endian, drop the least significant byte of each sample
                widened = codec.lin2lin(raw_data, self.sample_width, 4)
                raw_data = b"".join(
                    widened[i + 1 : i + 4] for i in range(0, len(widened), 4)
                )

        # if the output is 8-bit audio, convert the samples we've been treating as signed back to unsigned
        if output_width == 1:
            raw_data = codec.bias(raw_data, 1, 128)
        return raw_data

    def get_wav_data(self, convert_rate=None, convert_width=None, nchannels=1):
        """
        Returns a byte string representing the contents of a WAV file containing the audio represented by the ``AudioData`` instance.

        If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

        If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.

        ``nchannels`` is the channel count written to the WAV header; the frame data itself is written unchanged.

        Writing these bytes directly to a file results in a valid WAV file.
        """
        raw_data = self.get_raw_data(convert_rate, convert_width)
        sample_rate = self.sample_rate if convert_rate is None else convert_rate
        sample_width = (
            self.sample_width if convert_width is None else convert_width
        )

        # generate the WAV file contents
        with io.BytesIO() as wav_file:
            wav_writer = wave.open(wav_file, "wb")
            try:  # note that we can't use context manager, since that was only added in Python 3.4
                wav_writer.setframerate(sample_rate)
                wav_writer.setsampwidth(sample_width)
                wav_writer.setnchannels(nchannels)
                wav_writer.writeframes(raw_data)
                wav_data = wav_file.getvalue()
            finally:  # make sure resources are cleaned up
                wav_writer.close()
        return wav_data

    def get_aiff_data(self, convert_rate=None, convert_width=None):
        """
        Returns a byte string representing the contents of an AIFF-C file containing the audio represented by the ``AudioData`` instance.

        If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

        If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.

        Writing these bytes directly to a file results in a valid AIFF-C file.

        Raises ``ImportError`` if the ``aifc`` module is unavailable.
        """
        aiff_module = _require_module(aifc, "aifc")
        raw_data = self.get_raw_data(convert_rate, convert_width)
        sample_rate = self.sample_rate if convert_rate is None else convert_rate
        sample_width = (
            self.sample_width if convert_width is None else convert_width
        )

        if sample_width == 1:
            # AIFF stores 8-bit samples as signed two's complement, whereas the
            # raw data uses the unsigned WAV convention
            raw_data = _require_module(audioop, "audioop").bias(
                raw_data, 1, -128
            )
        elif hasattr(audioop, "byteswap"):
            # the AIFF format is big-endian, so convert the little-endian raw
            # data (``audioop.byteswap`` was only added in Python 3.4)
            raw_data = audioop.byteswap(raw_data, sample_width)
        else:
            # manually reverse the bytes of each sample, which is slower but works well enough as a fallback
            raw_data = b"".join(
                raw_data[i : i + sample_width][::-1]
                for i in range(0, len(raw_data), sample_width)
            )

        # generate the AIFF-C file contents
        with io.BytesIO() as aiff_file:
            aiff_writer = aiff_module.open(aiff_file, "wb")
            try:  # note that we can't use context manager, since that was only added in Python 3.4
                aiff_writer.setframerate(sample_rate)
                aiff_writer.setsampwidth(sample_width)
                aiff_writer.setnchannels(1)
                aiff_writer.writeframes(raw_data)
                aiff_data = aiff_file.getvalue()
            finally:  # make sure resources are cleaned up
                aiff_writer.close()
        return aiff_data

    def get_flac_data(self, convert_rate=None, convert_width=None):
        """
        Returns a byte string representing the contents of a FLAC file containing the audio represented by the ``AudioData`` instance.

        Note that 32-bit FLAC is not supported. If the audio data is 32-bit and ``convert_width`` is not specified, then the resulting FLAC will be a 24-bit FLAC.

        If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match.

        If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match.

        Writing these bytes directly to a file results in a valid FLAC file.

        Raises ``OSError`` if no FLAC converter executable can be found.
        """
        assert convert_width is None or (
            convert_width % 1 == 0 and 1 <= convert_width <= 3
        ), "Sample width to convert to must be between 1 and 3 inclusive"

        if self.sample_width > 3 and convert_width is None:
            # resulting WAV data would be 32-bit, which is not convertable to
            # FLAC using our encoder, so limit the sample width to 24-bit
            convert_width = 3

        # run the FLAC converter with the WAV data to get the FLAC data
        wav_data = self.get_wav_data(convert_rate, convert_width)
        flac_converter = get_flac_converter()
        if os.name == "nt":
            # on Windows, start the process without showing a console window
            startup_info = subprocess.STARTUPINFO()
            # specify that the wShowWindow field of `startup_info` contains a value
            startup_info.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            # specify that the console window should be hidden
            startup_info.wShowWindow = subprocess.SW_HIDE
        else:
            startup_info = None  # default startupinfo
        process = subprocess.Popen(
            [
                flac_converter,
                # put the resulting FLAC file in stdout, and make sure it's not mixed with any program output
                "--stdout",
                "--totally-silent",
                "--best",  # highest level of compression available
                "-",  # the input WAV file contents will be given in stdin
            ],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            startupinfo=startup_info,
        )
        flac_data, _ = process.communicate(wav_data)
        return flac_data


def get_flac_converter():
    """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
    flac_converter = shutil_which("flac")  # check for installed version first
    if flac_converter is None:  # flac utility is not installed
        # directory of the current module file, where all the FLAC bundled binaries are stored
        base_path = os.path.dirname(os.path.abspath(__file__))
        system, machine = platform.system(), platform.machine()
        x86_32 = {"i686", "i786", "x86"}
        x86_64 = {"x86_64", "AMD64"}
        if system == "Windows" and machine in (x86_32 | x86_64):
            flac_converter = os.path.join(base_path, "flac-win32.exe")
        elif system == "Darwin" and machine in (x86_32 | x86_64):
            flac_converter = os.path.join(base_path, "flac-mac")
        elif system == "Linux" and machine in x86_32:
            flac_converter = os.path.join(base_path, "flac-linux-x86")
        elif system == "Linux" and machine in x86_64:
            flac_converter = os.path.join(base_path, "flac-linux-x86_64")
        else:  # no FLAC converter available
            raise OSError(
                "FLAC conversion utility not available - consider installing the FLAC command line application by running `apt-get install flac` or your operating system's equivalent"
            )

    # mark FLAC converter as executable if possible (best effort: failures are ignored)
    try:
        if not os.access(flac_converter, os.X_OK):
            stat_info = os.stat(flac_converter)
            os.chmod(flac_converter, stat_info.st_mode | stat.S_IEXEC)
            # handle known issue when running on docker: running an executable
            # right after chmod() may result in OSError "Text file busy";
            # flushing the filesystem with sync avoids it
            if "Linux" in platform.system():
                if sys.version_info >= (3, 3):
                    os.sync()
                else:
                    os.system("sync")
    except OSError:
        pass

    return flac_converter


def shutil_which(pgm):
    """
    Python 2 compatibility: backport of ``shutil.which()`` from Python 3.

    Returns the path of the first executable file named ``pgm`` found in the directories listed in the ``PATH`` environment variable, or ``None`` if there is no such file or ``PATH`` is unset.
    """
    search_path = os.getenv("PATH") or ""
    for directory in search_path.split(os.path.pathsep):
        candidate = os.path.join(directory, pgm)
        # require a regular file: directories also pass the ``X_OK`` check
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None