// Check if a buffer is a WAV audio file.
//
// Returns true only when buf starts with a RIFF header whose form type is
// "WAVE" and whose declared chunk size accounts for the entire buffer:
//
//   bytes 0..3    "RIFF"
//   bytes 4..7    chunk size, 32-bit little-endian (number of bytes after this field)
//   bytes 8..11   "WAVE"
//
// read_wav() calls this to decide whether its fname argument holds in-memory
// WAV data (handed to drwav_init_memory) instead of a file name.
//
// The parameter is taken by value to stay in sync with the declaration in
// common.h.
//
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
bool is_wav_buffer(const std::string buf) {
    const size_t riff_prefix_size = 8;  // "RIFF" tag + 32-bit chunk size field
    const size_t min_header_size  = 12; // prefix + "WAVE" form type

    // compare() checks the four tag bytes in place, without building temporary
    // strings the way substr() would
    if (buf.size() < min_header_size ||
        buf.compare(0, 4, "RIFF") != 0 ||
        buf.compare(8, 4, "WAVE") != 0) {
        return false;
    }

    // Assemble the size byte by byte: buf.data() + 4 is not guaranteed to be
    // suitably aligned for a uint32_t load, and the field is little-endian
    // regardless of the host byte order
    const unsigned char * size_bytes = reinterpret_cast<const unsigned char *>(buf.data()) + 4;
    const uint32_t chunk_size =
        (static_cast<uint32_t>(size_bytes[0])      ) |
        (static_cast<uint32_t>(size_bytes[1]) <<  8) |
        (static_cast<uint32_t>(size_bytes[2]) << 16) |
        (static_cast<uint32_t>(size_bytes[3]) << 24);

    // The chunk size excludes the 8-byte prefix. Subtract on the size_t side
    // (safe: size >= 12 here) so that chunk_size + 8 cannot wrap around in
    // 32-bit arithmetic
    return buf.size() - riff_prefix_size == chunk_size;
}