mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-19 20:57:52 +00:00
server : automatically convert audio on the server (#1539)
* server : automatically convert audio on the server * server : remove redundant comments * server : automatic conversion refactor * server : update server readme * server : remove unnecessary comments and tabs * server : put back remove calling * server : apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : check ffmpeg before the server launch * server : fix indentation * Apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : fix function typo calling * server : fix function typo calling * server : add warning in readme --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
447d49530c
commit
23c21e92eb
@ -43,8 +43,12 @@ options:
|
||||
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
||||
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
|
||||
--port PORT, [8080 ] Port number for the server
|
||||
--convert, [false ] Convert audio to WAV, requires ffmpeg on the server
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
|
||||
|
||||
## request examples
|
||||
|
||||
**/inference**
|
||||
|
@ -43,6 +43,8 @@ struct server_params
|
||||
int32_t port = 8080;
|
||||
int32_t read_timeout = 600;
|
||||
int32_t write_timeout = 600;
|
||||
|
||||
bool ffmpeg_converter = false;
|
||||
};
|
||||
|
||||
struct whisper_params {
|
||||
@ -157,6 +159,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
|
||||
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
||||
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
||||
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
@ -203,6 +206,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
|
||||
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
||||
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
||||
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
@ -220,6 +224,45 @@ struct whisper_print_user_data {
|
||||
int progress_prev;
|
||||
};
|
||||
|
||||
// Verify that the ffmpeg executable can be launched.
//
// Runs `ffmpeg -version` through the system shell. When the command succeeds a
// confirmation is printed to stdout. Otherwise a diagnostic is written to
// stderr and the process terminates with a non-zero exit status, so that
// scripts and service managers can tell that startup failed.
void check_ffmpeg_availibility() {
    const int result = system("ffmpeg -version");

    if (result != 0) {
        // ffmpeg is not available: report on stderr and signal failure to the parent process
        std::cerr << "ffmpeg is not found. Please ensure that ffmpeg is installed "
                  << "and that its executable is included in your system's PATH." << std::endl;
        exit(1);
    }

    std::cout << "ffmpeg is available." << std::endl;
}
|
||||
|
||||
// Convert the file at `temp_filename` to 16 kHz, mono, 16-bit PCM WAV, in place.
//
// ffmpeg writes the converted audio to "<temp_filename>_temp.wav"; on success
// the original file is removed and the converted file is renamed to
// `temp_filename`.
//
// Parameters:
//   temp_filename - path of the uploaded audio file; also the path of the result
//   error_resp    - set to a JSON error payload when the function returns false;
//                   left untouched on success
//
// Returns true when the conversion and the file replacement both succeeded.
//
// NOTE(review): both file names are interpolated into a shell command inside
// double quotes, which does not neutralize `$`, backticks or embedded quotes.
// Only pass server-generated names here, never client-supplied ones.
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
    const std::string converted_filename_temp = temp_filename + "_temp.wav";

    // -y: overwrite a stale output left behind by an earlier failed request
    // instead of prompting (or refusing) when the file already exists
    std::ostringstream cmd_stream;
    cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -y -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
    const std::string cmd = cmd_stream.str();

    const int status = std::system(cmd.c_str());
    if (status != 0) {
        // best-effort cleanup of a partially written output file
        remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
        return false;
    }

    // Remove the original file first: rename() onto an existing file is not
    // portable across platforms
    if (remove(temp_filename.c_str()) != 0) {
        remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"Failed to remove the original file.\"}";
        return false;
    }

    // Rename the temporary file to match the original filename
    if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
        // the request fails anyway; do not leave the orphaned output behind
        remove(converted_filename_temp.c_str());
        error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
        return false;
    }

    return true;
}
|
||||
|
||||
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
||||
std::string speaker = "";
|
||||
const int64_t n_samples = pcmf32s[0].size();
|
||||
@ -407,6 +450,9 @@ int main(int argc, char ** argv) {
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (sparams.ffmpeg_converter) {
|
||||
check_ffmpeg_availibility();
|
||||
}
|
||||
// whisper init
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
@ -462,6 +508,18 @@ int main(int argc, char ** argv) {
|
||||
temp_file << audio_file.content;
|
||||
temp_file.close();
|
||||
|
||||
// if file is not wav, convert to wav
|
||||
|
||||
if (sparams.ffmpeg_converter) {
|
||||
std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
|
||||
const bool is_converted = convert_to_wav(temp_filename, error_resp);
|
||||
if (!is_converted) {
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// read wav content into pcmf32
|
||||
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
|
||||
@ -509,7 +567,6 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// run the inference
|
||||
{
|
||||
|
||||
printf("Running whisper.cpp inference on %s\n", filename.c_str());
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user