mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-02-17 15:50:24 +00:00
cmake : enable and fix -Wall -Wextra -Wpedantic C++ warnings
This commit is contained in:
parent
8e3f129b4d
commit
99da1e5cc8
@ -132,6 +132,12 @@ if (WHISPER_ALL_WARNINGS)
|
|||||||
-Wstrict-prototypes \
|
-Wstrict-prototypes \
|
||||||
-Wpointer-arith \
|
-Wpointer-arith \
|
||||||
")
|
")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
||||||
|
-Wall \
|
||||||
|
-Wextra \
|
||||||
|
-Wpedantic \
|
||||||
|
-Wcast-qual \
|
||||||
|
")
|
||||||
else()
|
else()
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
endif()
|
endif()
|
||||||
|
@ -33,7 +33,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
@ -81,7 +81,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -387,7 +387,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
|
|||||||
float energy_all = 0.0f;
|
float energy_all = 0.0f;
|
||||||
float energy_last = 0.0f;
|
float energy_last = 0.0f;
|
||||||
|
|
||||||
for (size_t i = 0; i < n_samples; i++) {
|
for (int i = 0; i < n_samples; i++) {
|
||||||
energy_all += fabsf(pcmf32[i]);
|
energy_all += fabsf(pcmf32[i]);
|
||||||
|
|
||||||
if (i >= n_samples - n_samples_last) {
|
if (i >= n_samples - n_samples_last) {
|
||||||
@ -594,7 +594,7 @@ int main(int argc, char ** argv) {
|
|||||||
whisper_token tokens[1024];
|
whisper_token tokens[1024];
|
||||||
allowed_tokens.emplace_back();
|
allowed_tokens.emplace_back();
|
||||||
|
|
||||||
for (int l = 0; l < cmd.size(); ++l) {
|
for (int l = 0; l < (int) cmd.size(); ++l) {
|
||||||
// NOTE: very important to add the whitespace !
|
// NOTE: very important to add the whitespace !
|
||||||
// the reason is that the first decoded token starts with a whitespace too!
|
// the reason is that the first decoded token starts with a whitespace too!
|
||||||
std::string ss = std::string(" ") + cmd.substr(0, l + 1);
|
std::string ss = std::string(" ") + cmd.substr(0, l + 1);
|
||||||
@ -843,15 +843,15 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// best command
|
// best command
|
||||||
{
|
{
|
||||||
|
const auto t_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
fprintf(stdout, "\n");
|
fprintf(stdout, "\n");
|
||||||
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
|
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
|
||||||
"\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
|
"\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
|
||||||
(int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count());
|
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
|
||||||
fprintf(stdout, "\n");
|
fprintf(stdout, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto t_end = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
audio.clear();
|
audio.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -129,7 +129,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
|
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -328,7 +328,7 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
// karaoke video generation
|
// karaoke video generation
|
||||||
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
||||||
// TODO: font parameter adjustments
|
// TODO: font parameter adjustments
|
||||||
bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
|
bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
|
|
||||||
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
||||||
@ -377,7 +377,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
txt_ul = "\\ \\ ";
|
txt_ul = "\\ \\ ";
|
||||||
|
|
||||||
{
|
{
|
||||||
int ncnt = 0;
|
|
||||||
for (int k = 0; k < n; ++k) {
|
for (int k = 0; k < n; ++k) {
|
||||||
const auto & token2 = tokens[k];
|
const auto & token2 = tokens[k];
|
||||||
|
|
||||||
@ -401,8 +400,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
txt_ul += "\\ ";
|
txt_ul += "\\ ";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ncnt += txt.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
::replace_all(txt_bg, "'", "\u2019");
|
::replace_all(txt_bg, "'", "\u2019");
|
||||||
@ -637,7 +634,7 @@ int main(int argc, char ** argv) {
|
|||||||
{
|
{
|
||||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||||
|
|
||||||
wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
|
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
|
||||||
bool is_aborted = *(bool*)user_data;
|
bool is_aborted = *(bool*)user_data;
|
||||||
return !is_aborted;
|
return !is_aborted;
|
||||||
};
|
};
|
||||||
|
@ -90,7 +90,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -391,7 +391,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
|
|||||||
float energy_all = 0.0f;
|
float energy_all = 0.0f;
|
||||||
float energy_last = 0.0f;
|
float energy_last = 0.0f;
|
||||||
|
|
||||||
for (size_t i = 0; i < n_samples; i++) {
|
for (int i = 0; i < n_samples; i++) {
|
||||||
energy_all += fabsf(pcmf32[i]);
|
energy_all += fabsf(pcmf32[i]);
|
||||||
|
|
||||||
if (i >= n_samples - n_samples_last) {
|
if (i >= n_samples - n_samples_last) {
|
||||||
|
@ -78,7 +78,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
|
|||||||
const float * logits,
|
const float * logits,
|
||||||
int top_k,
|
int top_k,
|
||||||
double top_p,
|
double top_p,
|
||||||
double temp,
|
double /*temp*/,
|
||||||
std::mt19937 & rng) {
|
std::mt19937 & rng) {
|
||||||
int n_logits = vocab.id_to_token.size();
|
int n_logits = vocab.id_to_token.size();
|
||||||
|
|
||||||
@ -268,7 +268,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|||||||
fin.read((char *) &len, sizeof(len));
|
fin.read((char *) &len, sizeof(len));
|
||||||
|
|
||||||
word.resize(len);
|
word.resize(len);
|
||||||
fin.read((char *) word.data(), len);
|
fin.read((char *) &word[0], len);
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
vocab.id_to_token[i] = word;
|
||||||
@ -884,7 +884,7 @@ std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens)
|
|||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
|
for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) {
|
||||||
// predict
|
// predict
|
||||||
if (embd.size() > 0) {
|
if (embd.size() > 0) {
|
||||||
if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
|
if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
|
||||||
|
@ -79,7 +79,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
|
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -397,7 +397,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
|
|||||||
float energy_all = 0.0f;
|
float energy_all = 0.0f;
|
||||||
float energy_last = 0.0f;
|
float energy_last = 0.0f;
|
||||||
|
|
||||||
for (size_t i = 0; i < n_samples; i++) {
|
for (int i = 0; i < n_samples; i++) {
|
||||||
energy_all += fabsf(pcmf32[i]);
|
energy_all += fabsf(pcmf32[i]);
|
||||||
|
|
||||||
if (i >= n_samples - n_samples_last) {
|
if (i >= n_samples - n_samples_last) {
|
||||||
@ -541,7 +541,6 @@ int main(int argc, char ** argv) {
|
|||||||
bool force_speak = false;
|
bool force_speak = false;
|
||||||
|
|
||||||
float prob0 = 0.0f;
|
float prob0 = 0.0f;
|
||||||
float prob = 0.0f;
|
|
||||||
|
|
||||||
std::vector<float> pcmf32_cur;
|
std::vector<float> pcmf32_cur;
|
||||||
std::vector<float> pcmf32_prompt;
|
std::vector<float> pcmf32_prompt;
|
||||||
|
@ -621,7 +621,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
||||||
|
|
||||||
size_t ctx_size = 0;
|
size_t ctx_size = 0;
|
||||||
size_t ctx_mem_size = 0;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
const auto & hparams = model.hparams;
|
const auto & hparams = model.hparams;
|
||||||
@ -730,12 +729,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
|
ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
|
|
||||||
ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
|
|
||||||
|
|
||||||
ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
|
|
||||||
ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
|
|
||||||
|
|
||||||
ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
|
ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
|
||||||
|
|
||||||
fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
|
fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
|
||||||
@ -2043,7 +2036,7 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) {
|
|||||||
static bool log_mel_spectrogram(
|
static bool log_mel_spectrogram(
|
||||||
const float * samples,
|
const float * samples,
|
||||||
const int n_samples,
|
const int n_samples,
|
||||||
const int sample_rate,
|
const int /*sample_rate*/,
|
||||||
const int fft_size,
|
const int fft_size,
|
||||||
const int fft_step,
|
const int fft_step,
|
||||||
const int n_mel,
|
const int n_mel,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user