Compare commits

..

4 Commits

Author SHA1 Message Date
c8b3bc6a0d cuda : use CUBLAS_COMPTE_F32 insted of CUBLAS_COMPUTE_F16 2023-11-27 11:57:07 +02:00
f52e74d4dc CI : Rectify the Clang-Related workflow issues (#1551)
* fix bugs in workflow

* fix missing clang in workflow

* Update build.yml
2023-11-27 11:35:37 +02:00
23c21e92eb server : automatically convert audio on the server (#1539)
* server : automatically convert audio on the server

* server : remove rebundant comments

* server : automatic conversion refactor

* server : update server readme

* server : remove unnecessary comments and tabs

* server : put back remove calling

* server : apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server : check ffmpeg before the server lunch

* server : fix indentation

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server : fix function typo calling

* server : fix function typo calling

* server : add warning in readme

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-11-27 11:28:34 +02:00
447d49530c whisper : remove trailing whitespaces 2023-11-24 13:13:21 +02:00
5 changed files with 89 additions and 30 deletions

View File

@ -25,6 +25,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev
make
@ -86,6 +87,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
@ -113,8 +115,10 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
apt install -y clang
apt install -y clang build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
ctest -L gh --output-on-failure'
@ -140,6 +144,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
@ -217,10 +222,10 @@ jobs:
sdl2: [ON]
include:
- arch: Win32
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x86.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
s2arc: x86
- arch: x64
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x64.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
s2arc: x64
- sdl2: ON
s2ver: 2.26.0

View File

@ -43,8 +43,12 @@ options:
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
--port PORT, [8080 ] Port number for the server
--convert, [false ] Convert audio to WAV, requires ffmpeg on the server
```
> [!WARNING]
> **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
## request examples
**/inference**

View File

@ -43,6 +43,8 @@ struct server_params
int32_t port = 8080;
int32_t read_timeout = 600;
int32_t write_timeout = 600;
bool ffmpeg_converter = false;
};
struct whisper_params {
@ -157,6 +159,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
fprintf(stderr, "\n");
}
@ -203,6 +206,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params, sparams);
@ -220,6 +224,45 @@ struct whisper_print_user_data {
int progress_prev;
};
void check_ffmpeg_availibility() {
int result = system("ffmpeg -version");
if (result == 0) {
std::cout << "ffmpeg is available." << std::endl;
} else {
// ffmpeg is not available
std::cout << "ffmpeg is not found. Please ensure that ffmpeg is installed ";
std::cout << "and that its executable is included in your system's PATH. ";
exit(0);
}
}
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
std::ostringstream cmd_stream;
std::string converted_filename_temp = temp_filename + "_temp.wav";
cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
std::string cmd = cmd_stream.str();
int status = std::system(cmd.c_str());
if (status != 0) {
error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
return false;
}
// Remove the original file
if (remove(temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to remove the original file.\"}";
return false;
}
// Rename the temporary file to match the original filename
if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
return false;
}
return true;
}
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
std::string speaker = "";
const int64_t n_samples = pcmf32s[0].size();
@ -407,6 +450,9 @@ int main(int argc, char ** argv) {
exit(0);
}
if (sparams.ffmpeg_converter) {
check_ffmpeg_availibility();
}
// whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
@ -462,6 +508,18 @@ int main(int argc, char ** argv) {
temp_file << audio_file.content;
temp_file.close();
// if file is not wav, convert to wav
if (sparams.ffmpeg_converter) {
std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
const bool is_converted = convert_to_wav(temp_filename, error_resp);
if (!is_converted) {
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
}
// read wav content into pcmf32
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
@ -509,7 +567,6 @@ int main(int argc, char ** argv) {
// run the inference
{
printf("Running whisper.cpp inference on %s\n", filename.c_str());
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

View File

@ -7432,7 +7432,7 @@ static void ggml_cuda_mul_mat_vec_nc(const ggml_tensor * src0, const ggml_tensor
}
__global__ static void k_compute_batched_ptrs(
const half * src0_as_f16, const half * src1_as_f16, half * dst_f16,
const half * src0_as_f16, const half * src1_as_f16, float * dst_f32,
const void ** ptrs_src, void ** ptrs_dst,
int ne12, int ne13,
int ne23,
@ -7452,7 +7452,7 @@ __global__ static void k_compute_batched_ptrs(
ptrs_src[0*ne23 + i12 + i13*ne12] = (const char *) src0_as_f16 + i02*nb02 + i03*nb03;
ptrs_src[1*ne23 + i12 + i13*ne12] = (const char *) src1_as_f16 + i12*nb12/2 + i13*nb13/2;
ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst_f16 + i12* nb2/2 + i13* nb3/2;
ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst_f32 + i12* nb2 + i13* nb3 ;
}
static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@ -7509,9 +7509,6 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
half * src1_as_f16 = (half *) ggml_cuda_pool_malloc(ne1 * sizeof(half), &src1_as);
to_fp16_cuda(src1_ddf, src1_as_f16, ne1, main_stream);
size_t dst_as = 0;
half * dst_f16 = (half *) ggml_cuda_pool_malloc(ne * sizeof(half), &dst_as);
GGML_ASSERT(ne12 % ne02 == 0);
GGML_ASSERT(ne13 % ne03 == 0);
@ -7519,8 +7516,8 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
const int64_t r2 = ne12/ne02;
const int64_t r3 = ne13/ne03;
const half alpha_f16 = 1.0f;
const half beta_f16 = 0.0f;
const float alpha = 1.0f;
const float beta = 0.0f;
#if 0
// use cublasGemmEx
@ -7533,10 +7530,10 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const char *) src0_as_f16 + i02*src0->nb[2] + i03*src0->nb[3] , CUDA_R_16F, nb01/sizeof(half),
(const char *) src1_as_f16 + i12*src1->nb[2]/2 + i13*src1->nb[3]/2, CUDA_R_16F, nb11/sizeof(float),
&beta_f16, ( char *) dst_f16 + i12* dst->nb[2]/2 + i13* dst->nb[3]/2, CUDA_R_16F, ne01,
CUBLAS_COMPUTE_16F,
&alpha, (const char *) src0_as_f16 + i02*src0->nb[2] + i03*src0->nb[3] , CUDA_R_16F, nb01/sizeof(half),
(const char *) src1_as_f16 + i12*src1->nb[2]/2 + i13*src1->nb[3]/2, CUDA_R_16F, nb11/sizeof(float),
&beta, ( char *) dst_ddf + i12* dst->nb[2] + i13* dst->nb[3] , CUDA_R_32F, ne01,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
}
}
@ -7548,11 +7545,11 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmStridedBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const char *) src0_as_f16, CUDA_R_16F, nb01/sizeof(half), src0->nb[2]/sizeof(half), // strideA
(const char *) src1_as_f16, CUDA_R_16F, nb11/sizeof(float), src1->nb[2]/sizeof(float), // strideB
&beta_f16, ( char *) dst_f16, CUDA_R_16F, ne01, dst->nb[2]/sizeof(float), // strideC
&alpha, (const char *) src0_as_f16, CUDA_R_16F, nb01/sizeof(half), src0->nb[2]/sizeof(half), // strideA
(const char *) src1_as_f16, CUDA_R_16F, nb11/sizeof(float), src1->nb[2]/sizeof(float), // strideB
&beta, ( char *) dst_ddf, CUDA_R_32F, ne01, dst->nb[2]/sizeof(float), // strideC
ne12*ne13,
CUBLAS_COMPUTE_16F,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
} else {
// use cublasGemmBatchedEx
@ -7569,7 +7566,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
dim3 block_dims(ne13, ne12);
k_compute_batched_ptrs<<<1, block_dims, 0, main_stream>>>(
src0_as_f16, src1_as_f16, dst_f16,
src0_as_f16, src1_as_f16, dst_ddf,
ptrs_src, ptrs_dst,
ne12, ne13,
ne23,
@ -7582,11 +7579,11 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const void **) (ptrs_src + 0*ne23), CUDA_R_16F, nb01/sizeof(half),
(const void **) (ptrs_src + 1*ne23), CUDA_R_16F, nb11/sizeof(float),
&beta_f16, ( void **) (ptrs_dst + 0*ne23), CUDA_R_16F, ne01,
&alpha, (const void **) (ptrs_src + 0*ne23), CUDA_R_16F, nb01/sizeof(half),
(const void **) (ptrs_src + 1*ne23), CUDA_R_16F, nb11/sizeof(float),
&beta, ( void **) (ptrs_dst + 0*ne23), CUDA_R_32F, ne01,
ne23,
CUBLAS_COMPUTE_16F,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
if (ptrs_src_s != 0) {
@ -7598,11 +7595,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
}
#endif
const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_F16);
to_fp32_cuda(dst_f16, dst_ddf, ne, main_stream);
ggml_cuda_pool_free(src1_as_f16, src1_as);
ggml_cuda_pool_free(dst_f16, dst_as);
}
static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {

View File

@ -51,7 +51,7 @@ extern "C" {
// ...
//
// whisper_context_params cparams = whisper_context_default_params();
//
//
// struct whisper_context * ctx = whisper_init_from_file_with_params("/path/to/ggml-base.en.bin", cparams);
//
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
@ -316,7 +316,7 @@ extern "C" {
WHISPER_API const char * whisper_lang_str(int id);
// Return the short string of the specified language name (e.g. 2 -> "german"), returns nullptr if not found
WHISPER_API const char * whisper_lang_str_full(int id);
WHISPER_API const char * whisper_lang_str_full(int id);
// Use mel data at offset_ms to try and auto-detect the spoken language
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first