mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-06 09:11:33 +00:00
Some checks failed
Bindings Tests (Ruby) / ubuntu-22 (push) Has been cancelled
CI / determine-tag (push) Has been cancelled
CI / ubuntu-22 (linux/amd64) (push) Has been cancelled
CI / ubuntu-22 (linux/ppc64le) (push) Has been cancelled
CI / ubuntu-22-arm64 (linux/arm64) (push) Has been cancelled
CI / ubuntu-22-arm-v7 (linux/arm/v7) (push) Has been cancelled
CI / macOS-latest (generic/platform=iOS) (push) Has been cancelled
CI / macOS-latest (generic/platform=macOS) (push) Has been cancelled
CI / macOS-latest (generic/platform=tvOS) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-arm64 (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc-arm64 (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, ADDRESS) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, THREAD) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, UNDEFINED) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / windows-msys2 (Release, clang-x86_64, CLANG64) (push) Has been cancelled
CI / windows-msys2 (Release, ucrt-x86_64, UCRT64) (push) Has been cancelled
CI / windows (Win32, Release, win32-x86, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows (x64, Release, win32-x86-64, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (Win32, ON, x86, 0.3.29, Release, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (x64, ON, x64_64, 0.3.29, Release, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 11.8.0, ON, 2.28.5) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 12.2.0, ON, 2.28.5) (push) Has been cancelled
CI / emscripten (Release) (push) Has been cancelled
CI / android (push) Has been cancelled
CI / android_java (push) Has been cancelled
CI / quantize (push) Has been cancelled
CI / vad (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main-musa.Dockerfile platform:linux/amd64 tag:main-musa]) (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main.Dockerfile platform:linux/amd64 tag:main]) (push) Has been cancelled
Examples WASM / deploy-wasm-github-pages (push) Has been cancelled
CI / ios-xcode-build (Release) (push) Has been cancelled
CI / bindings-java (push) Has been cancelled
CI / release (push) Has been cancelled
CI / coreml-base-en (push) Has been cancelled
* Add VAD models * Extract function to normalize model path from ruby_whisper_initialize() * Define ruby_whisper_vad_params struct * Add VAD-related features to Whisper::Params * Add tests for VAD-related features * Define Whisper::VADParams * Add Whisper::VAD::Params attributes * Add test suite for VAD::Params * Make older test follow namespace change * Add test for transcription with VAD * Add assertion for test_vad_params * Add signatures for VAD-related methods * Define VAD::Params#== * Add test for VAD::Params#== * Fix Params#vad_params * Add test for Params#vad_params * Fix signature of Params#vad_params * Use macro to define VAD::Params params * Define VAD::Params#initialize * Add tests for VAD::Params#initialize * Add signature for VAD::Params.new * Add documentation on VAD in README * Wrap register_callbacks in prepare_transcription for clearer meaning * Set whisper_params.vad_params just before transcription * Don't touch NULL * Define ruby_whisper_params_type * Use TypedData_XXX for ruby_whisper_params instead of Data_XXX * Remove unused functions * Define rb_whisper_model_data_type * Use TypedData_XXX for ruby_whisper_model instead of Data_XXX * Define ruby_whisper_segment_type * Use TypedData_XXX for ruby_whisper_segment instead of Data_XXX * Define ruby_whisper_type * Use TypedData_XXX for ruby_whisper instead of Data_XXX * Qualify with const
144 lines
4.1 KiB
C
144 lines
4.1 KiB
C
#include <ruby.h>
|
|
#include "ruby_whisper.h"
|
|
|
|
extern const rb_data_type_t ruby_whisper_type;
|
|
|
|
extern VALUE cSegment;
|
|
|
|
static void
|
|
rb_whisper_segment_mark(void *p)
|
|
{
|
|
ruby_whisper_segment *rws = (ruby_whisper_segment *)p;
|
|
rb_gc_mark(rws->context);
|
|
}
|
|
|
|
static size_t
|
|
ruby_whisper_segment_memsize(const void *p)
|
|
{
|
|
const ruby_whisper_segment *rws = (const ruby_whisper_segment *)p;
|
|
size_t size = sizeof(rws);
|
|
if (!rws) {
|
|
return 0;
|
|
}
|
|
return size;
|
|
}
|
|
|
|
static const rb_data_type_t ruby_whisper_segment_type = {
|
|
"ruby_whisper_segment",
|
|
{rb_whisper_segment_mark, RUBY_DEFAULT_FREE, ruby_whisper_segment_memsize,},
|
|
0, 0,
|
|
0
|
|
};
|
|
|
|
VALUE
|
|
ruby_whisper_segment_allocate(VALUE klass)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
return TypedData_Make_Struct(klass, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
}
|
|
|
|
VALUE
|
|
rb_whisper_segment_initialize(VALUE context, int index)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
|
TypedData_Get_Struct(segment, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
rws->context = context;
|
|
rws->index = index;
|
|
return segment;
|
|
};
|
|
|
|
/*
|
|
* Start time in milliseconds.
|
|
*
|
|
* call-seq:
|
|
* start_time -> Integer
|
|
*/
|
|
static VALUE
|
|
ruby_whisper_segment_get_start_time(VALUE self)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
ruby_whisper *rw;
|
|
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
|
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
|
return INT2NUM(t0 * 10);
|
|
}
|
|
|
|
/*
|
|
* End time in milliseconds.
|
|
*
|
|
* call-seq:
|
|
* end_time -> Integer
|
|
*/
|
|
static VALUE
|
|
ruby_whisper_segment_get_end_time(VALUE self)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
ruby_whisper *rw;
|
|
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
|
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
|
return INT2NUM(t1 * 10);
|
|
}
|
|
|
|
/*
|
|
* Whether the next segment is predicted as a speaker turn.
|
|
*
|
|
* call-seq:
|
|
* speaker_turn_next? -> bool
|
|
*/
|
|
static VALUE
|
|
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
ruby_whisper *rw;
|
|
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
|
}
|
|
|
|
/*
|
|
* call-seq:
|
|
* text -> String
|
|
*/
|
|
static VALUE
|
|
ruby_whisper_segment_get_text(VALUE self)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
ruby_whisper *rw;
|
|
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
|
return rb_str_new2(text);
|
|
}
|
|
|
|
/*
|
|
* call-seq:
|
|
* no_speech_prob -> Float
|
|
*/
|
|
static VALUE
|
|
ruby_whisper_segment_get_no_speech_prob(VALUE self)
|
|
{
|
|
ruby_whisper_segment *rws;
|
|
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
ruby_whisper *rw;
|
|
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
|
|
}
|
|
|
|
void
|
|
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
|
|
{
|
|
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
|
|
|
|
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
|
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
|
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
|
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
|
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
|
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
|
|
}
|