ruby : add VAD support, migration to Ruby's newer API (#3197)
Some checks failed
Bindings Tests (Ruby) / ubuntu-22 (push) Has been cancelled
CI / determine-tag (push) Has been cancelled
CI / ubuntu-22 (linux/amd64) (push) Has been cancelled
CI / ubuntu-22 (linux/ppc64le) (push) Has been cancelled
CI / ubuntu-22-arm64 (linux/arm64) (push) Has been cancelled
CI / ubuntu-22-arm-v7 (linux/arm/v7) (push) Has been cancelled
CI / macOS-latest (generic/platform=iOS) (push) Has been cancelled
CI / macOS-latest (generic/platform=macOS) (push) Has been cancelled
CI / macOS-latest (generic/platform=tvOS) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-arm64 (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc-arm64 (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Debug) (push) Has been cancelled
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-22-clang (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-22-clang (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, ADDRESS) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, THREAD) (push) Has been cancelled
CI / ubuntu-22-gcc-sanitized (linux/amd64, UNDEFINED) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / windows-msys2 (Release, clang-x86_64, CLANG64) (push) Has been cancelled
CI / windows-msys2 (Release, ucrt-x86_64, UCRT64) (push) Has been cancelled
CI / windows (Win32, Release, win32-x86, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows (x64, Release, win32-x86-64, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (Win32, ON, x86, 0.3.29, Release, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (x64, ON, x64_64, 0.3.29, Release, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 11.8.0, ON, 2.28.5) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 12.2.0, ON, 2.28.5) (push) Has been cancelled
CI / emscripten (Release) (push) Has been cancelled
CI / android (push) Has been cancelled
CI / android_java (push) Has been cancelled
CI / quantize (push) Has been cancelled
CI / vad (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main-musa.Dockerfile platform:linux/amd64 tag:main-musa]) (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main.Dockerfile platform:linux/amd64 tag:main]) (push) Has been cancelled
Examples WASM / deploy-wasm-github-pages (push) Has been cancelled
CI / ios-xcode-build (Release) (push) Has been cancelled
CI / bindings-java (push) Has been cancelled
CI / release (push) Has been cancelled
CI / coreml-base-en (push) Has been cancelled

* Add VAD models

* Extract function to normalize model path from ruby_whisper_initialize()

* Define ruby_whisper_vad_params struct

* Add VAD-related features to Whisper::Params

* Add tests for VAD-related features

* Define Whisper::VADParams

* Add Whisper::VAD::Params attributes

* Add test suite for VAD::Params

* Make older test to follow namespace change

* Add test for transcription with VAD

* Add assertion for test_vad_params

* Add signatures for VAD-related methods

* Define VAD::Params#==

* Add test for VAD::Params#==

* Fix Params#vad_params

* Add test for Params#vad_params

* Fix signature of Params#vad_params

* Use macro to define VAD::Params params

* Define VAD::Params#initialize

* Add tests for VAD::Params#initialize

* Add signature for VAD::Params.new

* Add documentation on VAD in README

* Wrap register_callbask in prepare_transcription for clear meanings

* Set whisper_params.vad_params just before transcription

* Don't touch NULL

* Define ruby_whisper_params_type

* Use TypedData_XXX for ruby_whisper_params instead of Data_XXX

* Remove unused functions

* Define rb_whisper_model_data_type

* Use TypedData_XXX for ruby_whisper_model instead of Data_XXX

* Define ruby_whisper_segment_type

* Use TypedData_XXX for ruby_whisper_segment instead of Data_XXX

* Define ruby_whisper_type

* Use TypedData_XXX for ruby_whisper instead of Data_XXX

* Qualify with const
This commit is contained in:
KITAITI Makoto
2025-05-28 20:05:12 +09:00
committed by GitHub
parent 5720426d97
commit 1f5fdbecb4
14 changed files with 925 additions and 173 deletions

View File

@ -1,22 +1,44 @@
#include <ruby.h>
#include "ruby_whisper.h"
extern const rb_data_type_t ruby_whisper_type;
extern VALUE cModel;
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
rb_gc_mark(rwm->context);
static void rb_whisper_model_mark(void *p) {
ruby_whisper_model *rwm = (ruby_whisper_model *)p;
if (rwm->context) {
rb_gc_mark(rwm->context);
}
}
static size_t
ruby_whisper_model_memsize(const void *p)
{
const ruby_whisper_model *rwm = (const ruby_whisper_model *)p;
size_t size = sizeof(rwm);
if (!rwm) {
return 0;
}
return size;
}
static const rb_data_type_t rb_whisper_model_type = {
"ruby_whisper_model",
{rb_whisper_model_mark, RUBY_DEFAULT_FREE, ruby_whisper_model_memsize,},
0, 0,
0
};
static VALUE ruby_whisper_model_allocate(VALUE klass) {
ruby_whisper_model *rwm;
rwm = ALLOC(ruby_whisper_model);
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
}
VALUE rb_whisper_model_initialize(VALUE context) {
ruby_whisper_model *rwm;
const VALUE model = ruby_whisper_model_allocate(cModel);
Data_Get_Struct(model, ruby_whisper_model, rwm);
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
rwm->context = context;
return model;
};
@ -29,9 +51,9 @@ static VALUE
ruby_whisper_model_n_vocab(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_vocab(rw->context));
}
@ -43,9 +65,9 @@ static VALUE
ruby_whisper_model_n_audio_ctx(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
}
@ -57,9 +79,9 @@ static VALUE
ruby_whisper_model_n_audio_state(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_audio_state(rw->context));
}
@ -71,9 +93,9 @@ static VALUE
ruby_whisper_model_n_audio_head(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_audio_head(rw->context));
}
@ -85,9 +107,9 @@ static VALUE
ruby_whisper_model_n_audio_layer(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_audio_layer(rw->context));
}
@ -99,9 +121,9 @@ static VALUE
ruby_whisper_model_n_text_ctx(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_text_ctx(rw->context));
}
@ -113,9 +135,9 @@ static VALUE
ruby_whisper_model_n_text_state(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_text_state(rw->context));
}
@ -127,9 +149,9 @@ static VALUE
ruby_whisper_model_n_text_head(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_text_head(rw->context));
}
@ -141,9 +163,9 @@ static VALUE
ruby_whisper_model_n_text_layer(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_text_layer(rw->context));
}
@ -155,9 +177,9 @@ static VALUE
ruby_whisper_model_n_mels(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_n_mels(rw->context));
}
@ -169,9 +191,9 @@ static VALUE
ruby_whisper_model_ftype(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return INT2NUM(whisper_model_ftype(rw->context));
}
@ -183,9 +205,9 @@ static VALUE
ruby_whisper_model_type(VALUE self)
{
ruby_whisper_model *rwm;
Data_Get_Struct(self, ruby_whisper_model, rwm);
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
ruby_whisper *rw;
Data_Get_Struct(rwm->context, ruby_whisper, rw);
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
return rb_str_new2(whisper_model_type_readable(rw->context));
}