mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-20 05:07:52 +00:00
Merge branch 'ggerganov:master' into Reset_and_infinite_inference
This commit is contained in:
commit
b87d6691f5
@ -18,16 +18,17 @@ let package = Package(
|
||||
name: "whisper",
|
||||
path: ".",
|
||||
exclude: [
|
||||
"build",
|
||||
"bindings",
|
||||
"cmake",
|
||||
"coreml",
|
||||
"examples",
|
||||
"extra",
|
||||
"scripts",
|
||||
"models",
|
||||
"samples",
|
||||
"tests",
|
||||
"CMakeLists.txt",
|
||||
"Makefile"
|
||||
"Makefile",
|
||||
"ggml/src/ggml-metal-embed.metal"
|
||||
],
|
||||
sources: [
|
||||
"ggml/src/ggml.c",
|
||||
@ -38,7 +39,7 @@ let package = Package(
|
||||
"ggml/src/ggml-quants.c",
|
||||
"ggml/src/ggml-metal.m"
|
||||
],
|
||||
resources: [.process("ggml-metal.metal")],
|
||||
resources: [.process("ggml/src/ggml-metal.metal")],
|
||||
publicHeadersPath: "spm-headers",
|
||||
cSettings: [
|
||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
||||
|
@ -31,7 +31,7 @@ params.duration = 60_000
|
||||
params.max_text_tokens = 300
|
||||
params.translate = true
|
||||
params.print_timestamps = false
|
||||
params.prompt = "Initial prompt here."
|
||||
params.initial_prompt = "Initial prompt here."
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
||||
puts whole_text
|
||||
@ -107,5 +107,63 @@ whisper.transcribe("path/to/audio.wav", params)
|
||||
|
||||
```
|
||||
|
||||
You can see model information:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("path/to/model.bin")
|
||||
model = whisper.model
|
||||
|
||||
model.n_vocab # => 51864
|
||||
model.n_audio_ctx # => 1500
|
||||
model.n_audio_state # => 512
|
||||
model.n_audio_head # => 8
|
||||
model.n_audio_layer # => 6
|
||||
model.n_text_ctx # => 448
|
||||
model.n_text_state # => 512
|
||||
model.n_text_head # => 8
|
||||
model.n_text_layer # => 6
|
||||
model.n_mels # => 80
|
||||
model.ftype # => 1
|
||||
model.type # => "base"
|
||||
|
||||
```
|
||||
|
||||
You can set log callback:
|
||||
|
||||
```ruby
|
||||
prefix = "[MyApp] "
|
||||
log_callback = ->(level, buffer, user_data) {
|
||||
case level
|
||||
when Whisper::LOG_LEVEL_NONE
|
||||
puts "#{user_data}none: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_INFO
|
||||
puts "#{user_data}info: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_WARN
|
||||
puts "#{user_data}warn: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_ERROR
|
||||
puts "#{user_data}error: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_DEBUG
|
||||
puts "#{user_data}debug: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_CONT
|
||||
puts "#{user_data}same to previous: #{buffer}"
|
||||
end
|
||||
}
|
||||
Whisper.log_set log_callback, prefix
|
||||
```
|
||||
|
||||
Using this feature, you are also able to suppress log:
|
||||
|
||||
```ruby
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
Whisper::Context.new(MODEL)
|
||||
```
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
The same to [whisper.cpp][].
|
||||
|
||||
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
||||
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
||||
|
@ -23,8 +23,8 @@ CLEAN.include FileList[
|
||||
"ext/depend"
|
||||
]
|
||||
|
||||
task build: SOURCES + FileList[
|
||||
"ext/extconf.rb",
|
||||
task build: FileList[
|
||||
"ext/Makefile",
|
||||
"ext/ruby_whisper.h",
|
||||
"ext/ruby_whisper.cpp",
|
||||
"whispercpp.gemspec",
|
||||
@ -35,18 +35,27 @@ CLOBBER.include "pkg"
|
||||
|
||||
TEST_MODEL = "../../models/ggml-base.en.bin"
|
||||
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
||||
SO_FILE = File.join("ext", LIB_NAME)
|
||||
LIB_FILE = File.join("lib", LIB_NAME)
|
||||
|
||||
directory "lib"
|
||||
task LIB_FILE => SOURCES + ["lib"] do |t|
|
||||
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
|
||||
Dir.chdir "ext" do
|
||||
ruby "extconf.rb"
|
||||
end
|
||||
end
|
||||
|
||||
file SO_FILE => "ext/Makefile" do |t|
|
||||
Dir.chdir "ext" do
|
||||
sh "ruby extconf.rb"
|
||||
sh "make"
|
||||
end
|
||||
mv "ext/#{LIB_NAME}", t.name
|
||||
end
|
||||
CLEAN.include LIB_FILE
|
||||
|
||||
directory "lib"
|
||||
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
||||
copy t.source, t.name
|
||||
end
|
||||
|
||||
Rake::TestTask.new do |t|
|
||||
t.test_files = FileList["tests/test_*.rb"]
|
||||
end
|
||||
|
@ -2,6 +2,9 @@ require 'mkmf'
|
||||
|
||||
# need to use c++ compiler flags
|
||||
$CXXFLAGS << ' -std=c++11'
|
||||
|
||||
$LDFLAGS << ' -lstdc++'
|
||||
|
||||
# Set to true when building binary gems
|
||||
if enable_config('static-stdlib', false)
|
||||
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
||||
@ -12,34 +15,6 @@ if enable_config('march-tune-native', false)
|
||||
$CXXFLAGS << ' -march=native -mtune=native'
|
||||
end
|
||||
|
||||
def with_disabling_unsupported_files
|
||||
disabled_files = []
|
||||
|
||||
unless $GGML_METAL
|
||||
disabled_files << 'ggml-metal.h' << 'ggml-metal.m'
|
||||
end
|
||||
|
||||
unless $GGML_METAL_EMBED_LIBRARY
|
||||
disabled_files << 'ggml-metal.metal'
|
||||
end
|
||||
|
||||
unless $OBJ_ALL&.include? 'ggml-blas.o'
|
||||
disabled_files << 'ggml-blas.h' << 'ggml-blas.cpp'
|
||||
end
|
||||
|
||||
disabled_files.filter! {|file| File.exist? file}
|
||||
|
||||
disabled_files.each do |file|
|
||||
File.rename file, "#{file}.disabled"
|
||||
end
|
||||
|
||||
yield
|
||||
|
||||
disabled_files.each do |file|
|
||||
File.rename "#{file}.disabled", file
|
||||
end
|
||||
end
|
||||
|
||||
if ENV['WHISPER_METAL']
|
||||
$GGML_METAL ||= true
|
||||
$DEPRECATE_WARNING ||= true
|
||||
@ -66,10 +41,10 @@ $MK_CXXFLAGS = '-std=c++11 -fPIC'
|
||||
$MK_NVCCFLAGS = '-std=c++11'
|
||||
$MK_LDFLAGS = ''
|
||||
|
||||
$OBJ_GGML = ''
|
||||
$OBJ_WHISPER = ''
|
||||
$OBJ_COMMON = ''
|
||||
$OBJ_SDL = ''
|
||||
$OBJ_GGML = []
|
||||
$OBJ_WHISPER = []
|
||||
$OBJ_COMMON = []
|
||||
$OBJ_SDL = []
|
||||
|
||||
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
||||
|
||||
@ -152,7 +127,7 @@ unless ENV['GGML_NO_ACCELERATE']
|
||||
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
||||
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
||||
$MK_LDFLAGS << ' -framework Accelerate'
|
||||
$OBJ_GGML << ' ggml-blas.o'
|
||||
$OBJ_GGML << 'ggml-blas.o'
|
||||
end
|
||||
end
|
||||
|
||||
@ -160,20 +135,20 @@ if ENV['GGML_OPENBLAS']
|
||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
|
||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
|
||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas`}"
|
||||
$OBJ_GGML << ' ggml-blas.o'
|
||||
$OBJ_GGML << 'ggml-blas.o'
|
||||
end
|
||||
|
||||
if ENV['GGML_OPENBLAS64']
|
||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
|
||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
|
||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`}"
|
||||
$OBJ_GGML << ' ggml-blas.o'
|
||||
$OBJ_GGML << 'ggml-blas.o'
|
||||
end
|
||||
|
||||
if $GGML_METAL
|
||||
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
||||
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
||||
$OBJ_GGML << ' ggml-metal.o'
|
||||
$OBJ_GGML << 'ggml-metal.o'
|
||||
|
||||
if ENV['GGML_METAL_NDEBUG']
|
||||
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
||||
@ -181,21 +156,22 @@ if $GGML_METAL
|
||||
|
||||
if $GGML_METAL_EMBED_LIBRARY
|
||||
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
||||
$OBJ_GGML << ' ggml-metal-embed.o'
|
||||
$OBJ_GGML << 'ggml-metal-embed.o'
|
||||
end
|
||||
end
|
||||
|
||||
$OBJ_GGML <<
|
||||
' ggml.o' <<
|
||||
' ggml-alloc.o' <<
|
||||
' ggml-backend.o' <<
|
||||
' ggml-quants.o' <<
|
||||
' ggml-aarch64.o'
|
||||
'ggml.o' <<
|
||||
'ggml-alloc.o' <<
|
||||
'ggml-backend.o' <<
|
||||
'ggml-quants.o' <<
|
||||
'ggml-aarch64.o'
|
||||
|
||||
$OBJ_WHISPER <<
|
||||
' whisper.o'
|
||||
'whisper.o'
|
||||
|
||||
$OBJ_ALL = "#{$OBJ_GGML} #{$OBJ_WHISPER} #{$OBJ_COMMON} #{$OBJ_SDL}"
|
||||
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
||||
$objs << "ruby_whisper.o"
|
||||
|
||||
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
||||
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
||||
@ -204,26 +180,13 @@ $CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
|
||||
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
||||
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
||||
|
||||
if $GGML_METAL_EMBED_LIBRARY
|
||||
File.write 'depend', "$(OBJS): $(OBJS) ggml-metal-embed.o\n"
|
||||
end
|
||||
|
||||
with_disabling_unsupported_files do
|
||||
|
||||
create_makefile('whisper')
|
||||
|
||||
end
|
||||
create_makefile('whisper')
|
||||
|
||||
File.open 'Makefile', 'a' do |file|
|
||||
file.puts 'include get-flags.mk'
|
||||
|
||||
if $GGML_METAL
|
||||
if $GGML_METAL_EMBED_LIBRARY
|
||||
# mkmf determines object files to compile dependent on existing *.{c,cpp,m} files
|
||||
# but ggml-metal-embed.c doesn't exist on creating Makefile.
|
||||
file.puts "objs := $(OBJS)"
|
||||
file.puts "OBJS = $(objs) 'ggml-metal-embed.o'"
|
||||
|
||||
file.puts 'include metal-embed.mk'
|
||||
end
|
||||
end
|
||||
|
@ -41,6 +41,8 @@ static ID id_call;
|
||||
static ID id___method__;
|
||||
static ID id_to_enum;
|
||||
|
||||
static bool is_log_callback_finalized = false;
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_max_id -> Integer
|
||||
@ -88,6 +90,39 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
||||
return rb_str_new2(str_full);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
||||
is_log_callback_finalized = true;
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
||||
*/
|
||||
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
||||
VALUE old_callback = rb_iv_get(self, "@log_callback");
|
||||
if (!NIL_P(old_callback)) {
|
||||
rb_undefine_finalizer(old_callback);
|
||||
}
|
||||
|
||||
rb_iv_set(self, "@log_callback", log_callback);
|
||||
rb_iv_set(self, "@user_data", user_data);
|
||||
|
||||
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
||||
rb_define_finalizer(log_callback, finalize_log_callback);
|
||||
|
||||
whisper_log_set([](ggml_log_level level, const char * buffer, void * user_data) {
|
||||
if (is_log_callback_finalized) {
|
||||
return;
|
||||
}
|
||||
VALUE log_callback = rb_iv_get(mWhisper, "@log_callback");
|
||||
VALUE udata = rb_iv_get(mWhisper, "@user_data");
|
||||
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
||||
}, nullptr);
|
||||
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static void ruby_whisper_free(ruby_whisper *rw) {
|
||||
if (rw->context) {
|
||||
whisper_free(rw->context);
|
||||
@ -389,6 +424,126 @@ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_vocab -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_vocab(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_ctx(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_state(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_head(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_layer(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_ctx(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_state(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_head(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_layer(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_mels -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_mels(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_ftype -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_ftype(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_type -> String
|
||||
*/
|
||||
VALUE ruby_whisper_model_type(VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of segments.
|
||||
*
|
||||
@ -1015,7 +1170,12 @@ typedef struct {
|
||||
int index;
|
||||
} ruby_whisper_segment;
|
||||
|
||||
typedef struct {
|
||||
VALUE context;
|
||||
} ruby_whisper_model;
|
||||
|
||||
VALUE cSegment;
|
||||
VALUE cModel;
|
||||
|
||||
static void rb_whisper_segment_mark(ruby_whisper_segment *rws) {
|
||||
rb_gc_mark(rws->context);
|
||||
@ -1188,6 +1348,176 @@ static VALUE ruby_whisper_segment_get_text(VALUE self) {
|
||||
return rb_str_new2(text);
|
||||
}
|
||||
|
||||
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
||||
rb_gc_mark(rwm->context);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||
ruby_whisper_model *rwm;
|
||||
rwm = ALLOC(ruby_whisper_model);
|
||||
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
|
||||
}
|
||||
|
||||
static VALUE rb_whisper_model_initialize(VALUE context) {
|
||||
ruby_whisper_model *rwm;
|
||||
const VALUE model = ruby_whisper_model_allocate(cModel);
|
||||
Data_Get_Struct(model, ruby_whisper_model, rwm);
|
||||
rwm->context = context;
|
||||
return model;
|
||||
};
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model -> Whisper::Model
|
||||
*/
|
||||
static VALUE ruby_whisper_get_model(VALUE self) {
|
||||
return rb_whisper_model_initialize(self);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_vocab -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_vocab(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_ctx -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_audio_ctx(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_state -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_audio_state(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_head -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_audio_head(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_layer -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_audio_layer(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_ctx -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_text_ctx(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_state -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_text_state(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_head -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_text_head(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_layer -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_text_layer(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_mels -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_n_mels(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ftype -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_ftype(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* type -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_c_model_type(VALUE self) {
|
||||
ruby_whisper_model *rwm;
|
||||
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||
ruby_whisper *rw;
|
||||
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
void Init_whisper() {
|
||||
id_to_s = rb_intern("to_s");
|
||||
id_call = rb_intern("call");
|
||||
@ -1198,15 +1528,36 @@ void Init_whisper() {
|
||||
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
||||
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
||||
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
||||
|
||||
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
||||
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
||||
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
||||
rb_define_singleton_method(mWhisper, "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
||||
|
||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
||||
|
||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
||||
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
||||
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
||||
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
||||
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
||||
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
||||
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
||||
@ -1284,6 +1635,22 @@ void Init_whisper() {
|
||||
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
||||
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
||||
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
||||
|
||||
cModel = rb_define_class_under(mWhisper, "Model", rb_cObject);
|
||||
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
||||
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
||||
rb_define_method(cModel, "n_vocab", ruby_whisper_c_model_n_vocab, 0);
|
||||
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_c_model_n_audio_ctx, 0);
|
||||
rb_define_method(cModel, "n_audio_state", ruby_whisper_c_model_n_audio_state, 0);
|
||||
rb_define_method(cModel, "n_audio_head", ruby_whisper_c_model_n_audio_head, 0);
|
||||
rb_define_method(cModel, "n_audio_layer", ruby_whisper_c_model_n_audio_layer, 0);
|
||||
rb_define_method(cModel, "n_text_ctx", ruby_whisper_c_model_n_text_ctx, 0);
|
||||
rb_define_method(cModel, "n_text_state", ruby_whisper_c_model_n_text_state, 0);
|
||||
rb_define_method(cModel, "n_text_head", ruby_whisper_c_model_n_text_head, 0);
|
||||
rb_define_method(cModel, "n_text_layer", ruby_whisper_c_model_n_text_layer, 0);
|
||||
rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0);
|
||||
rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0);
|
||||
rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
7
bindings/ruby/tests/helper.rb
Normal file
7
bindings/ruby/tests/helper.rb
Normal file
@ -0,0 +1,7 @@
|
||||
require "test/unit"
|
||||
require "whisper"
|
||||
|
||||
class TestBase < Test::Unit::TestCase
|
||||
MODEL = File.join(__dir__, "..", "..", "..", "models", "ggml-base.en.bin")
|
||||
AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
|
||||
end
|
44
bindings/ruby/tests/test_model.rb
Normal file
44
bindings/ruby/tests/test_model.rb
Normal file
@ -0,0 +1,44 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestModel < TestBase
|
||||
def test_model
|
||||
whisper = Whisper::Context.new(MODEL)
|
||||
assert_instance_of Whisper::Model, whisper.model
|
||||
end
|
||||
|
||||
def test_attributes
|
||||
whisper = Whisper::Context.new(MODEL)
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_gc
|
||||
model = Whisper::Context.new(MODEL).model
|
||||
GC.start
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
end
|
@ -1,9 +1,9 @@
|
||||
require 'test/unit'
|
||||
require_relative "helper"
|
||||
require 'tempfile'
|
||||
require 'tmpdir'
|
||||
require 'shellwords'
|
||||
|
||||
class TestPackage < Test::Unit::TestCase
|
||||
class TestPackage < TestBase
|
||||
def test_build
|
||||
Tempfile.create do |file|
|
||||
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
||||
|
@ -1,7 +1,6 @@
|
||||
require 'test/unit'
|
||||
require 'whisper'
|
||||
require_relative "helper"
|
||||
|
||||
class TestParams < Test::Unit::TestCase
|
||||
class TestParams < TestBase
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
@ -1,18 +1,14 @@
|
||||
require "test/unit"
|
||||
require "whisper"
|
||||
|
||||
class TestSegment < Test::Unit::TestCase
|
||||
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
||||
require_relative "helper"
|
||||
|
||||
class TestSegment < TestBase
|
||||
class << self
|
||||
attr_reader :whisper
|
||||
|
||||
def startup
|
||||
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
|
||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
|
||||
@whisper.transcribe(jfk, params)
|
||||
@whisper.transcribe(TestBase::AUDIO, params)
|
||||
end
|
||||
end
|
||||
|
||||
@ -60,7 +56,7 @@ class TestSegment < Test::Unit::TestCase
|
||||
end
|
||||
index += 1
|
||||
end
|
||||
whisper.transcribe(File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav'), params)
|
||||
whisper.transcribe(AUDIO, params)
|
||||
assert_equal 0, seg.start_time
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
||||
end
|
||||
@ -76,7 +72,7 @@ class TestSegment < Test::Unit::TestCase
|
||||
assert_same seg, segment
|
||||
return
|
||||
end
|
||||
whisper.transcribe(File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav'), params)
|
||||
whisper.transcribe(AUDIO, params)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -1,20 +1,20 @@
|
||||
require 'whisper'
|
||||
require 'test/unit'
|
||||
require_relative "helper"
|
||||
require "stringio"
|
||||
|
||||
class TestWhisper < Test::Unit::TestCase
|
||||
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
||||
# Exists to detect memory-related bug
|
||||
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
||||
|
||||
class TestWhisper < TestBase
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
||||
def test_whisper
|
||||
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
|
||||
@whisper = Whisper::Context.new(MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
|
||||
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
|
||||
@whisper.transcribe(jfk, params) {|text|
|
||||
@whisper.transcribe(AUDIO, params) {|text|
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
||||
}
|
||||
end
|
||||
@ -24,11 +24,10 @@ class TestWhisper < Test::Unit::TestCase
|
||||
attr_reader :whisper
|
||||
|
||||
def startup
|
||||
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
|
||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
|
||||
@whisper.transcribe(jfk, params)
|
||||
@whisper.transcribe(TestBase::AUDIO, params)
|
||||
end
|
||||
end
|
||||
|
||||
@ -96,4 +95,33 @@ class TestWhisper < Test::Unit::TestCase
|
||||
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_log_set
|
||||
user_data = Object.new
|
||||
logs = []
|
||||
log_callback = ->(level, buffer, udata) {
|
||||
logs << [level, buffer, udata]
|
||||
}
|
||||
Whisper.log_set log_callback, user_data
|
||||
Whisper::Context.new(MODEL)
|
||||
|
||||
assert logs.length > 30
|
||||
logs.each do |log|
|
||||
assert_equal Whisper::LOG_LEVEL_INFO, log[0]
|
||||
assert_same user_data, log[2]
|
||||
end
|
||||
end
|
||||
|
||||
def test_log_suppress
|
||||
stderr = $stderr
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
dev = StringIO.new("")
|
||||
$stderr = dev
|
||||
Whisper::Context.new(MODEL)
|
||||
assert_empty dev.string
|
||||
ensure
|
||||
$stderr = stderr
|
||||
end
|
||||
end
|
||||
|
@ -204,8 +204,6 @@ static int decode_audio(struct audio_buffer *audio_buf, s16 **data, int *size)
|
||||
const size_t errbuffsize = 1024;
|
||||
char errbuff[errbuffsize];
|
||||
|
||||
av_register_all(); // from avformat. Still a must-have call for ffmpeg v3! (can be skipped for later versions)
|
||||
|
||||
fmt_ctx = avformat_alloc_context();
|
||||
avio_ctx_buffer = (u8*)av_malloc(AVIO_CTX_BUF_SZ);
|
||||
LOG("Creating an avio context: AVIO_CTX_BUF_SZ=%d\n", AVIO_CTX_BUF_SZ);
|
||||
|
@ -1,4 +1,5 @@
|
||||
import Foundation
|
||||
import UIKit
|
||||
import whisper
|
||||
|
||||
enum WhisperError: Error {
|
||||
@ -55,11 +56,93 @@ actor WhisperContext {
|
||||
return transcription
|
||||
}
|
||||
|
||||
static func benchMemcpy(nThreads: Int32) async -> String {
|
||||
return String.init(cString: whisper_bench_memcpy_str(nThreads))
|
||||
}
|
||||
|
||||
static func benchGgmlMulMat(nThreads: Int32) async -> String {
|
||||
return String.init(cString: whisper_bench_ggml_mul_mat_str(nThreads))
|
||||
}
|
||||
|
||||
private func systemInfo() -> String {
|
||||
var info = ""
|
||||
if (ggml_cpu_has_neon() != 0) { info += "NEON " }
|
||||
if (ggml_cpu_has_metal() != 0) { info += "METAL " }
|
||||
if (ggml_cpu_has_blas() != 0) { info += "BLAS " }
|
||||
return String(info.dropLast())
|
||||
}
|
||||
|
||||
func benchFull(modelName: String, nThreads: Int32) async -> String {
|
||||
let nMels = whisper_model_n_mels(context)
|
||||
if (whisper_set_mel(context, nil, 0, nMels) != 0) {
|
||||
return "error: failed to set mel"
|
||||
}
|
||||
|
||||
// heat encoder
|
||||
if (whisper_encode(context, 0, nThreads) != 0) {
|
||||
return "error: failed to encode"
|
||||
}
|
||||
|
||||
var tokens = [whisper_token](repeating: 0, count: 512)
|
||||
|
||||
// prompt heat
|
||||
if (whisper_decode(context, &tokens, 256, 0, nThreads) != 0) {
|
||||
return "error: failed to decode"
|
||||
}
|
||||
|
||||
// text-generation heat
|
||||
if (whisper_decode(context, &tokens, 1, 256, nThreads) != 0) {
|
||||
return "error: failed to decode"
|
||||
}
|
||||
|
||||
whisper_reset_timings(context)
|
||||
|
||||
// actual run
|
||||
if (whisper_encode(context, 0, nThreads) != 0) {
|
||||
return "error: failed to encode"
|
||||
}
|
||||
|
||||
// text-generation
|
||||
for i in 0..<256 {
|
||||
if (whisper_decode(context, &tokens, 1, Int32(i), nThreads) != 0) {
|
||||
return "error: failed to decode"
|
||||
}
|
||||
}
|
||||
|
||||
// batched decoding
|
||||
for _ in 0..<64 {
|
||||
if (whisper_decode(context, &tokens, 5, 0, nThreads) != 0) {
|
||||
return "error: failed to decode"
|
||||
}
|
||||
}
|
||||
|
||||
// prompt processing
|
||||
for _ in 0..<16 {
|
||||
if (whisper_decode(context, &tokens, 256, 0, nThreads) != 0) {
|
||||
return "error: failed to decode"
|
||||
}
|
||||
}
|
||||
|
||||
whisper_print_timings(context)
|
||||
|
||||
let deviceModel = await UIDevice.current.model
|
||||
let systemName = await UIDevice.current.systemName
|
||||
let systemInfo = self.systemInfo()
|
||||
let timings: whisper_timings = whisper_get_timings(context).pointee
|
||||
let encodeMs = String(format: "%.2f", timings.encode_ms)
|
||||
let decodeMs = String(format: "%.2f", timings.decode_ms)
|
||||
let batchdMs = String(format: "%.2f", timings.batchd_ms)
|
||||
let promptMs = String(format: "%.2f", timings.prompt_ms)
|
||||
return "| \(deviceModel) | \(systemName) | \(systemInfo) | \(modelName) | \(nThreads) | 1 | \(encodeMs) | \(decodeMs) | \(batchdMs) | \(promptMs) | <todo> |"
|
||||
}
|
||||
|
||||
static func createContext(path: String) throws -> WhisperContext {
|
||||
var params = whisper_context_default_params()
|
||||
#if targetEnvironment(simulator)
|
||||
params.use_gpu = false
|
||||
print("Running on the simulator, using CPU")
|
||||
#else
|
||||
params.flash_attn = true // Enabled by default for Metal
|
||||
#endif
|
||||
let context = whisper_init_from_file_with_params(path, params)
|
||||
if let context {
|
||||
|
@ -0,0 +1,17 @@
|
||||
import Foundation
|
||||
|
||||
struct Model: Identifiable {
|
||||
var id = UUID()
|
||||
var name: String
|
||||
var info: String
|
||||
var url: String
|
||||
|
||||
var filename: String
|
||||
var fileURL: URL {
|
||||
FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(filename)
|
||||
}
|
||||
|
||||
func fileExists() -> Bool {
|
||||
FileManager.default.fileExists(atPath: fileURL.path)
|
||||
}
|
||||
}
|
@ -14,7 +14,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
||||
private var recordedFile: URL? = nil
|
||||
private var audioPlayer: AVAudioPlayer?
|
||||
|
||||
private var modelUrl: URL? {
|
||||
private var builtInModelUrl: URL? {
|
||||
Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "models")
|
||||
}
|
||||
|
||||
@ -28,23 +28,59 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
||||
|
||||
override init() {
|
||||
super.init()
|
||||
loadModel()
|
||||
}
|
||||
|
||||
func loadModel(path: URL? = nil, log: Bool = true) {
|
||||
do {
|
||||
try loadModel()
|
||||
whisperContext = nil
|
||||
if (log) { messageLog += "Loading model...\n" }
|
||||
let modelUrl = path ?? builtInModelUrl
|
||||
if let modelUrl {
|
||||
whisperContext = try WhisperContext.createContext(path: modelUrl.path())
|
||||
if (log) { messageLog += "Loaded model \(modelUrl.lastPathComponent)\n" }
|
||||
} else {
|
||||
if (log) { messageLog += "Could not locate model\n" }
|
||||
}
|
||||
canTranscribe = true
|
||||
} catch {
|
||||
print(error.localizedDescription)
|
||||
messageLog += "\(error.localizedDescription)\n"
|
||||
if (log) { messageLog += "\(error.localizedDescription)\n" }
|
||||
}
|
||||
}
|
||||
|
||||
private func loadModel() throws {
|
||||
messageLog += "Loading model...\n"
|
||||
if let modelUrl {
|
||||
whisperContext = try WhisperContext.createContext(path: modelUrl.path())
|
||||
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
||||
} else {
|
||||
messageLog += "Could not locate model\n"
|
||||
func benchCurrentModel() async {
|
||||
if whisperContext == nil {
|
||||
messageLog += "Cannot bench without loaded model\n"
|
||||
return
|
||||
}
|
||||
messageLog += "Running benchmark for loaded model\n"
|
||||
let result = await whisperContext?.benchFull(modelName: "<current>", nThreads: Int32(min(4, cpuCount())))
|
||||
if (result != nil) { messageLog += result! + "\n" }
|
||||
}
|
||||
|
||||
func bench(models: [Model]) async {
|
||||
let nThreads = Int32(min(4, cpuCount()))
|
||||
|
||||
// messageLog += "Running memcpy benchmark\n"
|
||||
// messageLog += await WhisperContext.benchMemcpy(nThreads: nThreads) + "\n"
|
||||
//
|
||||
// messageLog += "Running ggml_mul_mat benchmark with \(nThreads) threads\n"
|
||||
// messageLog += await WhisperContext.benchGgmlMulMat(nThreads: nThreads) + "\n"
|
||||
|
||||
messageLog += "Running benchmark for all downloaded models\n"
|
||||
messageLog += "| CPU | OS | Config | Model | Th | FA | Enc. | Dec. | Bch5 | PP | Commit |\n"
|
||||
messageLog += "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n"
|
||||
for model in models {
|
||||
loadModel(path: model.fileURL, log: false)
|
||||
if whisperContext == nil {
|
||||
messageLog += "Cannot bench without loaded model\n"
|
||||
break
|
||||
}
|
||||
let result = await whisperContext?.benchFull(modelName: model.name, nThreads: nThreads)
|
||||
if (result != nil) { messageLog += result! + "\n" }
|
||||
}
|
||||
messageLog += "Benchmarking completed\n"
|
||||
}
|
||||
|
||||
func transcribeSample() async {
|
||||
@ -160,3 +196,8 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
||||
isRecording = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fileprivate func cpuCount() -> Int {
|
||||
ProcessInfo.processInfo.processorCount
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
import SwiftUI
|
||||
import AVFoundation
|
||||
import Foundation
|
||||
|
||||
struct ContentView: View {
|
||||
@StateObject var whisperState = WhisperState()
|
||||
@ -29,15 +30,125 @@ struct ContentView: View {
|
||||
Text(verbatim: whisperState.messageLog)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
}
|
||||
.font(.footnote)
|
||||
.padding()
|
||||
.background(Color.gray.opacity(0.1))
|
||||
.cornerRadius(10)
|
||||
|
||||
HStack {
|
||||
Button("Clear Logs", action: {
|
||||
whisperState.messageLog = ""
|
||||
})
|
||||
.font(.footnote)
|
||||
.buttonStyle(.bordered)
|
||||
|
||||
Button("Copy Logs", action: {
|
||||
UIPasteboard.general.string = whisperState.messageLog
|
||||
})
|
||||
.font(.footnote)
|
||||
.buttonStyle(.bordered)
|
||||
|
||||
Button("Bench", action: {
|
||||
Task {
|
||||
await whisperState.benchCurrentModel()
|
||||
}
|
||||
})
|
||||
.font(.footnote)
|
||||
.buttonStyle(.bordered)
|
||||
.disabled(!whisperState.canTranscribe)
|
||||
|
||||
Button("Bench All", action: {
|
||||
Task {
|
||||
await whisperState.bench(models: ModelsView.getDownloadedModels())
|
||||
}
|
||||
})
|
||||
.font(.footnote)
|
||||
.buttonStyle(.bordered)
|
||||
.disabled(!whisperState.canTranscribe)
|
||||
}
|
||||
|
||||
NavigationLink(destination: ModelsView(whisperState: whisperState)) {
|
||||
Text("View Models")
|
||||
}
|
||||
.font(.footnote)
|
||||
.padding()
|
||||
}
|
||||
.navigationTitle("Whisper SwiftUI Demo")
|
||||
.padding()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ContentView_Previews: PreviewProvider {
|
||||
static var previews: some View {
|
||||
ContentView()
|
||||
struct ModelsView: View {
|
||||
@ObservedObject var whisperState: WhisperState
|
||||
@Environment(\.dismiss) var dismiss
|
||||
|
||||
private static let models: [Model] = [
|
||||
Model(name: "tiny", info: "(F16, 75 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin", filename: "tiny.bin"),
|
||||
Model(name: "tiny-q5_1", info: "(31 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q5_1.bin", filename: "tiny-q5_1.bin"),
|
||||
Model(name: "tiny-q8_0", info: "(42 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny-q8_0.bin", filename: "tiny-q8_0.bin"),
|
||||
Model(name: "tiny.en", info: "(F16, 75 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin", filename: "tiny.en.bin"),
|
||||
Model(name: "tiny.en-q5_1", info: "(31 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin", filename: "tiny.en-q5_1.bin"),
|
||||
Model(name: "tiny.en-q8_0", info: "(42 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q8_0.bin", filename: "tiny.en-q8_0.bin"),
|
||||
Model(name: "base", info: "(F16, 142 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin", filename: "base.bin"),
|
||||
Model(name: "base-q5_1", info: "(57 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q5_1.bin", filename: "base-q5_1.bin"),
|
||||
Model(name: "base-q8_0", info: "(78 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base-q8_0.bin", filename: "base-q8_0.bin"),
|
||||
Model(name: "base.en", info: "(F16, 142 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin", filename: "base.en.bin"),
|
||||
Model(name: "base.en-q5_1", info: "(57 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q5_1.bin", filename: "base.en-q5_1.bin"),
|
||||
Model(name: "base.en-q8_0", info: "(78 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en-q8_0.bin", filename: "base.en-q8_0.bin"),
|
||||
Model(name: "small", info: "(F16, 466 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", filename: "small.bin"),
|
||||
Model(name: "small-q5_1", info: "(181 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin", filename: "small-q5_1.bin"),
|
||||
Model(name: "small-q8_0", info: "(252 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q8_0.bin", filename: "small-q8_0.bin"),
|
||||
Model(name: "small.en", info: "(F16, 466 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin", filename: "small.en.bin"),
|
||||
Model(name: "small.en-q5_1", info: "(181 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q5_1.bin", filename: "small.en-q5_1.bin"),
|
||||
Model(name: "small.en-q8_0", info: "(252 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en-q8_0.bin", filename: "small.en-q8_0.bin"),
|
||||
Model(name: "medium", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin", filename: "medium.bin"),
|
||||
Model(name: "medium-q5_0", info: "(514 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin", filename: "medium-q5_0.bin"),
|
||||
Model(name: "medium-q8_0", info: "(785 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q8_0.bin", filename: "medium-q8_0.bin"),
|
||||
Model(name: "medium.en", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin", filename: "medium.en.bin"),
|
||||
Model(name: "medium.en-q5_0", info: "(514 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q5_0.bin", filename: "medium.en-q5_0.bin"),
|
||||
Model(name: "medium.en-q8_0", info: "(785 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en-q8_0.bin", filename: "medium.en-q8_0.bin"),
|
||||
Model(name: "large-v1", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large.bin", filename: "large.bin"),
|
||||
Model(name: "large-v2", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2.bin", filename: "large-v2.bin"),
|
||||
Model(name: "large-v2-q5_0", info: "(1.1 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2-q5_0.bin", filename: "large-v2-q5_0.bin"),
|
||||
Model(name: "large-v2-q8_0", info: "(1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2-q8_0.bin", filename: "large-v2-q8_0.bin"),
|
||||
Model(name: "large-v3", info: "(F16, 2.9 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin", filename: "large-v3.bin"),
|
||||
Model(name: "large-v3-q5_0", info: "(1.1 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-q5_0.bin", filename: "large-v3-q5_0.bin"),
|
||||
Model(name: "large-v3-turbo", info: "(F16, 1.5 GiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin", filename: "large-v3-turbo.bin"),
|
||||
Model(name: "large-v3-turbo-q5_0", info: "(547 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin", filename: "large-v3-turbo-q5_0.bin"),
|
||||
Model(name: "large-v3-turbo-q8_0", info: "(834 MiB)", url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q8_0.bin", filename: "large-v3-turbo-q8_0.bin"),
|
||||
]
|
||||
|
||||
static func getDownloadedModels() -> [Model] {
|
||||
// Filter models that have been downloaded
|
||||
return models.filter {
|
||||
FileManager.default.fileExists(atPath: $0.fileURL.path())
|
||||
}
|
||||
}
|
||||
|
||||
func loadModel(model: Model) {
|
||||
Task {
|
||||
dismiss()
|
||||
whisperState.loadModel(path: model.fileURL)
|
||||
}
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
List {
|
||||
Section(header: Text("Models")) {
|
||||
ForEach(ModelsView.models) { model in
|
||||
DownloadButton(model: model)
|
||||
.onLoad(perform: loadModel)
|
||||
}
|
||||
}
|
||||
}
|
||||
.listStyle(GroupedListStyle())
|
||||
.navigationBarTitle("Models", displayMode: .inline).toolbar {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//struct ContentView_Previews: PreviewProvider {
|
||||
// static var previews: some View {
|
||||
// ContentView()
|
||||
// }
|
||||
//}
|
||||
|
@ -0,0 +1,102 @@
|
||||
import SwiftUI
|
||||
|
||||
struct DownloadButton: View {
|
||||
private var model: Model
|
||||
|
||||
@State private var status: String
|
||||
|
||||
@State private var downloadTask: URLSessionDownloadTask?
|
||||
@State private var progress = 0.0
|
||||
@State private var observation: NSKeyValueObservation?
|
||||
|
||||
private var onLoad: ((_ model: Model) -> Void)?
|
||||
|
||||
init(model: Model) {
|
||||
self.model = model
|
||||
status = model.fileExists() ? "downloaded" : "download"
|
||||
}
|
||||
|
||||
func onLoad(perform action: @escaping (_ model: Model) -> Void) -> DownloadButton {
|
||||
var button = self
|
||||
button.onLoad = action
|
||||
return button
|
||||
}
|
||||
|
||||
private func download() {
|
||||
status = "downloading"
|
||||
print("Downloading model \(model.name) from \(model.url)")
|
||||
guard let url = URL(string: model.url) else { return }
|
||||
|
||||
downloadTask = URLSession.shared.downloadTask(with: url) { temporaryURL, response, error in
|
||||
if let error = error {
|
||||
print("Error: \(error.localizedDescription)")
|
||||
return
|
||||
}
|
||||
|
||||
guard let response = response as? HTTPURLResponse, (200...299).contains(response.statusCode) else {
|
||||
print("Server error!")
|
||||
return
|
||||
}
|
||||
|
||||
do {
|
||||
if let temporaryURL = temporaryURL {
|
||||
try FileManager.default.copyItem(at: temporaryURL, to: model.fileURL)
|
||||
print("Writing to \(model.filename) completed")
|
||||
status = "downloaded"
|
||||
}
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
observation = downloadTask?.progress.observe(\.fractionCompleted) { progress, _ in
|
||||
self.progress = progress.fractionCompleted
|
||||
}
|
||||
|
||||
downloadTask?.resume()
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
Button(action: {
|
||||
if (status == "download") {
|
||||
download()
|
||||
} else if (status == "downloading") {
|
||||
downloadTask?.cancel()
|
||||
status = "download"
|
||||
} else if (status == "downloaded") {
|
||||
if !model.fileExists() {
|
||||
download()
|
||||
}
|
||||
onLoad?(model)
|
||||
}
|
||||
}) {
|
||||
let title = "\(model.name) \(model.info)"
|
||||
if (status == "download") {
|
||||
Text("Download \(title)")
|
||||
} else if (status == "downloading") {
|
||||
Text("\(title) (Downloading \(Int(progress * 100))%)")
|
||||
} else if (status == "downloaded") {
|
||||
Text("Load \(title)")
|
||||
} else {
|
||||
Text("Unknown status")
|
||||
}
|
||||
}.swipeActions {
|
||||
if (status == "downloaded") {
|
||||
Button("Delete") {
|
||||
do {
|
||||
try FileManager.default.removeItem(at: model.fileURL)
|
||||
} catch {
|
||||
print("Error deleting file: \(error)")
|
||||
}
|
||||
status = "download"
|
||||
}
|
||||
.tint(.red)
|
||||
}
|
||||
}
|
||||
}
|
||||
.onDisappear() {
|
||||
downloadTask?.cancel()
|
||||
}
|
||||
}
|
||||
}
|
@ -17,6 +17,8 @@
|
||||
0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
|
||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
|
||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
|
||||
7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */; };
|
||||
7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7F79E0EF2CE0C6F700ACD7BF /* Model.swift */; };
|
||||
E3F92DC52AFA8E3800A6A9D4 /* whisper in Frameworks */ = {isa = PBXBuildFile; productRef = E3F92DC42AFA8E3800A6A9D4 /* whisper */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
@ -33,6 +35,8 @@
|
||||
0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
|
||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
|
||||
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
|
||||
7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
||||
7F79E0EF2CE0C6F700ACD7BF /* Model.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
|
||||
E3F92DC22AFA8DD800A6A9D4 /* whisper.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = whisper.cpp; path = ../..; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
@ -52,6 +56,7 @@
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */,
|
||||
7F79E0EF2CE0C6F700ACD7BF /* Model.swift */,
|
||||
);
|
||||
path = Models;
|
||||
sourceTree = "<group>";
|
||||
@ -119,6 +124,7 @@
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
0AAC5D9C29539CCF003032C3 /* ContentView.swift */,
|
||||
7F79E0ED2CE0A78000ACD7BF /* DownloadButton.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
@ -220,7 +226,9 @@
|
||||
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
|
||||
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
|
||||
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
|
||||
7F79E0EE2CE0A78000ACD7BF /* DownloadButton.swift in Sources */,
|
||||
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
|
||||
7F79E0F02CE0C6F700ACD7BF /* Model.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -2049,7 +2049,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
||||
fp16 = fp16 && vk12_features.shaderFloat16;
|
||||
|
||||
std::string device_name = props2.properties.deviceName.data();
|
||||
std::cerr << GGML_VK_NAME << idx << ": " << device_name << " (" << driver_props.driverName << ") | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl;
|
||||
GGML_LOG_DEBUG("ggml_vulkan: %d = %s (%s) | uma: %d | fp16: %d | warp size: %d\n",
|
||||
idx, device_name.c_str(), driver_props.driverName, uma, fp16, subgroup_size);
|
||||
|
||||
if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
|
||||
std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl;
|
||||
@ -2107,8 +2108,7 @@ void ggml_vk_instance_init() {
|
||||
};
|
||||
validation_features.setPNext(nullptr);
|
||||
instance_create_info.setPNext(&validation_features);
|
||||
|
||||
std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
|
||||
GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
|
||||
}
|
||||
vk_instance.instance = vk::createInstance(instance_create_info);
|
||||
|
||||
@ -2222,8 +2222,8 @@ void ggml_vk_instance_init() {
|
||||
vk_instance.device_indices.push_back(0);
|
||||
}
|
||||
}
|
||||
GGML_LOG_DEBUG("ggml_vulkan: Found %d Vulkan devices:\n", vk_instance.device_indices.size());
|
||||
|
||||
std::cerr << "ggml_vulkan: Found " << vk_instance.device_indices.size() << " Vulkan devices:" << std::endl;
|
||||
|
||||
for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
|
||||
ggml_vk_print_gpu_info(i);
|
||||
|
@ -423,6 +423,14 @@ extern "C" {
|
||||
WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
|
||||
|
||||
// Performance information from the default state.
|
||||
struct whisper_timings {
|
||||
float sample_ms;
|
||||
float encode_ms;
|
||||
float decode_ms;
|
||||
float batchd_ms;
|
||||
float prompt_ms;
|
||||
};
|
||||
WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
|
||||
WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
|
||||
WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);
|
||||
|
||||
|
@ -4186,6 +4186,19 @@ whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
|
||||
return ctx->vocab.token_transcribe;
|
||||
}
|
||||
|
||||
struct whisper_timings * whisper_get_timings(struct whisper_context * ctx) {
|
||||
if (ctx->state == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
whisper_timings * timings = new whisper_timings;
|
||||
timings->sample_ms = 1e-3f * ctx->state->t_sample_us / std::max(1, ctx->state->n_sample);
|
||||
timings->encode_ms = 1e-3f * ctx->state->t_encode_us / std::max(1, ctx->state->n_encode);
|
||||
timings->decode_ms = 1e-3f * ctx->state->t_decode_us / std::max(1, ctx->state->n_decode);
|
||||
timings->batchd_ms = 1e-3f * ctx->state->t_batchd_us / std::max(1, ctx->state->n_batchd);
|
||||
timings->prompt_ms = 1e-3f * ctx->state->t_prompt_us / std::max(1, ctx->state->n_prompt);
|
||||
return timings;
|
||||
}
|
||||
|
||||
void whisper_print_timings(struct whisper_context * ctx) {
|
||||
const int64_t t_end_us = ggml_time_us();
|
||||
|
||||
@ -7391,6 +7404,7 @@ static void whisper_exp_compute_token_level_timestamps_dtw(
|
||||
void whisper_log_set(ggml_log_callback log_callback, void * user_data) {
|
||||
g_state.log_callback = log_callback ? log_callback : whisper_log_callback_default;
|
||||
g_state.log_callback_user_data = user_data;
|
||||
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
|
||||
}
|
||||
|
||||
GGML_ATTRIBUTE_FORMAT(2, 3)
|
||||
|
Loading…
Reference in New Issue
Block a user