diff --git a/examples/wchess/libwchess/CMakeLists.txt b/examples/wchess/libwchess/CMakeLists.txt index 7c89883d..b785fb2c 100644 --- a/examples/wchess/libwchess/CMakeLists.txt +++ b/examples/wchess/libwchess/CMakeLists.txt @@ -3,14 +3,15 @@ add_library(libwchess WChess.h Chessboard.cpp Chessboard.h - ) +) target_link_libraries(libwchess PUBLIC whisper + common ) target_include_directories(libwchess PUBLIC "$" - ) +) diff --git a/examples/wchess/libwchess/WChess.cpp b/examples/wchess/libwchess/WChess.cpp index 249b7b53..99fb6d5e 100644 --- a/examples/wchess/libwchess/WChess.cpp +++ b/examples/wchess/libwchess/WChess.cpp @@ -1,49 +1,48 @@ #include "WChess.h" +#include "Chessboard.h" #include "grammar-parser.h" #include "common.h" #include -Chess::Chess(whisper_context * ctx, +WChess::WChess(whisper_context * ctx, const whisper_full_params & wparams, - StatusSetter status_setter, - ISRunning running, - AudioGetter audio, - MovesSetter m_moveSetter) + callbacks cb, + settings s) : m_ctx(ctx) , m_wparams(wparams) - , m_status_setter(status_setter) - , m_running(running) - , m_audio(audio) - , m_moveSetter( m_moveSetter) + , m_cb(cb) + , m_settings(s) + , m_board(new Chessboard()) {} -void Chess::set_status(const char * msg) { - if (m_status_setter) (*m_status_setter)(msg); +WChess::~WChess() = default; + +void WChess::set_status(const std::string& msg) const { + if (m_cb.set_status) (*m_cb.set_status)(msg); } -void Chess::set_moves(const std::string& moves) { - if (m_moveSetter) (*m_moveSetter)(moves); +void WChess::set_moves(const std::string& moves) const { + if (m_cb.set_moves) (*m_cb.set_moves)(moves); } -bool Chess::check_running() { - if (m_running) return (*m_running)(); +bool WChess::check_running() const { + if (m_cb.check_running) return (*m_cb.check_running)(); return false; } -void Chess::get_audio(int ms, std::vector& pcmf32) { - if (m_audio) (*m_audio)(ms, pcmf32); +void WChess::get_audio(int ms, std::vector& pcmf32) const { + if (m_cb.get_audio) 
(*m_cb.get_audio)(ms, pcmf32); } -std::string Chess::stringifyBoard() { - return m_board.stringifyBoard(); +std::string WChess::stringify_board() const { + return m_board->stringifyBoard(); } -void Chess::run() { +void WChess::run() { set_status("loading data ..."); bool have_prompt = false; bool ask_prompt = true; - bool print_energy = false; float logprob_min0 = 0.0f; float logprob_min = 0.0f; @@ -87,13 +86,6 @@ void Chess::run() { m_wparams.grammar_penalty = 100.0; } - const int32_t vad_ms = 2000; - const int32_t prompt_ms = 5000; - const int32_t command_ms = 4000; - - const float vad_thold = 0.1f; - const float freq_thold = -1.0f; - while (check_running()) { // delay std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -115,14 +107,14 @@ void Chess::run() { int64_t t_ms = 0; { - get_audio(vad_ms, pcmf32_cur); + get_audio(m_settings.vad_ms, pcmf32_cur); - if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) { + if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, m_settings.vad_thold, m_settings.freq_thold, m_settings.print_energy)) { fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__); set_status("Speech detected! 
Processing ..."); if (!have_prompt) { - get_audio(prompt_ms, pcmf32_cur); + get_audio(m_settings.prompt_ms, pcmf32_cur); m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("prompt"); const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms)); @@ -151,7 +143,7 @@ void Chess::run() { have_prompt = true; } } else { - get_audio(command_ms, pcmf32_cur); + get_audio(m_settings.command_ms, pcmf32_cur); // prepend 3 second of silence pcmf32_cur.insert(pcmf32_cur.begin(), 3*WHISPER_SAMPLE_RATE, 0.0f); @@ -198,18 +190,15 @@ void Chess::run() { set_status(txt); } if (!command.empty()) { - set_moves(m_board.processTranscription(command)); + set_moves(m_board->processTranscription(command)); } - } - - } } } } -std::string Chess::transcribe( +std::string WChess::transcribe( const std::vector & pcmf32, float & logprob_min, float & logprob_sum, @@ -223,7 +212,7 @@ std::string Chess::transcribe( t_ms = 0; if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) { - return ""; + return {}; } std::string result; diff --git a/examples/wchess/libwchess/WChess.h b/examples/wchess/libwchess/WChess.h index f97980e8..fd019594 100644 --- a/examples/wchess/libwchess/WChess.h +++ b/examples/wchess/libwchess/WChess.h @@ -1,39 +1,59 @@ #pragma once -#include "Chessboard.h" #include "whisper.h" #include #include +#include -class Chess { +class Chessboard; + +class WChess { public: - using StatusSetter = void (*)(const std::string & status); - using ISRunning = bool (*)(); - using AudioGetter = void (*)(int, std::vector&); - using MovesSetter = void (*)(const std::string & moves); - Chess( whisper_context * ctx, - const whisper_full_params & wparams, - StatusSetter status_setter, - ISRunning running, - AudioGetter audio, - MovesSetter moveSetter); + using SetStatusCb = void (*)(const std::string &); + using CheckRunningCb = bool (*)(); + using GetAudioCb = void (*)(int, std::vector &); + using SetMovesCb = void (*)(const std::string &); + + struct 
callbacks { + SetStatusCb set_status = nullptr; + CheckRunningCb check_running = nullptr; + GetAudioCb get_audio = nullptr; + SetMovesCb set_moves = nullptr; + }; + + struct settings { + int32_t vad_ms = 2000; + int32_t prompt_ms = 5000; + int32_t command_ms = 4000; + float vad_thold = 0.1f; + float freq_thold = -1.0f; + bool print_energy = false; + }; + + WChess( + whisper_context * ctx, + const whisper_full_params & wparams, + callbacks cb, + settings s + ); + ~WChess(); + void run(); - std::string stringifyBoard(); + std::string stringify_board() const; private: - void get_audio(int ms, std::vector& pcmf32); - void set_status(const char* msg); - void set_moves(const std::string& moves); - bool check_running(); + void get_audio(int ms, std::vector& pcmf32) const; + void set_status(const std::string& msg) const; + void set_moves(const std::string& moves) const; + bool check_running() const; std::string transcribe( const std::vector & pcmf32, float & logprob_min, float & logprob_sum, int & n_tokens, int64_t & t_ms); + whisper_context * m_ctx; whisper_full_params m_wparams; - StatusSetter m_status_setter; - ISRunning m_running; - AudioGetter m_audio; - MovesSetter m_moveSetter; - Chessboard m_board; + const callbacks m_cb; + const settings m_settings; + std::unique_ptr m_board; }; diff --git a/examples/wchess/wchess.cmd/CMakeLists.txt b/examples/wchess/wchess.cmd/CMakeLists.txt index 4cd93ac2..a976d93f 100644 --- a/examples/wchess/wchess.cmd/CMakeLists.txt +++ b/examples/wchess/wchess.cmd/CMakeLists.txt @@ -4,5 +4,5 @@ if (WHISPER_SDL2) include(DefaultTargetOptions) - target_link_libraries(${TARGET} PRIVATE libwchess common common-sdl ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT}) endif () \ No newline at end of file diff --git a/examples/wchess/wchess.cmd/wchess.cmd.cpp b/examples/wchess/wchess.cmd/wchess.cmd.cpp index 32e126a8..b3b30023 100644 --- a/examples/wchess/wchess.cmd/wchess.cmd.cpp +++ 
b/examples/wchess/wchess.cmd/wchess.cmd.cpp @@ -1,30 +1,15 @@ -// Voice assistant example +// Command line voice assisted chess // -// Speak short text commands to the microphone. -// This program will detect your voice command and convert them to text. +// Speak chess move commands to the microphone. +// The moves will be translated to chessboard positions. // -// ref: https://github.com/ggerganov/whisper.cpp/issues/171 // -#include "common-sdl.h" -#include "common.h" #include "WChess.h" +#include "common-sdl.h" -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include - -bool file_exists(const std::string & fname) { - std::ifstream f(fname.c_str()); - return f.good(); -} // command-line parameters struct whisper_params { @@ -81,7 +66,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str()); fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str()); fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str()); - fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str()); fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty); fprintf(stderr, "\n"); } @@ -124,10 +108,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) { return true; } - -std::unique_ptr g_chess; -void set_moves(const std::string & /* moves */) { - fprintf(stdout, "%s", g_chess->stringifyBoard().c_str()); +std::unique_ptr g_wchess; +void set_moves(const std::string & moves) { + if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str()); } audio_async g_audio(30*1000); @@ -135,10 +118,6 @@ void get_audio(int ms, std::vector & pcmf32_cur) { g_audio.get(ms, pcmf32_cur); } -bool 
check_running() { - return sdl_poll_events(); -} - int main(int argc, char ** argv) { whisper_params params; @@ -189,15 +168,30 @@ int main(int argc, char ** argv) { wparams.beam_search.beam_size = 5; + wparams.initial_prompt = params.context.data(); + g_audio.resume(); // wait for 1 second to avoid any buffered noise std::this_thread::sleep_for(std::chrono::milliseconds(1000)); g_audio.clear(); - g_chess.reset(new Chess(ctx, wparams, nullptr, sdl_poll_events, get_audio, set_moves)); - set_moves({}); - g_chess->run(); + WChess::callbacks cb; + cb.check_running = sdl_poll_events; + cb.get_audio = get_audio; + cb.set_moves = set_moves; + + WChess::settings s; + s.vad_ms = 2000; + s.prompt_ms = params.prompt_ms; + s.command_ms = params.command_ms; + s.vad_thold = params.vad_thold; + s.freq_thold = params.freq_thold; + s.print_energy = params.print_energy; + + g_wchess.reset(new WChess(ctx, wparams, cb, s)); + set_moves("start"); + g_wchess->run(); g_audio.pause(); diff --git a/examples/wchess/wchess.wasm/index-tmpl.html b/examples/wchess/wchess.wasm/index-tmpl.html index 90aecbf1..48c1ccdc 100644 --- a/examples/wchess/wchess.wasm/index-tmpl.html +++ b/examples/wchess/wchess.wasm/index-tmpl.html @@ -1,7 +1,7 @@ - command : Voice assistant example using Whisper + WebAssembly + wchess : Voice assistant example using Whisper + WebAssembly