wchess: tidy up entry files

This commit is contained in:
Fraxy V 2023-11-25 11:34:06 +02:00
parent f07ff2aa6a
commit a44b21bce0
7 changed files with 150 additions and 155 deletions

View File

@ -3,14 +3,15 @@ add_library(libwchess
WChess.h
Chessboard.cpp
Chessboard.h
)
)
target_link_libraries(libwchess
PUBLIC
whisper
common
)
target_include_directories(libwchess
PUBLIC
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
)
)

View File

@ -1,49 +1,48 @@
#include "WChess.h"
#include "Chessboard.h"
#include "grammar-parser.h"
#include "common.h"
#include <thread>
Chess::Chess(whisper_context * ctx,
WChess::WChess(whisper_context * ctx,
const whisper_full_params & wparams,
StatusSetter status_setter,
ISRunning running,
AudioGetter audio,
MovesSetter m_moveSetter)
callbacks cb,
settings s)
: m_ctx(ctx)
, m_wparams(wparams)
, m_status_setter(status_setter)
, m_running(running)
, m_audio(audio)
, m_moveSetter( m_moveSetter)
, m_cb(cb)
, m_settings(s)
, m_board(new Chessboard())
{}
void Chess::set_status(const char * msg) {
if (m_status_setter) (*m_status_setter)(msg);
WChess::~WChess() = default;
void WChess::set_status(const std::string& msg) const {
if (m_cb.set_status) (*m_cb.set_status)(msg);
}
void Chess::set_moves(const std::string& moves) {
if (m_moveSetter) (*m_moveSetter)(moves);
void WChess::set_moves(const std::string& moves) const {
if (m_cb.set_moves) (*m_cb.set_moves)(moves);
}
bool Chess::check_running() {
if (m_running) return (*m_running)();
bool WChess::check_running() const {
if (m_cb.check_running) return (*m_cb.check_running)();
return false;
}
void Chess::get_audio(int ms, std::vector<float>& pcmf32) {
if (m_audio) (*m_audio)(ms, pcmf32);
void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
}
std::string Chess::stringifyBoard() {
return m_board.stringifyBoard();
std::string WChess::stringify_board() const {
return m_board->stringifyBoard();
}
void Chess::run() {
void WChess::run() {
set_status("loading data ...");
bool have_prompt = false;
bool ask_prompt = true;
bool print_energy = false;
float logprob_min0 = 0.0f;
float logprob_min = 0.0f;
@ -87,13 +86,6 @@ void Chess::run() {
m_wparams.grammar_penalty = 100.0;
}
const int32_t vad_ms = 2000;
const int32_t prompt_ms = 5000;
const int32_t command_ms = 4000;
const float vad_thold = 0.1f;
const float freq_thold = -1.0f;
while (check_running()) {
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -115,14 +107,14 @@ void Chess::run() {
int64_t t_ms = 0;
{
get_audio(vad_ms, pcmf32_cur);
get_audio(m_settings.vad_ms, pcmf32_cur);
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, m_settings.vad_thold, m_settings.freq_thold, m_settings.print_energy)) {
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
set_status("Speech detected! Processing ...");
if (!have_prompt) {
get_audio(prompt_ms, pcmf32_cur);
get_audio(m_settings.prompt_ms, pcmf32_cur);
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("prompt");
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
@ -151,7 +143,7 @@ void Chess::run() {
have_prompt = true;
}
} else {
get_audio(command_ms, pcmf32_cur);
get_audio(m_settings.command_ms, pcmf32_cur);
// prepend 3 seconds of silence
pcmf32_cur.insert(pcmf32_cur.begin(), 3*WHISPER_SAMPLE_RATE, 0.0f);
@ -198,18 +190,15 @@ void Chess::run() {
set_status(txt);
}
if (!command.empty()) {
set_moves(m_board.processTranscription(command));
set_moves(m_board->processTranscription(command));
}
}
}
}
}
}
std::string Chess::transcribe(
std::string WChess::transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
@ -223,7 +212,7 @@ std::string Chess::transcribe(
t_ms = 0;
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
return "";
return {};
}
std::string result;

View File

@ -1,39 +1,59 @@
#pragma once
#include "Chessboard.h"
#include "whisper.h"
#include <string>
#include <vector>
#include <memory>
class Chess {
class Chessboard;
class WChess {
public:
using StatusSetter = void (*)(const std::string & status);
using ISRunning = bool (*)();
using AudioGetter = void (*)(int, std::vector<float>&);
using MovesSetter = void (*)(const std::string & moves);
Chess( whisper_context * ctx,
const whisper_full_params & wparams,
StatusSetter status_setter,
ISRunning running,
AudioGetter audio,
MovesSetter moveSetter);
// Plain function-pointer signatures for the hooks WChess calls back into.
using SetStatusCb    = void (*)(const std::string &);            // receives a status message
using CheckRunningCb = bool (*)();                               // polled to decide whether to keep running
using GetAudioCb     = void (*)(int, std::vector<float> &);      // (ms, output sample buffer)
using SetMovesCb     = void (*)(const std::string &);            // receives the processed moves string

// Bundle of optional hooks supplied by the host application.
// Every hook defaults to null. WChess tests each pointer before calling it,
// and a null check_running is reported as "not running".
struct callbacks {
    SetStatusCb    set_status{nullptr};
    CheckRunningCb check_running{nullptr};
    GetAudioCb     get_audio{nullptr};
    SetMovesCb     set_moves{nullptr};
};
// Tunable parameters consumed by WChess::run().
// The *_ms fields are the durations handed to the get_audio callback; the
// remaining fields are forwarded unchanged to ::vad_simple().
struct settings {
    int32_t vad_ms{2000};         // audio window fetched for the voice-activity check
    int32_t prompt_ms{5000};      // audio window fetched while no prompt has been heard yet
    int32_t command_ms{4000};     // audio window fetched for a move command
    float   vad_thold{0.1f};      // threshold argument passed to ::vad_simple()
    float   freq_thold{-1.0f};    // frequency threshold argument passed to ::vad_simple()
    bool    print_energy{false};  // verbosity flag passed to ::vad_simple()
};
WChess(
whisper_context * ctx,
const whisper_full_params & wparams,
callbacks cb,
settings s
);
~WChess();
void run();
std::string stringifyBoard();
std::string stringify_board() const;
private:
void get_audio(int ms, std::vector<float>& pcmf32);
void set_status(const char* msg);
void set_moves(const std::string& moves);
bool check_running();
void get_audio(int ms, std::vector<float>& pcmf32) const;
void set_status(const std::string& msg) const;
void set_moves(const std::string& moves) const;
bool check_running() const;
std::string transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
int & n_tokens,
int64_t & t_ms);
whisper_context * m_ctx;
whisper_full_params m_wparams;
StatusSetter m_status_setter;
ISRunning m_running;
AudioGetter m_audio;
MovesSetter m_moveSetter;
Chessboard m_board;
const callbacks m_cb;
const settings m_settings;
std::unique_ptr<Chessboard> m_board;
};

View File

@ -4,5 +4,5 @@ if (WHISPER_SDL2)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE libwchess common common-sdl ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT})
endif ()

View File

@ -1,30 +1,15 @@
// Voice assistant example
// Command line voice assisted chess
//
// Speak short text commands to the microphone.
// This program will detect your voice command and convert them to text.
// Speak chess move commands to the microphone.
// The moves will be translated to chessboard positions.
//
// ref: https://github.com/ggerganov/whisper.cpp/issues/171
//
#include "common-sdl.h"
#include "common.h"
#include "WChess.h"
#include "common-sdl.h"
#include <sstream>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <regex>
#include <string>
#include <memory>
#include <thread>
#include <vector>
#include <map>
bool file_exists(const std::string & fname) {
std::ifstream f(fname.c_str());
return f.good();
}
// command-line parameters
struct whisper_params {
@ -81,7 +66,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
fprintf(stderr, "\n");
}
@ -124,10 +108,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
return true;
}
std::unique_ptr<Chess> g_chess;
void set_moves(const std::string & /* moves */) {
fprintf(stdout, "%s", g_chess->stringifyBoard().c_str());
std::unique_ptr<WChess> g_wchess;
void set_moves(const std::string & moves) {
if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str());
}
audio_async g_audio(30*1000);
@ -135,10 +118,6 @@ void get_audio(int ms, std::vector<float> & pcmf32_cur) {
g_audio.get(ms, pcmf32_cur);
}
bool check_running() {
return sdl_poll_events();
}
int main(int argc, char ** argv) {
whisper_params params;
@ -189,15 +168,30 @@ int main(int argc, char ** argv) {
wparams.beam_search.beam_size = 5;
wparams.initial_prompt = params.context.data();
g_audio.resume();
// wait for 1 second to avoid any buffered noise
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
g_audio.clear();
g_chess.reset(new Chess(ctx, wparams, nullptr, sdl_poll_events, get_audio, set_moves));
set_moves({});
g_chess->run();
WChess::callbacks cb;
cb.check_running = sdl_poll_events;
cb.get_audio = get_audio;
cb.set_moves = set_moves;
WChess::settings s;
s.vad_ms = 2000;
s.prompt_ms = params.prompt_ms;
s.command_ms = params.command_ms;
s.vad_thold = params.vad_thold;
s.freq_thold = params.freq_thold;
s.print_energy = params.print_energy;
g_wchess.reset(new WChess(ctx, wparams, cb, s));
set_moves("start");
g_wchess->run();
g_audio.pause();

View File

@ -1,7 +1,7 @@
<!doctype html>
<html lang="en-us">
<head>
<title>command : Voice assistant example using Whisper + WebAssembly</title>
<title>wchess : Voice assistant example using Whisper + WebAssembly</title>
<style>
#output {
@ -28,7 +28,7 @@
</head>
<body onload="loadWhisper()">
<div id="main-container">
<b>command : Voice assistant example using Whisper + WebAssembly</b>
<b>wchess : Voice assistant example using Whisper + WebAssembly</b>
<br><br>
@ -56,6 +56,14 @@
-->
</div>
<br>
<div id="myBoard" style="width: 400px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('myBoard', 'start')
</script>
<br>
<div id="input">
@ -72,14 +80,6 @@
<pre id="state-moves">[The moves will be displayed here]</pre>
</div>
<br><br>
<div id="myBoard" style="width: 400px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('myBoard', 'start')
</script>
<hr>
Debug output:

View File

@ -1,19 +1,8 @@
#include "ggml.h"
#include "common.h"
#include <emscripten.h>
#include <WChess.h>
#include <emscripten/bind.h>
#include <WChess.h>
#include <atomic>
#include <cmath>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include <regex>
constexpr int N_THREAD = 8;
@ -59,6 +48,45 @@ bool check_running() {
return g_running;
}
// Worker entry point: runs one WChess session on the whisper context stored
// in slot `i` of g_contexts, then frees that context and clears the slot.
//
// @param i  index into g_contexts; an out-of-range index is ignored.
void wchess_main(size_t i) {
    // Validate the slot before it is read. Previously the bounds check ran
    // only after g_contexts[i] had already been dereferenced.
    if (i >= g_contexts.size()) {
        return;
    }

    struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);

    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; fall back to a single thread instead of requesting zero.
    const int n_hw = static_cast<int>(std::thread::hardware_concurrency());
    wparams.n_threads = n_hw > 0 ? std::min(N_THREAD, n_hw) : 1;

    wparams.offset_ms        = 0;
    wparams.translate        = false;
    wparams.no_context       = true;
    wparams.single_segment   = true;
    wparams.print_realtime   = false;
    wparams.print_progress   = false;
    wparams.print_timestamps = true;
    wparams.print_special    = false;

    wparams.max_tokens       = 32;
    // wparams.audio_ctx     = 768; // partial encoder context for better performance

    wparams.temperature      = 0.4f;
    wparams.temperature_inc  = 1.0f;
    wparams.greedy.best_of   = 1;

    wparams.beam_search.beam_size = 5;

    wparams.language         = "en";

    printf("command: using %d threads\n", wparams.n_threads);

    // Wire the file-level hook functions into the engine.
    WChess::callbacks cb;
    cb.set_status    = set_status;
    cb.check_running = check_running;
    cb.get_audio     = get_audio;
    cb.set_moves     = set_moves;

    // Default-constructed settings ({}); run() returns once check_running
    // reports false.
    WChess(g_contexts[i], wparams, cb, {}).run();

    // The session is over: release the context and mark the slot as free.
    whisper_free(g_contexts[i]);
    g_contexts[i] = nullptr;
}
EMSCRIPTEN_BINDINGS(command) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) {
@ -70,44 +98,7 @@ EMSCRIPTEN_BINDINGS(command) {
g_worker.join();
}
g_worker = std::thread([i]() {
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special = false;
wparams.max_tokens = 32;
// wparams.audio_ctx = 768; // partial encoder context for better performance
wparams.temperature = 0.4f;
wparams.temperature_inc = 1.0f;
wparams.greedy.best_of = 1;
wparams.beam_search.beam_size = 5;
wparams.language = "en";
printf("command: using %d threads\n", wparams.n_threads);
Chess(g_contexts[i],
wparams,
set_status,
check_running,
get_audio,
set_moves).run();
if (i < g_contexts.size()) {
whisper_free(g_contexts[i]);
g_contexts[i] = nullptr;
}
wchess_main(i);
});
return i + 1;