mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-20 09:01:42 +00:00
wchess: tidy up entry files
This commit is contained in:
parent
f07ff2aa6a
commit
a44b21bce0
@ -3,14 +3,15 @@ add_library(libwchess
|
||||
WChess.h
|
||||
Chessboard.cpp
|
||||
Chessboard.h
|
||||
)
|
||||
)
|
||||
|
||||
target_link_libraries(libwchess
|
||||
PUBLIC
|
||||
whisper
|
||||
common
|
||||
)
|
||||
|
||||
target_include_directories(libwchess
|
||||
PUBLIC
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
|
||||
)
|
||||
)
|
||||
|
@ -1,49 +1,48 @@
|
||||
#include "WChess.h"
|
||||
#include "Chessboard.h"
|
||||
#include "grammar-parser.h"
|
||||
#include "common.h"
|
||||
#include <thread>
|
||||
|
||||
Chess::Chess(whisper_context * ctx,
|
||||
WChess::WChess(whisper_context * ctx,
|
||||
const whisper_full_params & wparams,
|
||||
StatusSetter status_setter,
|
||||
ISRunning running,
|
||||
AudioGetter audio,
|
||||
MovesSetter m_moveSetter)
|
||||
callbacks cb,
|
||||
settings s)
|
||||
: m_ctx(ctx)
|
||||
, m_wparams(wparams)
|
||||
, m_status_setter(status_setter)
|
||||
, m_running(running)
|
||||
, m_audio(audio)
|
||||
, m_moveSetter( m_moveSetter)
|
||||
, m_cb(cb)
|
||||
, m_settings(s)
|
||||
, m_board(new Chessboard())
|
||||
{}
|
||||
|
||||
void Chess::set_status(const char * msg) {
|
||||
if (m_status_setter) (*m_status_setter)(msg);
|
||||
WChess::~WChess() = default;
|
||||
|
||||
void WChess::set_status(const std::string& msg) const {
|
||||
if (m_cb.set_status) (*m_cb.set_status)(msg);
|
||||
}
|
||||
|
||||
void Chess::set_moves(const std::string& moves) {
|
||||
if (m_moveSetter) (*m_moveSetter)(moves);
|
||||
void WChess::set_moves(const std::string& moves) const {
|
||||
if (m_cb.set_moves) (*m_cb.set_moves)(moves);
|
||||
}
|
||||
|
||||
bool Chess::check_running() {
|
||||
if (m_running) return (*m_running)();
|
||||
bool WChess::check_running() const {
|
||||
if (m_cb.check_running) return (*m_cb.check_running)();
|
||||
return false;
|
||||
}
|
||||
|
||||
void Chess::get_audio(int ms, std::vector<float>& pcmf32) {
|
||||
if (m_audio) (*m_audio)(ms, pcmf32);
|
||||
void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
|
||||
if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
|
||||
}
|
||||
|
||||
std::string Chess::stringifyBoard() {
|
||||
return m_board.stringifyBoard();
|
||||
std::string WChess::stringify_board() const {
|
||||
return m_board->stringifyBoard();
|
||||
}
|
||||
|
||||
void Chess::run() {
|
||||
void WChess::run() {
|
||||
set_status("loading data ...");
|
||||
|
||||
bool have_prompt = false;
|
||||
bool ask_prompt = true;
|
||||
bool print_energy = false;
|
||||
|
||||
float logprob_min0 = 0.0f;
|
||||
float logprob_min = 0.0f;
|
||||
@ -87,13 +86,6 @@ void Chess::run() {
|
||||
m_wparams.grammar_penalty = 100.0;
|
||||
}
|
||||
|
||||
const int32_t vad_ms = 2000;
|
||||
const int32_t prompt_ms = 5000;
|
||||
const int32_t command_ms = 4000;
|
||||
|
||||
const float vad_thold = 0.1f;
|
||||
const float freq_thold = -1.0f;
|
||||
|
||||
while (check_running()) {
|
||||
// delay
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
@ -115,14 +107,14 @@ void Chess::run() {
|
||||
int64_t t_ms = 0;
|
||||
|
||||
{
|
||||
get_audio(vad_ms, pcmf32_cur);
|
||||
get_audio(m_settings.vad_ms, pcmf32_cur);
|
||||
|
||||
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
|
||||
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, m_settings.vad_thold, m_settings.freq_thold, m_settings.print_energy)) {
|
||||
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
|
||||
set_status("Speech detected! Processing ...");
|
||||
|
||||
if (!have_prompt) {
|
||||
get_audio(prompt_ms, pcmf32_cur);
|
||||
get_audio(m_settings.prompt_ms, pcmf32_cur);
|
||||
|
||||
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("prompt");
|
||||
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
|
||||
@ -151,7 +143,7 @@ void Chess::run() {
|
||||
have_prompt = true;
|
||||
}
|
||||
} else {
|
||||
get_audio(command_ms, pcmf32_cur);
|
||||
get_audio(m_settings.command_ms, pcmf32_cur);
|
||||
|
||||
// prepend 3 seconds of silence
|
||||
pcmf32_cur.insert(pcmf32_cur.begin(), 3*WHISPER_SAMPLE_RATE, 0.0f);
|
||||
@ -198,18 +190,15 @@ void Chess::run() {
|
||||
set_status(txt);
|
||||
}
|
||||
if (!command.empty()) {
|
||||
set_moves(m_board.processTranscription(command));
|
||||
set_moves(m_board->processTranscription(command));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string Chess::transcribe(
|
||||
std::string WChess::transcribe(
|
||||
const std::vector<float> & pcmf32,
|
||||
float & logprob_min,
|
||||
float & logprob_sum,
|
||||
@ -223,7 +212,7 @@ std::string Chess::transcribe(
|
||||
t_ms = 0;
|
||||
|
||||
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
||||
return "";
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
@ -1,39 +1,59 @@
|
||||
#pragma once
|
||||
#include "Chessboard.h"
|
||||
#include "whisper.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
class Chess {
|
||||
class Chessboard;
|
||||
|
||||
class WChess {
|
||||
public:
|
||||
using StatusSetter = void (*)(const std::string & status);
|
||||
using ISRunning = bool (*)();
|
||||
using AudioGetter = void (*)(int, std::vector<float>&);
|
||||
using MovesSetter = void (*)(const std::string & moves);
|
||||
Chess( whisper_context * ctx,
|
||||
const whisper_full_params & wparams,
|
||||
StatusSetter status_setter,
|
||||
ISRunning running,
|
||||
AudioGetter audio,
|
||||
MovesSetter moveSetter);
|
||||
using SetStatusCb = void (*)(const std::string &);
|
||||
using CheckRunningCb = bool (*)();
|
||||
using GetAudioCb = void (*)(int, std::vector<float> &);
|
||||
using SetMovesCb = void (*)(const std::string &);
|
||||
|
||||
struct callbacks {
|
||||
SetStatusCb set_status = nullptr;
|
||||
CheckRunningCb check_running = nullptr;
|
||||
GetAudioCb get_audio = nullptr;
|
||||
SetMovesCb set_moves = nullptr;
|
||||
};
|
||||
|
||||
struct settings {
|
||||
int32_t vad_ms = 2000;
|
||||
int32_t prompt_ms = 5000;
|
||||
int32_t command_ms = 4000;
|
||||
float vad_thold = 0.1f;
|
||||
float freq_thold = -1.0f;
|
||||
bool print_energy = false;
|
||||
};
|
||||
|
||||
WChess(
|
||||
whisper_context * ctx,
|
||||
const whisper_full_params & wparams,
|
||||
callbacks cb,
|
||||
settings s
|
||||
);
|
||||
~WChess();
|
||||
|
||||
void run();
|
||||
std::string stringifyBoard();
|
||||
std::string stringify_board() const;
|
||||
private:
|
||||
void get_audio(int ms, std::vector<float>& pcmf32);
|
||||
void set_status(const char* msg);
|
||||
void set_moves(const std::string& moves);
|
||||
bool check_running();
|
||||
void get_audio(int ms, std::vector<float>& pcmf32) const;
|
||||
void set_status(const std::string& msg) const;
|
||||
void set_moves(const std::string& moves) const;
|
||||
bool check_running() const;
|
||||
std::string transcribe(
|
||||
const std::vector<float> & pcmf32,
|
||||
float & logprob_min,
|
||||
float & logprob_sum,
|
||||
int & n_tokens,
|
||||
int64_t & t_ms);
|
||||
|
||||
whisper_context * m_ctx;
|
||||
whisper_full_params m_wparams;
|
||||
StatusSetter m_status_setter;
|
||||
ISRunning m_running;
|
||||
AudioGetter m_audio;
|
||||
MovesSetter m_moveSetter;
|
||||
Chessboard m_board;
|
||||
const callbacks m_cb;
|
||||
const settings m_settings;
|
||||
std::unique_ptr<Chessboard> m_board;
|
||||
};
|
||||
|
@ -4,5 +4,5 @@ if (WHISPER_SDL2)
|
||||
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE libwchess common common-sdl ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif ()
|
@ -1,30 +1,15 @@
|
||||
// Voice assistant example
|
||||
// Command line voice assisted chess
|
||||
//
|
||||
// Speak short text commands to the microphone.
|
||||
// This program will detect your voice command and convert them to text.
|
||||
// Speak chess move commands to the microphone.
|
||||
// The moves will be translated to chessboard positions.
|
||||
//
|
||||
// ref: https://github.com/ggerganov/whisper.cpp/issues/171
|
||||
//
|
||||
|
||||
#include "common-sdl.h"
|
||||
#include "common.h"
|
||||
#include "WChess.h"
|
||||
#include "common-sdl.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
bool file_exists(const std::string & fname) {
|
||||
std::ifstream f(fname.c_str());
|
||||
return f.good();
|
||||
}
|
||||
|
||||
// command-line parameters
|
||||
struct whisper_params {
|
||||
@ -81,7 +66,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
|
||||
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
|
||||
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
|
||||
fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
|
||||
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
@ -124,10 +108,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Chess> g_chess;
|
||||
void set_moves(const std::string & /* moves */) {
|
||||
fprintf(stdout, "%s", g_chess->stringifyBoard().c_str());
|
||||
std::unique_ptr<WChess> g_wchess;
|
||||
void set_moves(const std::string & moves) {
|
||||
if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str());
|
||||
}
|
||||
|
||||
audio_async g_audio(30*1000);
|
||||
@ -135,10 +118,6 @@ void get_audio(int ms, std::vector<float> & pcmf32_cur) {
|
||||
g_audio.get(ms, pcmf32_cur);
|
||||
}
|
||||
|
||||
bool check_running() {
|
||||
return sdl_poll_events();
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
whisper_params params;
|
||||
|
||||
@ -189,15 +168,30 @@ int main(int argc, char ** argv) {
|
||||
|
||||
wparams.beam_search.beam_size = 5;
|
||||
|
||||
wparams.initial_prompt = params.context.data();
|
||||
|
||||
g_audio.resume();
|
||||
|
||||
// wait for 1 second to avoid any buffered noise
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
g_audio.clear();
|
||||
|
||||
g_chess.reset(new Chess(ctx, wparams, nullptr, sdl_poll_events, get_audio, set_moves));
|
||||
set_moves({});
|
||||
g_chess->run();
|
||||
WChess::callbacks cb;
|
||||
cb.check_running = sdl_poll_events;
|
||||
cb.get_audio = get_audio;
|
||||
cb.set_moves = set_moves;
|
||||
|
||||
WChess::settings s;
|
||||
s.vad_ms = 2000;
|
||||
s.prompt_ms = params.prompt_ms;
|
||||
s.command_ms = params.command_ms;
|
||||
s.vad_thold = params.vad_thold;
|
||||
s.freq_thold = params.freq_thold;
|
||||
s.print_energy = params.print_energy;
|
||||
|
||||
g_wchess.reset(new WChess(ctx, wparams, cb, s));
|
||||
set_moves("start");
|
||||
g_wchess->run();
|
||||
|
||||
g_audio.pause();
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
<!doctype html>
|
||||
<html lang="en-us">
|
||||
<head>
|
||||
<title>command : Voice assistant example using Whisper + WebAssembly</title>
|
||||
<title>wchess : Voice assistant example using Whisper + WebAssembly</title>
|
||||
|
||||
<style>
|
||||
#output {
|
||||
@ -28,7 +28,7 @@
|
||||
</head>
|
||||
<body onload="loadWhisper()">
|
||||
<div id="main-container">
|
||||
<b>command : Voice assistant example using Whisper + WebAssembly</b>
|
||||
<b>wchess : Voice assistant example using Whisper + WebAssembly</b>
|
||||
|
||||
<br><br>
|
||||
|
||||
@ -56,6 +56,14 @@
|
||||
-->
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div id="myBoard" style="width: 400px"></div>
|
||||
<script src="js/jquery-3.7.1.min.js"></script>
|
||||
<script src="js/chessboard-1.0.0.min.js"></script>
|
||||
<script>
|
||||
var board = Chessboard('myBoard', 'start')
|
||||
</script>
|
||||
|
||||
<br>
|
||||
|
||||
<div id="input">
|
||||
@ -72,14 +80,6 @@
|
||||
<pre id="state-moves">[The moves will be displayed here]</pre>
|
||||
</div>
|
||||
|
||||
<br><br>
|
||||
<div id="myBoard" style="width: 400px"></div>
|
||||
<script src="js/jquery-3.7.1.min.js"></script>
|
||||
<script src="js/chessboard-1.0.0.min.js"></script>
|
||||
<script>
|
||||
var board = Chessboard('myBoard', 'start')
|
||||
</script>
|
||||
|
||||
<hr>
|
||||
|
||||
Debug output:
|
||||
|
@ -1,19 +1,8 @@
|
||||
#include "ggml.h"
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#include <emscripten.h>
|
||||
#include <WChess.h>
|
||||
#include <emscripten/bind.h>
|
||||
|
||||
#include <WChess.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
|
||||
constexpr int N_THREAD = 8;
|
||||
|
||||
@ -59,6 +48,45 @@ bool check_running() {
|
||||
return g_running;
|
||||
}
|
||||
|
||||
void wchess_main(size_t i) {
|
||||
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
|
||||
wparams.offset_ms = 0;
|
||||
wparams.translate = false;
|
||||
wparams.no_context = true;
|
||||
wparams.single_segment = true;
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = false;
|
||||
wparams.print_timestamps = true;
|
||||
wparams.print_special = false;
|
||||
|
||||
wparams.max_tokens = 32;
|
||||
// wparams.audio_ctx = 768; // partial encoder context for better performance
|
||||
|
||||
wparams.temperature = 0.4f;
|
||||
wparams.temperature_inc = 1.0f;
|
||||
wparams.greedy.best_of = 1;
|
||||
|
||||
wparams.beam_search.beam_size = 5;
|
||||
|
||||
wparams.language = "en";
|
||||
|
||||
printf("command: using %d threads\n", wparams.n_threads);
|
||||
|
||||
WChess::callbacks cb;
|
||||
cb.set_status = set_status;
|
||||
cb.check_running = check_running;
|
||||
cb.get_audio = get_audio;
|
||||
cb.set_moves = set_moves;
|
||||
|
||||
WChess(g_contexts[i], wparams, cb, {}).run();
|
||||
if (i < g_contexts.size()) {
|
||||
whisper_free(g_contexts[i]);
|
||||
g_contexts[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
EMSCRIPTEN_BINDINGS(command) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
@ -70,44 +98,7 @@ EMSCRIPTEN_BINDINGS(command) {
|
||||
g_worker.join();
|
||||
}
|
||||
g_worker = std::thread([i]() {
|
||||
|
||||
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
|
||||
wparams.offset_ms = 0;
|
||||
wparams.translate = false;
|
||||
wparams.no_context = true;
|
||||
wparams.single_segment = true;
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = false;
|
||||
wparams.print_timestamps = true;
|
||||
wparams.print_special = false;
|
||||
|
||||
wparams.max_tokens = 32;
|
||||
// wparams.audio_ctx = 768; // partial encoder context for better performance
|
||||
|
||||
wparams.temperature = 0.4f;
|
||||
wparams.temperature_inc = 1.0f;
|
||||
wparams.greedy.best_of = 1;
|
||||
|
||||
wparams.beam_search.beam_size = 5;
|
||||
|
||||
wparams.language = "en";
|
||||
|
||||
printf("command: using %d threads\n", wparams.n_threads);
|
||||
|
||||
Chess(g_contexts[i],
|
||||
wparams,
|
||||
set_status,
|
||||
check_running,
|
||||
get_audio,
|
||||
set_moves).run();
|
||||
|
||||
if (i < g_contexts.size()) {
|
||||
whisper_free(g_contexts[i]);
|
||||
g_contexts[i] = nullptr;
|
||||
}
|
||||
|
||||
wchess_main(i);
|
||||
});
|
||||
|
||||
return i + 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user