chess : tuning performance

wchess: c++17 -> c++11
wchess: off/on prompt
2025-06-26 18:03:21 +00:00 · 2023-11-30 10:50:47 +02:00 · 2023-11-30 08:37:54 +02:00 · 2023-11-30 01:17:29 +02:00 · 2023-11-29 19:30:57 +02:00 · 2023-11-29 18:53:28 +02:00
53 changed files with 38799 additions and 305 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -320,6 +320,13 @@ jobs:
          cd ./build
          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
      - name: Copy CUDA DLLs
        run: >
          Copy-Item -PassThru
          -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
          -Include cudart64_*,cublas64_*,cublasLt64_*
          -Destination build/bin/${{ matrix.build }}
      - name: Copy SDL2.dll
        if: matrix.sdl2 == 'ON'
        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
--- a/.gitignore
+++ b/.gitignore
@ -31,6 +31,7 @@ build-sanitize-thread/
 /talk-llama
 /bench
 /quantize
 /server
 /lsp
 arm_neon.h
--- a/7
+++ b/7
@ -1,4 +1,4 @@
-default: main bench quantize
+default: main bench quantize server
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
@ -338,7 +338,7 @@ libwhisper.so: $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
 clean:
-	rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
+	rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so
 #
 # Examples
@ -359,6 +359,9 @@ bench: examples/bench/bench.cpp $(WHISPER_OBJ)
 quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
 	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
 server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
 stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
--- a/bindings/ios
+++ b/bindings/ios
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -65,6 +65,7 @@ elseif(CMAKE_JS_VERSION)
 else()
    add_subdirectory(main)
    add_subdirectory(stream)
    add_subdirectory(server)
    add_subdirectory(command)
    add_subdirectory(bench)
    add_subdirectory(quantize)
@ -72,3 +73,5 @@ else()
    add_subdirectory(talk-llama)
    add_subdirectory(lsp)
 endif()
 add_subdirectory(wchess)
--- a/examples/common-sdl.cpp
+++ b/examples/common-sdl.cpp
@ -139,10 +139,13 @@ void audio_async::callback(uint8_t * stream, int len) {
        return;
    }
-    const size_t n_samples = len / sizeof(float);
+    size_t n_samples = len / sizeof(float);
-    m_audio_new.resize(n_samples);
+    if (n_samples > m_audio.size()) {
-    memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
+        n_samples = m_audio.size();
        stream += (len - (n_samples * sizeof(float)));
    }
    //fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
@ -153,7 +156,7 @@ void audio_async::callback(uint8_t * stream, int len) {
            const size_t n0 = m_audio.size() - m_audio_pos;
            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
-            memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
+            memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
            m_audio_len = m_audio.size();
--- a/examples/common-sdl.h
+++ b/examples/common-sdl.h
@ -41,7 +41,6 @@ private:
    std::mutex       m_mutex;
    std::vector<float> m_audio;
    std::vector<float> m_audio_new;
    size_t             m_audio_pos = 0;
    size_t             m_audio_len = 0;
 };
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@ -0,0 +1,6 @@
 # Build rules for the whisper.cpp HTTP server example.
 # httplib.h and json.hpp are header-only dependencies vendored next to server.cpp;
 # they are listed as sources so they show up in IDE project views.
 set(TARGET server)
 add_executable(${TARGET} server.cpp httplib.h json.hpp)
 include(DefaultTargetOptions)
 target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
 # cpp-httplib is built on Winsock on Windows. Link ws2_32 explicitly so that
 # toolchains without library auto-linking (e.g. MinGW) resolve the socket symbols.
 if(WIN32)
    target_link_libraries(${TARGET} PRIVATE ws2_32)
 endif()
--- a/examples/server/README.md
+++ b/examples/server/README.md
@ -0,0 +1,59 @@
 # whisper.cpp http server
 Simple HTTP server. WAV files are passed to the inference model via HTTP requests.
 ```
 ./server -h
 usage: ./bin/server [options]
 options:
  -h,        --help              [default] show this help message and exit
  -t N,      --threads N         [4      ] number of threads to use during computation
  -p N,      --processors N      [1      ] number of processors to use during computation
  -ot N,     --offset-t N        [0      ] time offset in milliseconds
  -on N,     --offset-n N        [0      ] segment index offset
  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [2      ] number of best candidates to keep
  -bs N,     --beam-size N       [-1     ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
  -debug,    --debug-mode        [false  ] enable debug mode (eg. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pp,       --print-progress    [false  ] print progress
  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
             --prompt PROMPT     [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  --host HOST,                   [127.0.0.1] Hostname/ip-adress for the server
  --port PORT,                   [8080   ] Port number for the server
 ```
 ## request examples
 **/inference**
 ```
 curl 127.0.0.1:8080/inference \
 -H "Content-Type: multipart/form-data" \
 -F file="@<file-path>" \
 -F temperature="0.2" \
 -F response-format="json"
 ```
 **/load**
 ```
 curl 127.0.0.1:8080/load \
 -H "Content-Type: multipart/form-data" \
 -F model="<path-to-model-file>"
 ```
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
--- a/examples/server/json.hpp
+++ b/examples/server/json.hpp
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -0,0 +1,699 @@
 #include "common.h"
 #include "whisper.h"
 #include "httplib.h"
 #include "json.hpp"
 #include <cmath>
 #include <fstream>
 #include <cstdio>
 #include <string>
 #include <thread>
 #include <vector>
 #include <cstring>
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 using namespace httplib;
 using json = nlohmann::json;
 namespace {
 // Terminal color map: 10 ANSI 256-color escape sequences, one per confidence
 // bucket [0.0, 0.1, ..., 0.9]. Lowest is red, middle is yellow, highest is green.
 // whisper_print_segment_callback indexes this table with the cubed token
 // probability scaled by the table size (clamped to a valid index).
 const std::vector<std::string> k_colors = {
    "\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
    "\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
 };
 // Names of the output formats a client can request through the
 // "response-format" form field (stored in whisper_params::response_format).
 // NOTE(review): only text_format is compared against in the visible part of
 // main; the remaining names appear to be reserved for formats not yet
 // implemented - confirm against the rest of the file.
 const std::string json_format   = "json";
 const std::string text_format   = "text";
 const std::string srt_format    = "srt";
 const std::string vjson_format  = "verbose_json";
 const std::string vtt_format    = "vtt";
 // HTTP-server settings, filled in by whisper_params_parse from the
 // --host, --port and --public command-line options.
 struct server_params
 {
    // address the server is reported to listen on (--host)
    std::string hostname = "127.0.0.1";
    // folder with static files (--public)
    std::string public_path = "examples/server/public";
    // TCP port (--port)
    int32_t port          = 8080;
    // socket timeouts; no command-line option sets them in this file
    // NOTE(review): presumably seconds - confirm where they are handed to httplib
    int32_t read_timeout  = 600;
    int32_t write_timeout = 600;
 };
 // Inference settings. Defaults are defined here, whisper_params_parse
 // overrides them from the command line, and get_req_parameters overrides a
 // subset of them from the fields of an incoming HTTP request.
 struct whisper_params {
    // worker threads: at most 4, fewer if the machine reports fewer hardware threads
    int32_t n_threads    = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t n_processors =  1;
    int32_t offset_t_ms  =  0;
    int32_t offset_n     =  0;
    int32_t duration_ms  =  0;
    // minimum progress increase (in percent) between two progress log lines,
    // see whisper_print_progress_callback
    int32_t progress_step =  5;
    // negative value: keep the library default for n_max_text_ctx
    int32_t max_context  = -1;
    // NOTE(review): parsed from -ml/--max-len, but the visible part of main
    // does not forward it to whisper_full_params - confirm
    int32_t max_len      =  0;
    int32_t best_of      =  2;
    // beam search is selected only when this is greater than 1
    int32_t beam_size    = -1;
    float word_thold    =  0.01f;
    float entropy_thold =  2.40f;
    float logprob_thold = -1.00f;
    // set from the request's "temperature" field; main assigns it to
    // whisper_full_params::temperature_inc
    float userdef_temp  =  0.20f;
    // the command-line switch for speed_up is commented out, so it stays false
    bool speed_up        = false;
    bool debug_mode      = false;
    bool translate       = false;
    bool detect_language = false;
    bool diarize         = false;
    bool tinydiarize     = false;
    bool split_on_word   = false;
    bool no_fallback     = false;
    bool print_special   = false;
    bool print_colors    = false;
    bool print_progress  = false;
    bool no_timestamps   = false;
    // cleared by -ng/--no-gpu
    bool use_gpu         = true;
    std::string language        = "en";
    std::string prompt          = "";
    // accepted via -fp/--font-path; no use of it is visible in this part of the file
    std::string font_path       = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
    std::string model           = "models/ggml-base.en.bin";
    // one of the *_format constants above; overridable per request
    std::string response_format     = json_format;
    // [TDRZ] speaker turn string
    std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
    std::string openvino_encode_device = "CPU";
 };
 // Formats a time given in units of 10 ms as "HH:MM:SS.mmm".
 // With comma == true the milliseconds separator is ',' instead of '.'.
 //  500 -> 00:00:05.000
 // 6000 -> 00:01:00.000
 std::string to_timestamp(int64_t t, bool comma = false) {
    const int64_t k_ms_per_sec  = 1000;
    const int64_t k_ms_per_min  = 60 * k_ms_per_sec;
    const int64_t k_ms_per_hour = 60 * k_ms_per_min;
    // input is in centiseconds
    const int64_t total_ms = t * 10;
    const int hours   = (int) (total_ms / k_ms_per_hour);
    const int minutes = (int) ((total_ms % k_ms_per_hour) / k_ms_per_min);
    const int seconds = (int) ((total_ms % k_ms_per_min) / k_ms_per_sec);
    const int millis  = (int) (total_ms % k_ms_per_sec);
    const char * separator = comma ? "," : ".";
    char text[32];
    snprintf(text, sizeof(text), "%02d:%02d:%02d%s%03d", hours, minutes, seconds, separator, millis);
    return std::string(text);
 }
 // Converts a timestamp in units of 10 ms into a sample index, clamped to
 // the range [0, n_samples - 1] (and to 0 when n_samples is 0 or less).
 int timestamp_to_sample(int64_t t, int n_samples) {
    const int last_index = (int) n_samples - 1;
    const int raw_index  = (int) ((t*WHISPER_SAMPLE_RATE)/100);
    const int capped     = raw_index < last_index ? raw_index : last_index;
    return capped < 0 ? 0 : capped;
 }
 // Returns true when the file at fileName can be opened for reading.
 bool is_file_exist(const char *fileName)
 {
    return std::ifstream(fileName).good();
 }
 // Prints the command-line help to stderr.
 //   argv    - program arguments; only argv[0] (the program name) is used
 //   params  - inference settings whose current values are shown as defaults
 //   sparams - server settings whose current values are shown as defaults
 // Every option accepted by whisper_params_parse that affects the server is
 // listed, including -ng/--no-gpu.
 void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params,
                         const server_params& sparams) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options] \n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h,        --help              [default] show this help message and exit\n");
    fprintf(stderr, "  -t N,      --threads N         [%-7d] number of threads to use during computation\n",    params.n_threads);
    fprintf(stderr, "  -p N,      --processors N      [%-7d] number of processors to use during computation\n", params.n_processors);
    fprintf(stderr, "  -ot N,     --offset-t N        [%-7d] time offset in milliseconds\n",                    params.offset_t_ms);
    fprintf(stderr, "  -on N,     --offset-n N        [%-7d] segment index offset\n",                           params.offset_n);
    fprintf(stderr, "  -d  N,     --duration N        [%-7d] duration of audio to process in milliseconds\n",   params.duration_ms);
    fprintf(stderr, "  -mc N,     --max-context N     [%-7d] maximum number of text context tokens to store\n", params.max_context);
    fprintf(stderr, "  -ml N,     --max-len N         [%-7d] maximum segment length in characters\n",           params.max_len);
    fprintf(stderr, "  -sow,      --split-on-word     [%-7s] split on word rather than on token\n",             params.split_on_word ? "true" : "false");
    fprintf(stderr, "  -bo N,     --best-of N         [%-7d] number of best candidates to keep\n",              params.best_of);
    fprintf(stderr, "  -bs N,     --beam-size N       [%-7d] beam size for beam search\n",                      params.beam_size);
    fprintf(stderr, "  -wt N,     --word-thold N      [%-7.2f] word timestamp probability threshold\n",         params.word_thold);
    fprintf(stderr, "  -et N,     --entropy-thold N   [%-7.2f] entropy threshold for decoder fail\n",           params.entropy_thold);
    fprintf(stderr, "  -lpt N,    --logprob-thold N   [%-7.2f] log probability threshold for decoder fail\n",   params.logprob_thold);
    // fprintf(stderr, "  -su,       --speed-up          [%-7s] speed up audio by x2 (reduced accuracy)\n",        params.speed_up ? "true" : "false");
    fprintf(stderr, "  -debug,    --debug-mode        [%-7s] enable debug mode (eg. dump log_mel)\n",           params.debug_mode ? "true" : "false");
    fprintf(stderr, "  -tr,       --translate         [%-7s] translate from source language to english\n",      params.translate ? "true" : "false");
    fprintf(stderr, "  -di,       --diarize           [%-7s] stereo audio diarization\n",                       params.diarize ? "true" : "false");
    fprintf(stderr, "  -tdrz,     --tinydiarize       [%-7s] enable tinydiarize (requires a tdrz model)\n",     params.tinydiarize ? "true" : "false");
    fprintf(stderr, "  -nf,       --no-fallback       [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
    fprintf(stderr, "  -ps,       --print-special     [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
    fprintf(stderr, "  -pc,       --print-colors      [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
    fprintf(stderr, "  -pp,       --print-progress    [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
    fprintf(stderr, "  -nt,       --no-timestamps     [%-7s] do not print timestamps\n",                        params.no_timestamps ? "true" : "false");
    fprintf(stderr, "  -l LANG,   --language LANG     [%-7s] spoken language ('auto' for auto-detect)\n",       params.language.c_str());
    fprintf(stderr, "  -dl,       --detect-language   [%-7s] exit after automatically detecting language\n",    params.detect_language ? "true" : "false");
    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
    // the flag disables the GPU, so the shown default is the inverse of use_gpu
    fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n",                                    params.use_gpu ? "false" : "true");
    // server params
    fprintf(stderr, "  --host HOST,                   [%-7s] Hostname/ip-address for the server\n", sparams.hostname.c_str());
    fprintf(stderr, "  --port PORT,                   [%-7d] Port number for the server\n", sparams.port);
    fprintf(stderr, "  --public PATH,                 [%-7s] Path to the public folder\n", sparams.public_path.c_str());
    fprintf(stderr, "\n");
 }
 // Parses the command line into `params` (inference) and `sparams` (server).
 //   -h/--help prints the usage text and exits with status 0.
 //   An unknown argument prints an error plus the usage text and exits.
 //   An option that requires a value but is the last argument prints an error
 //   plus the usage text and exits with status 1 (previously this read past
 //   the end of argv).
 // Returns true when all arguments were consumed.
 // Note: std::stoi/std::stof still throw on values that are not numbers.
 bool whisper_params_parse(int argc, char ** argv, whisper_params & params, server_params & sparams) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        // Fetches the value that follows the current option and advances `i`.
        // Bails out with a diagnostic instead of touching argv[argc] (a null
        // pointer) when the value is missing.
        const auto next_value = [&]() -> const char * {
            if (i + 1 >= argc) {
                fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
                whisper_print_usage(argc, argv, params, sparams);
                exit(1);
            }
            return argv[++i];
        };
        if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params, sparams);
            exit(0);
        }
        else if (arg == "-t"    || arg == "--threads")         { params.n_threads       = std::stoi(next_value()); }
        else if (arg == "-p"    || arg == "--processors")      { params.n_processors    = std::stoi(next_value()); }
        else if (arg == "-ot"   || arg == "--offset-t")        { params.offset_t_ms     = std::stoi(next_value()); }
        else if (arg == "-on"   || arg == "--offset-n")        { params.offset_n        = std::stoi(next_value()); }
        else if (arg == "-d"    || arg == "--duration")        { params.duration_ms     = std::stoi(next_value()); }
        else if (arg == "-mc"   || arg == "--max-context")     { params.max_context     = std::stoi(next_value()); }
        else if (arg == "-ml"   || arg == "--max-len")         { params.max_len         = std::stoi(next_value()); }
        else if (arg == "-bo"   || arg == "--best-of")         { params.best_of         = std::stoi(next_value()); }
        else if (arg == "-bs"   || arg == "--beam-size")       { params.beam_size       = std::stoi(next_value()); }
        else if (arg == "-wt"   || arg == "--word-thold")      { params.word_thold      = std::stof(next_value()); }
        else if (arg == "-et"   || arg == "--entropy-thold")   { params.entropy_thold   = std::stof(next_value()); }
        else if (arg == "-lpt"  || arg == "--logprob-thold")   { params.logprob_thold   = std::stof(next_value()); }
        // else if (arg == "-su"   || arg == "--speed-up")        { params.speed_up        = true; }
        else if (arg == "-debug"|| arg == "--debug-mode")      { params.debug_mode      = true; }
        else if (arg == "-tr"   || arg == "--translate")       { params.translate       = true; }
        else if (arg == "-di"   || arg == "--diarize")         { params.diarize         = true; }
        else if (arg == "-tdrz" || arg == "--tinydiarize")     { params.tinydiarize     = true; }
        else if (arg == "-sow"  || arg == "--split-on-word")   { params.split_on_word   = true; }
        else if (arg == "-nf"   || arg == "--no-fallback")     { params.no_fallback     = true; }
        else if (arg == "-fp"   || arg == "--font-path")       { params.font_path       = next_value(); }
        else if (arg == "-ps"   || arg == "--print-special")   { params.print_special   = true; }
        else if (arg == "-pc"   || arg == "--print-colors")    { params.print_colors    = true; }
        else if (arg == "-pp"   || arg == "--print-progress")  { params.print_progress  = true; }
        else if (arg == "-nt"   || arg == "--no-timestamps")   { params.no_timestamps   = true; }
        else if (arg == "-l"    || arg == "--language")        { params.language        = next_value(); }
        else if (arg == "-dl"   || arg == "--detect-language") { params.detect_language = true; }
        else if (                  arg == "--prompt")          { params.prompt          = next_value(); }
        else if (arg == "-m"    || arg == "--model")           { params.model           = next_value(); }
        else if (arg == "-oved" || arg == "--ov-e-device")     { params.openvino_encode_device = next_value(); }
        else if (arg == "-ng"   || arg == "--no-gpu")          { params.use_gpu         = false; }
        // server params
        else if (                  arg == "--port")            { sparams.port        = std::stoi(next_value()); }
        else if (                  arg == "--host")            { sparams.hostname    = next_value(); }
        else if (                  arg == "--public")          { sparams.public_path = next_value(); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params, sparams);
            exit(0);
        }
    }
    return true;
 }
 // Context handed to the whisper callbacks through their `void * user_data`
 // argument. The pointed-to objects are not owned by this struct.
 // The member order matters: main initializes it positionally.
 struct whisper_print_user_data {
    // active inference settings
    const whisper_params * params;
    // per-channel PCM audio, used for stereo diarization
    const std::vector<std::vector<float>> * pcmf32s;
    // last progress value (in percent) that was reported by the progress callback
    int progress_prev;
 };
 // Guesses which stereo channel is speaking between t0 and t1 by comparing
 // the summed absolute amplitude of channel 0 and channel 1 in that window.
 //   pcmf32s - per-channel PCM samples; taken by const reference so the audio
 //             is not copied for every segment
 //   t0, t1  - segment start/end in units of 10 ms
 //   id_only - when true return just the id, otherwise "(speaker <id>)"
 // The id is "0" or "1" when that channel carries more than 1.1x the energy
 // of the other one, and "?" otherwise (also when fewer than two channels
 // are available).
 std::string estimate_diarization_speaker(const std::vector<std::vector<float>> & pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
    std::string speaker = "?";
    // both channels are indexed below, so they must exist
    if (pcmf32s.size() >= 2) {
        const int64_t n_samples = pcmf32s[0].size();
        const int64_t is0 = timestamp_to_sample(t0, n_samples);
        const int64_t is1 = timestamp_to_sample(t1, n_samples);
        double energy0 = 0.0;
        double energy1 = 0.0;
        for (int64_t j = is0; j < is1; j++) {
            energy0 += fabs(pcmf32s[0][j]);
            energy1 += fabs(pcmf32s[1][j]);
        }
        if (energy0 > 1.1*energy1) {
            speaker = "0";
        } else if (energy1 > 1.1*energy0) {
            speaker = "1";
        }
    }
    //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, speaker = %s\n", is0, is1, energy0, energy1, speaker.c_str());
    if (!id_only) {
        speaker.insert(0, "(speaker ");
        speaker.append(")");
    }
    return speaker;
 }
 void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
    int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
    int * progress_prev  = &(((whisper_print_user_data *) user_data)->progress_prev);
    if (progress >= *progress_prev + progress_step) {
        *progress_prev += progress_step;
        fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress);
    }
 }
 void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
    const auto & params  = *((whisper_print_user_data *) user_data)->params;
    const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
    const int n_segments = whisper_full_n_segments(ctx);
    std::string speaker = "";
    int64_t t0 = 0;
    int64_t t1 = 0;
    // print the last n_new segments
    const int s0 = n_segments - n_new;
    if (s0 == 0) {
        printf("\n");
    }
    for (int i = s0; i < n_segments; i++) {
        if (!params.no_timestamps || params.diarize) {
            t0 = whisper_full_get_segment_t0(ctx, i);
            t1 = whisper_full_get_segment_t1(ctx, i);
        }
        if (!params.no_timestamps) {
            printf("[%s --> %s]  ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
        }
        if (params.diarize && pcmf32s.size() == 2) {
            speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
        }
        if (params.print_colors) {
            for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
                if (params.print_special == false) {
                    const whisper_token id = whisper_full_get_token_id(ctx, i, j);
                    if (id >= whisper_token_eot(ctx)) {
                        continue;
                    }
                }
                const char * text = whisper_full_get_token_text(ctx, i, j);
                const float  p    = whisper_full_get_token_p   (ctx, i, j);
                const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
                printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
            }
        } else {
            const char * text = whisper_full_get_segment_text(ctx, i);
            printf("%s%s", speaker.c_str(), text);
        }
        if (params.tinydiarize) {
            if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
                printf("%s", params.tdrz_speaker_turn.c_str());
            }
        }
        // with timestamps or speakers: each segment on new line
        if (!params.no_timestamps || params.diarize) {
            printf("\n");
        }
        fflush(stdout);
    }
 }
 // Builds the plain-text transcription: one line per segment, each optionally
 // prefixed with the estimated speaker when stereo diarization is enabled and
 // two channels are available.
 //   ctx     - whisper context holding the finished segments
 //   params  - active inference settings (only `diarize` is read)
 //   pcmf32s - per-channel PCM samples; taken by const reference so the audio
 //             is not copied for every request
 std::string output_str(struct whisper_context * ctx, const whisper_params & params, const std::vector<std::vector<float>> & pcmf32s) {
    std::stringstream result;
    const bool with_speaker = params.diarize && pcmf32s.size() == 2;
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);
        std::string speaker = "";
        if (with_speaker)
        {
            const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
            const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
            speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
        }
        result << speaker << text << "\n";
    }
    return result.str();
 }
 // Overrides inference settings with the optional multipart form fields of a
 // request. Fields that are absent leave the corresponding setting untouched.
 // Recognized fields: offset-t, offset-n, duration, max-context, prompt,
 // response-format, temperature.
 // Note: std::stoi/std::stof throw when a numeric field does not hold a number.
 void get_req_parameters(const Request & req, whisper_params & params)
 {
    // user model configuration
    if (req.has_file("offset-t"))
    {
        params.offset_t_ms = std::stoi(req.get_file_value("offset-t").content);
    }
    if (req.has_file("offset-n"))
    {
        params.offset_n = std::stoi(req.get_file_value("offset-n").content);
    }
    if (req.has_file("duration"))
    {
        params.duration_ms = std::stoi(req.get_file_value("duration").content);
    }
    if (req.has_file("max-context"))
    {
        params.max_context = std::stoi(req.get_file_value("max-context").content);
    }
    if (req.has_file("prompt"))
    {
        params.prompt = req.get_file_value("prompt").content;
    }
    if (req.has_file("response-format"))
    {
        params.response_format = req.get_file_value("response-format").content;
    }
    // the checked field name must match the one that is read; a misspelled
    // check meant the temperature sent by the client was never applied
    if (req.has_file("temperature"))
    {
        params.userdef_temp = std::stof(req.get_file_value("temperature").content);
    }
 }
 }  // namespace
 int main(int argc, char ** argv) {
    whisper_params params;
    server_params sparams;
    std::mutex whisper_mutex;
    if (whisper_params_parse(argc, argv, params, sparams) == false) {
        whisper_print_usage(argc, argv, params, sparams);
        return 1;
    }
    if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params, sparams);
        exit(0);
    }
    if (params.diarize && params.tinydiarize) {
        fprintf(stderr, "error: cannot use both --diarize and --tinydiarize\n");
        whisper_print_usage(argc, argv, params, sparams);
        exit(0);
    }
    // whisper init
    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
        return 3;
    }
    // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
    Server svr;
    std::string const default_content = "<html>hello</html>";
    // this is only called if no index.html is found in the public --path
    svr.Get("/", [&default_content](const Request &, Response &res){
        res.set_content(default_content, "text/html");
        return false;
    });
    svr.Post("/inference", [&](const Request &req, Response &res){
        // aquire whisper model mutex lock
        whisper_mutex.lock();
        // first check user requested fields of the request
        if (!req.has_file("file"))
        {
            fprintf(stderr, "error: no 'file' field in the request\n");
            const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}";
            res.set_content(error_resp, "application/json");
            whisper_mutex.unlock();
            return;
        }
        auto audio_file = req.get_file_value("file");
        // check non-required fields
        get_req_parameters(req, params);
        std::string filename{audio_file.filename};
        printf("Received request: %s\n", filename.c_str());
        // audio arrays
        std::vector<float> pcmf32;               // mono-channel F32 PCM
        std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
        // write file to temporary file
        std::ofstream temp_file{filename, std::ios::binary};
        temp_file << audio_file.content;
        // read wav content into pcmf32
        if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
            fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
            const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
            res.set_content(error_resp, "application/json");
            whisper_mutex.unlock();
            return;
        }
        // remove temp file
        std::remove(filename.c_str());
        printf("Successfully loaded %s\n", filename.c_str());
        // print system information
        {
            fprintf(stderr, "\n");
            fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                    params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
        }
        // print some info about the processing
        {
            fprintf(stderr, "\n");
            if (!whisper_is_multilingual(ctx)) {
                if (params.language != "en" || params.translate) {
                    params.language = "en";
                    params.translate = false;
                    fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
                }
            }
            if (params.detect_language) {
                params.language = "auto";
            }
            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
                    __func__, filename.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
                    params.n_threads, params.n_processors,
                    params.language.c_str(),
                    params.translate ? "translate" : "transcribe",
                    params.tinydiarize ? "tdrz = 1, " : "",
                    params.no_timestamps ? 0 : 1);
            fprintf(stderr, "\n");
        }
        // run the inference
        {
            printf("Running whisper.cpp inference on %s\n", filename.c_str());
            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
            wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
            wparams.print_realtime   = false;
            wparams.print_progress   = params.print_progress;
            wparams.print_timestamps = !params.no_timestamps;
            wparams.print_special    = params.print_special;
            wparams.translate        = params.translate;
            wparams.language         = params.language.c_str();
            wparams.detect_language  = params.detect_language;
            wparams.n_threads        = params.n_threads;
            wparams.n_max_text_ctx   = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
            wparams.offset_ms        = params.offset_t_ms;
            wparams.duration_ms      = params.duration_ms;
            wparams.thold_pt         = params.word_thold;
            wparams.split_on_word    = params.split_on_word;
            wparams.speed_up         = params.speed_up;
            wparams.debug_mode       = params.debug_mode;
            wparams.tdrz_enable      = params.tinydiarize; // [TDRZ]
            wparams.initial_prompt   = params.prompt.c_str();
            wparams.greedy.best_of        = params.best_of;
            wparams.beam_search.beam_size = params.beam_size;
            wparams.temperature_inc  = params.userdef_temp;
            wparams.entropy_thold    = params.entropy_thold;
            wparams.logprob_thold    = params.logprob_thold;
            whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
            // this callback is called on each new segment
            if (!wparams.print_realtime) {
                wparams.new_segment_callback           = whisper_print_segment_callback;
                wparams.new_segment_callback_user_data = &user_data;
            }
            if (wparams.print_progress) {
                wparams.progress_callback           = whisper_print_progress_callback;
                wparams.progress_callback_user_data = &user_data;
            }
            // examples for abort mechanism
            // in examples below, we do not abort the processing, but we could if the flag is set to true
            // the callback is called before every encoder run - if it returns false, the processing is aborted
            {
                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
                wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
                    bool is_aborted = *(bool*)user_data;
                    return !is_aborted;
                };
                wparams.encoder_begin_callback_user_data = &is_aborted;
            }
            // the callback is called before every computation - if it returns true, the computation is aborted
            {
                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
                wparams.abort_callback = [](void * user_data) {
                    bool is_aborted = *(bool*)user_data;
                    return is_aborted;
                };
                wparams.abort_callback_user_data = &is_aborted;
            }
            if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
                const std::string error_resp = "{\"error\":\"failed to process audio\"}";
                res.set_content(error_resp, "application/json");
                whisper_mutex.unlock();
                return;
            }
        }
        // return results to user
        if (params.response_format == text_format)
        {
            std::string results = output_str(ctx, params, pcmf32s);
            res.set_content(results.c_str(), "text/html");
        }
        // TODO add more output formats
        else
        {
            std::string results = output_str(ctx, params, pcmf32s);
            json jres = json{
                {"text", results}
            };
            res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
                            "application/json");
        }
        // return whisper model mutex lock
        whisper_mutex.unlock();
    });
    // POST /load: replace the currently loaded model with the one named in the
    // multipart "model" field. Errors are reported as JSON, success as text.
    svr.Post("/load", [&](const Request &req, Response &res){
        // Hold the model lock for the whole handler. The guard releases it on
        // every exit path, including exceptions, so a failing request can no
        // longer leave the mutex locked and stall all later requests.
        std::lock_guard<std::mutex> lock(whisper_mutex);
        if (!req.has_file("model"))
        {
            fprintf(stderr, "error: no 'model' field in the request\n");
            const std::string error_resp = "{\"error\":\"no 'model' field in the request\"}";
            res.set_content(error_resp, "application/json");
            return;
        }
        std::string model = req.get_file_value("model").content;
        // check if the model is in the file system
        if (!is_file_exist(model.c_str()))
        {
            fprintf(stderr, "error: 'model': %s not found!\n", model.c_str());
            const std::string error_resp = "{\"error\":\"model not found!\"}";
            res.set_content(error_resp, "application/json");
            return;
        }
        // clean up
        whisper_free(ctx);
        // whisper init
        ctx = whisper_init_from_file_with_params(model.c_str(), cparams);
        // TODO perhaps load prior model here instead of exit
        if (ctx == nullptr) {
            // the previous context was already freed above, so there is nothing to fall back to
            fprintf(stderr, "error: model init  failed, no model loaded must exit\n");
            exit(1);
        }
        // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
        whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
        const std::string success = "Load was successful!";
        res.set_content(success, "application/text");
    });
    svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
        const char fmt[] = "500 Internal Server Error\n%s";
        char buf[BUFSIZ];
        try {
            std::rethrow_exception(std::move(ep));
        } catch (std::exception &e) {
            snprintf(buf, sizeof(buf), fmt, e.what());
        } catch (...) {
            snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
        }
        res.set_content(buf, "text/plain");
        res.status = 500;
    });
    svr.set_error_handler([](const Request &, Response &res) {
        if (res.status == 400) {
            res.set_content("Invalid request", "text/plain");
        } else if (res.status != 500) {
            res.set_content("File Not Found", "text/plain");
            res.status = 404;
        }
    });
    // set timeouts and change hostname and port
    svr.set_read_timeout(sparams.read_timeout);
    svr.set_write_timeout(sparams.write_timeout);
    if (!svr.bind_to_port(sparams.hostname, sparams.port))
    {
        fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n",
                sparams.hostname.c_str(), sparams.port);
        return 1;
    }
    // Set the base directory for serving static files
    svr.set_base_dir(sparams.public_path);
    // to make it ctrl+clickable:
    printf("\nwhisper server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
    if (!svr.listen_after_bind())
    {
        return 1;
    }
    whisper_print_timings(ctx);
    whisper_free(ctx);
    return 0;
 }
--- a/examples/wchess/CMakeLists.txt
+++ b/examples/wchess/CMakeLists.txt
@ -0,0 +1,9 @@
 # wchess is written against C++11. Require it explicitly so CMake fails at
 # configure time instead of silently falling back to an older standard.
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # shared chess logic used by both front ends
 add_subdirectory(libwchess)
 # pick the front end: browser build under Emscripten, command line otherwise
 if (EMSCRIPTEN)
    add_subdirectory(wchess.wasm)
 else()
    add_subdirectory(wchess.cmd)
 endif()
--- a/examples/wchess/libwchess/CMakeLists.txt
+++ b/examples/wchess/libwchess/CMakeLists.txt
@ -0,0 +1,19 @@
 # Chess logic plus the whisper glue, consumed by the wchess front ends.
 add_library(libwchess WChess.cpp WChess.h Chessboard.cpp Chessboard.h)
 # Consumers include the headers straight from this source directory.
 target_include_directories(libwchess PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>")
 target_link_libraries(libwchess PUBLIC whisper common)
 # Board test executable: compiles Chessboard.cpp directly and is not linked against libwchess.
 add_executable(test-chessboard test-chessboard.cpp Chessboard.cpp)
--- a/examples/wchess/libwchess/Chessboard.cpp
+++ b/examples/wchess/libwchess/Chessboard.cpp
@ -0,0 +1,714 @@
 #include "Chessboard.h"
 #include <vector>
 #include <algorithm>
 #include <cstring>
 #include <set>
 namespace {
 // remove std::string_view, c++17 -> c++11
 // Square names indexed by board position: index = (digit - '1') * 8 + (letter - 'a').
 constexpr std::array<const char*, 64> positions = {
    "a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1",
    "a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2",
    "a3", "b3", "c3", "d3", "e3", "f3", "g3", "h3",
    "a4", "b4", "c4", "d4", "e4", "f4", "g4", "h4",
    "a5", "b5", "c5", "d5", "e5", "f5", "g5", "h5",
    "a6", "b6", "c6", "d6", "e6", "f6", "g6", "h6",
    "a7", "b7", "c7", "d7", "e7", "f7", "g7", "h7",
    "a8", "b8", "c8", "d8", "e8", "f8", "g8", "h8",
 };
 // Sentinel for "not a square": one past the last valid board index.
 constexpr int INVALID_POS = positions.size();
 // NOTE: the names are historical and inverted relative to chess terminology:
 // R indexes the letter ('a'..'h') and F indexes the digit ('1'..'8') of a square name.
 constexpr int R = 0; // index of the letter in a square name
 constexpr int F = 1; // index of the digit in a square name
 // Converts a square name such as "e4" to its board index, or INVALID_POS.
 // Only the first two characters are inspected. Both are range-checked on their
 // own: checking just the combined index lets off-board letters wrap onto the
 // neighbouring row (e.g. "i1" would become the index of "a2").
 constexpr int operator ""_P(const char * c, size_t size) {
    return size >= 2 && c[R] >= 'a' && c[R] <= 'h' && c[F] >= '1' && c[F] <= '8'
        ? (c[F] - '1') * 8 + (c[R] - 'a')
        : INVALID_POS;
 }
 // Minimal non-owning string view (C++11 stand-in for std::string_view).
 struct sview {
    const char * ptr = nullptr;
    size_t size = 0;
    sview() = default;
    sview(const char * p, size_t s) : ptr(p), size(s) {}
    sview(const std::string& s) : ptr(s.data()), size(s.size()) {}
    // Index of the first `del` at or after `pos`, or std::string::npos.
    size_t find(char del, size_t pos) const {
        while (pos < size && ptr[pos] != del) ++pos;
        return pos < size ? pos : std::string::npos;
    }
 };
 // Splits `str` on `del`, skipping spaces in front of each token.
 // A trailing delimiter yields a final empty token, and the result always has
 // at least one element. `str.ptr[str.size]` may be read, so the buffer behind
 // `str` must be NUL-terminated (true for the std::string callers in this file).
 std::vector<sview> split(sview str, char del) {
    std::vector<sview> res;
    size_t cur = 0;
    size_t last = 0;
    while (cur != std::string::npos) {
        if (str.ptr[last] == ' ') {
            ++last;
            continue;
        }
        cur = str.find(del, last);
        const size_t len = cur == std::string::npos ? str.size - last : cur - last;
        res.emplace_back(str.ptr + last, len);
        last = cur + 1; // wraps to 0 when cur is npos; the loop ends right after
    }
    return res;
 }
 // Runtime counterpart of the _P literal.
 size_t strToPos(sview str) {
    return operator ""_P(str.ptr, str.size);
 }
 // Piece names in the same order as Chessboard::Piece::Types.
 constexpr std::array<const char*, 6> pieceNames =  {
    "pawn", "knight", "bishop", "rook", "queen", "king",
 };
 // Returns the index of the piece named exactly `str`, or pieceNames.size() if
 // there is none. The match must cover the whole name: a prefix comparison
 // would map an empty token to "pawn" and "k" to "knight".
 int strToType(sview str) {
    auto it = std::find_if(pieceNames.begin(), pieceNames.end(), [str] (const char* name) {
        return strlen(name) == str.size && strncmp(name, str.ptr, str.size) == 0;
    });
    return it != pieceNames.end() ? int(it - pieceNames.begin()) : int(pieceNames.size());
 }
 }
 // Sets up the standard starting position: both piece sets, the board of
 // pointers into those sets, and the hard-coded opening moves of each side
 // (pawn single/double steps and the four knight jumps).
 Chessboard::Chessboard()
    // black: pawns at indices 0..7, back rank at indices 8..15
    : blackPieces {{
        {Piece::Pawn, Piece::Black, "a7"_P },
        {Piece::Pawn, Piece::Black, "b7"_P },
        {Piece::Pawn, Piece::Black, "c7"_P },
        {Piece::Pawn, Piece::Black, "d7"_P },
        {Piece::Pawn, Piece::Black, "e7"_P },
        {Piece::Pawn, Piece::Black, "f7"_P },
        {Piece::Pawn, Piece::Black, "g7"_P },
        {Piece::Pawn, Piece::Black, "h7"_P },
        {Piece::Rook, Piece::Black, "a8"_P },
        {Piece::Knight, Piece::Black, "b8"_P },
        {Piece::Bishop, Piece::Black, "c8"_P },
        {Piece::Queen, Piece::Black, "d8"_P },
        {Piece::King, Piece::Black, "e8"_P },
        {Piece::Bishop, Piece::Black, "f8"_P },
        {Piece::Knight, Piece::Black, "g8"_P },
        {Piece::Rook, Piece::Black, "h8"_P },
    }}
    // white: same layout, pawns at indices 0..7, back rank at indices 8..15
    , whitePieces {{
        {Piece::Pawn, Piece::White, "a2"_P },
        {Piece::Pawn, Piece::White, "b2"_P },
        {Piece::Pawn, Piece::White, "c2"_P },
        {Piece::Pawn, Piece::White, "d2"_P },
        {Piece::Pawn, Piece::White, "e2"_P },
        {Piece::Pawn, Piece::White, "f2"_P },
        {Piece::Pawn, Piece::White, "g2"_P },
        {Piece::Pawn, Piece::White, "h2"_P },
        {Piece::Rook, Piece::White, "a1"_P },
        {Piece::Knight, Piece::White, "b1"_P },
        {Piece::Bishop, Piece::White, "c1"_P },
        {Piece::Queen, Piece::White, "d1"_P },
        {Piece::King, Piece::White, "e1"_P },
        {Piece::Bishop, Piece::White, "f1"_P },
        {Piece::Knight, Piece::White, "g1"_P },
        {Piece::Rook, Piece::White, "h1"_P },
    }}
    // board rows are listed from a1..h1 up to a8..h8, matching the `positions` table
    , board {{
        &whitePieces[ 8], &whitePieces[ 9], &whitePieces[10], &whitePieces[11], &whitePieces[12], &whitePieces[13], &whitePieces[14], &whitePieces[15],
        &whitePieces[ 0], &whitePieces[ 1], &whitePieces[ 2], &whitePieces[ 3], &whitePieces[ 4], &whitePieces[ 5], &whitePieces[ 6], &whitePieces[ 7],
        nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,
        nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,
        nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,
        nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,          nullptr,
        &blackPieces[ 0], &blackPieces[ 1], &blackPieces[ 2], &blackPieces[ 3], &blackPieces[ 4], &blackPieces[ 5], &blackPieces[ 6], &blackPieces[ 7],
        &blackPieces[ 8], &blackPieces[ 9], &blackPieces[10], &blackPieces[11], &blackPieces[12], &blackPieces[13], &blackPieces[14], &blackPieces[15],
    }}
    // opening moves as (from, to) pairs; sorted in the body below
    , whiteMoves {
        {"b1"_P, "a3"_P}, {"b1"_P, "c3"_P},
        {"g1"_P, "f3"_P}, {"g1"_P, "h3"_P},
        {"a2"_P, "a3"_P}, {"a2"_P, "a4"_P},
        {"b2"_P, "b3"_P}, {"b2"_P, "b4"_P},
        {"c2"_P, "c3"_P}, {"c2"_P, "c4"_P},
        {"d2"_P, "d3"_P}, {"d2"_P, "d4"_P},
        {"e2"_P, "e3"_P}, {"e2"_P, "e4"_P},
        {"f2"_P, "f3"_P}, {"f2"_P, "f4"_P},
        {"g2"_P, "g3"_P}, {"g2"_P, "g4"_P},
        {"h2"_P, "h3"_P}, {"h2"_P, "h4"_P},
    }
    , blackMoves {
        {"a7"_P, "a5"_P}, {"a7"_P, "a6"_P},
        {"b7"_P, "b5"_P}, {"b7"_P, "b6"_P},
        {"c7"_P, "c5"_P}, {"c7"_P, "c6"_P},
        {"d7"_P, "d5"_P}, {"d7"_P, "d6"_P},
        {"e7"_P, "e5"_P}, {"e7"_P, "e6"_P},
        {"f7"_P, "f5"_P}, {"f7"_P, "f6"_P},
        {"g7"_P, "g5"_P}, {"g7"_P, "g6"_P},
        {"h7"_P, "h5"_P}, {"h7"_P, "h6"_P},
        {"b8"_P, "a6"_P}, {"b8"_P, "c6"_P},
        {"g8"_P, "f6"_P}, {"g8"_P, "h6"_P},
    }
 {
    // pieceNames is indexed by Piece::Types, so it must have one entry per real type
    static_assert(pieceNames.size() == Chessboard::Piece::Taken, "Mismatch between piece names and types");
    // process() looks moves up with std::binary_search, which needs sorted ranges
    std::sort(whiteMoves.begin(), whiteMoves.end());
    std::sort(blackMoves.begin(), blackMoves.end());
 }
 std::string Chessboard::getRules(const std::string& prompt) const {
    // leading space is very important!
    std::string result =
    "\n"
    "# leading space is very important!\n"
    "\n";
    if (prompt.empty()) {
        result += "move ::= \" \" ((piece | frompos) \" \" \"to \"?)? topos\n";
        //result += "move ::= \" \" frompos \" \" \"to \"? topos\n";
    }
    else {
        // result += "move ::= prompt \" \" ((piece | frompos) \" \" \"to \"?)? topos\n"
        result += "move ::= prompt \" \" frompos \" \" \"to \"? topos\n"
        "\n"
        "prompt ::= \" " + prompt + "\"\n";
    }
    std::set<std::string> pieces;
    std::set<std::string> from_pos;
    std::set<std::string> to_pos;
    auto& allowed_moves =  m_moveCounter % 2 ? blackMoves : whiteMoves;
    for (auto& m : allowed_moves) {
        if (board[m.first]->type != Piece::Taken) pieces.insert(pieceNames[board[m.first]->type]);
        from_pos.insert(positions[m.first]);
        to_pos.insert(positions[m.second]);
    }
    if (!pieces.empty()) {
        result += "piece ::= (";
        for (auto& p : pieces) result += " \"" + p + "\" |";
        result.pop_back();
        result += ")\n\n";
    }
    if (!from_pos.empty()) {
        result += "frompos ::= (";
        for (auto& p : from_pos) result += " \"" + p + "\" |";
        result.pop_back();
        result += ")\n";
    }
    if (!to_pos.empty()) {
        result += "topos ::= (";
        for (auto& p : to_pos) result += " \"" + p + "\" |";
        result.pop_back();
        result += ")\n";
    }
    return result;
 }
 std::string Chessboard::stringifyBoard() {
    static constexpr std::array<char, 6> blackShort =  {
        'p', 'n', 'b', 'r', 'q', 'k',
    };
    static constexpr std::array<char, 6> whiteShort =  {
        'P', 'N', 'B', 'R', 'Q', 'K',
    };
    std::string result;
    result.reserve(16 + 2 * 64 + 16);
    for (char rank = 'a'; rank <= 'h'; ++rank) {
        result.push_back(rank);
        result.push_back(' ');
    }
    result.back() = '\n';
    for (int i = 7; i >= 0; --i) {
        for (int j = 0; j < 8; ++j) {
            auto p = board[i * 8 + j];
            if (p) result.push_back(p->color == Piece::White ? whiteShort[p->type] : blackShort[p->type]);
            else result.push_back((i + j) % 2 ? '.' : '*');
            result.push_back(' ');
        }
        result.push_back('0' + i + 1);
        result.push_back('\n');
    }
    return result;
 }
 std::string Chessboard::process(const std::string& command) {
    auto color = Piece::Colors(m_moveCounter % 2);
    fprintf(stdout, "%s: Command to %s: '%s%.*s%s'\n", __func__, (color ? "Black" : "White"), "\033[1m", int(command.size()), command.data(), "\033[0m");
    if (command.empty()) return "";
    auto tokens = split(command, ' ');
    for (auto& t : tokens) fprintf(stdout, "%s: Token %.*s\n", __func__, int(t.size), t.ptr);
    auto pos_from = INVALID_POS;
    auto type = Piece::Types::Taken;
    auto pos_to = INVALID_POS;
    if (tokens.size() == 1) {
        type = Piece::Types::Pawn;
        pos_to = strToPos(tokens.front());
    }
    else {
        pos_from = strToPos(tokens.front());
        if (pos_from == INVALID_POS) type = Piece::Types(strToType(tokens.front()));
        pos_to = strToPos(tokens.back());
    }
    if (pos_to == INVALID_POS) return "";
    if (pos_from == INVALID_POS) {
        if (type == Piece::Types::Taken) return "";
        auto& pieces = color ? blackPieces : whitePieces;
        auto pieceIndex = 0u;
        for (; pieceIndex < pieces.size(); ++pieceIndex) {
            if (pieces[pieceIndex].type == type && validateMove(pieces[pieceIndex], pos_to)) break;
        }
        if (pieceIndex == pieces.size()) return "";
        pos_from = pieces[pieceIndex].pos;
    }
    if (board[pos_from] == nullptr) return "";
    if (board[pos_from]->color != color) return "";
    Move m = {pos_from, pos_to};
    auto& allowed_moves = color ? blackMoves : whiteMoves;
    fprintf(stdout, "%s:allowed size %d :\n", __func__, int(allowed_moves.size()));
    for (auto& m : allowed_moves) fprintf(stdout, " %s %s; ", positions[m.first], positions[m.second]);
    fprintf(stdout, "\n");
    if (!std::binary_search(allowed_moves.begin(), allowed_moves.end(), m)) return "";
    move(m);
    {
        auto it = std::remove_if(allowed_moves.begin(), allowed_moves.end(), [m] (const Move& move) { return move.first == m.first; });
        allowed_moves.erase(it, allowed_moves.end());
    }
    std::vector<Piece*> affected = { board[m.second] };
    for (auto& p : whitePieces) {
        if (&p == board[m.second]
            || validateMove(p, m.first)
            || validateMove(p, m.second)
            || std::binary_search(whiteMoves.begin(), whiteMoves.end(), Move(p.pos, m.second))
        ) {
            auto it = std::remove_if(whiteMoves.begin(), whiteMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
            whiteMoves.erase(it, whiteMoves.end());
            affected.push_back(&p);
        }
    }
    for (auto& p : blackPieces) {
        if (&p == board[m.second]
            || validateMove(p, m.first)
            || validateMove(p, m.second)
            || std::binary_search(blackMoves.begin(), blackMoves.end(), Move(p.pos, m.second))
        ) {
            auto it = std::remove_if(blackMoves.begin(), blackMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
            blackMoves.erase(it, blackMoves.end());
            affected.push_back(&p);
        }
    }
    for (auto& p : affected) getValidMoves(*p, p->color ? blackMoves : whiteMoves);
    std::sort(blackMoves.begin(), blackMoves.end());
    std::sort(whiteMoves.begin(), whiteMoves.end());
    std::string result = positions[m.first];
    result += "-";
    result += positions[m.second];
    ++m_moveCounter;
    fprintf(stdout, "%s: Move '%s%s%s'\n", __func__, "\033[1m", result.data(), "\033[0m");
    return result;
 }
 void Chessboard::getValidMoves(const Piece& piece, std::vector<Move>& result) {
    std::string cur = positions[piece.pos];
    switch (piece.type) {
        case Piece::Pawn: {
            std::string next = cur;
            piece.color ? --next[F] : ++next[F]; // one down / up
            std::string left = { char(next[R] - 1), next[F]};
            auto pos = strToPos(left);
            if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
            std::string right = { char(next[R] + 1), next[F]};
            pos = strToPos(right);
            if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
            pos = strToPos(next);
            if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
            else break;
            if (piece.color ? cur[F] != '7' : cur[F] != '2') break;
            piece.color ? --next[F] : ++next[F]; // one down / up
            pos = strToPos(next);
            if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
            break;
        }
        case Piece::Knight: {
            std::string next = cur;
            --next[F]; --next[F]; --next[R];
            auto pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[F]; --next[F]; ++next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[F]; ++next[F]; --next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[F]; ++next[F]; ++next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[F]; --next[R]; --next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[F]; --next[R]; --next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[F]; ++next[R]; ++next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[F]; ++next[R]; ++next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            break;
        }
        case Piece::Bishop: {
            std::string next = cur;
            while (true) {
                --next[R]; --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                --next[R]; ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R]; --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R]; ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            break;
        }
        case Piece::Rook: {
            std::string next = cur;
            while (true) {
                --next[R];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            break;
        }
        case Piece::Queen: {
            std::string next = cur;
            while (true) {
                --next[R]; --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                --next[R]; ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R]; --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R]; ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                --next[R];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[R];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                --next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            next = cur;
            while (true) {
                ++next[F];
                auto pos = strToPos(next);
                if (pos == INVALID_POS) break;
                else if (board[pos]) {
                    if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
                    break;
                }
                result.emplace_back(piece.pos, pos);
            }
            break;
        }
        case Piece::King: {
            std::string next = cur;
            --next[R]; --next[F];
            auto pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[R]; ++next[F];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[R]; --next[F];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[R]; ++next[F];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[R];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            --next[F];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            next = cur;
            ++next[F];
            pos = strToPos(next);
            if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
            break;
        }
        case Piece::Taken: break;
        default: break;
    }
 }
 // Pawn rule: one step straight ahead onto an empty square, two steps from the
 // starting rank through two empty squares, or one step diagonally ahead onto
 // a square held by the other colour.
 bool Chessboard::validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    const bool white = color == Piece::White;
    const int forward = white ? 1 : -1;
    const int rank_delta = to_rank - from_rank;

    if (from_file == to_file) {
        if (rank_delta == forward) {
            return board[to_rank * 8 + to_file] == nullptr;
        }
        const bool on_start_rank = white ? from_rank == 1 : from_rank == 6;
        if (on_start_rank && rank_delta == forward * 2) {
            return board[(to_rank - forward) * 8 + to_file] == nullptr && board[to_rank * 8 + to_file] == nullptr;
        }
        return false;
    }

    const bool adjacent_file = from_file + 1 == to_file || from_file - 1 == to_file;
    if (adjacent_file && rank_delta == forward) {
        const Piece * target = board[to_rank * 8 + to_file];
        return target != nullptr && target->color != color;
    }
    return false;
 }
 // Knight rule: an L-shaped jump (2+1 or 1+2) onto an empty square or a square
 // held by the other colour.
 bool Chessboard::validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    const int rank_dist = std::abs(from_rank - to_rank);
    const int file_dist = std::abs(from_file - to_file);
    const bool l_shape = (rank_dist == 2 && file_dist == 1) || (rank_dist == 1 && file_dist == 2);
    if (!l_shape) return false;
    const Piece * target = board[to_rank * 8 + to_file];
    return target == nullptr || target->color != color;
 }
 // Bishop rule: the target lies on one of the two diagonals through the
 // origin, every square in between is empty, and the target square is empty or
 // held by the other colour. Callers must not pass identical squares.
 bool Chessboard::validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    const int rank_delta = to_rank - from_rank;
    const int file_delta = to_file - from_file;
    const bool rising = rank_delta == file_delta;   // rank - file is constant
    const bool falling = rank_delta == -file_delta; // rank + file is constant
    if (!rising && !falling) return false;

    const int rank_step = from_rank < to_rank ? 1 : -1;
    const int file_step = rising ? rank_step : -rank_step;
    int rank = from_rank + rank_step;
    int file = from_file + file_step;
    for (; rank != to_rank; rank += rank_step, file += file_step) {
        if (board[rank * 8 + file]) return false;
    }
    const Piece * target = board[to_rank * 8 + to_file];
    return target == nullptr || target->color != color;
 }
 // Rook rule: the target shares the rank or the file with the origin, every
 // square in between is empty, and the target square is empty or held by the
 // other colour. Callers must not pass identical squares.
 bool Chessboard::validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    int rank_step = 0;
    int file_step = 0;
    if (from_rank == to_rank) {
        file_step = from_file < to_file ? 1 : -1;
    } else if (from_file == to_file) {
        rank_step = from_rank < to_rank ? 1 : -1;
    } else {
        return false;
    }

    int rank = from_rank + rank_step;
    int file = from_file + file_step;
    while (rank != to_rank || file != to_file) {
        if (board[rank * 8 + file]) return false;
        rank += rank_step;
        file += file_step;
    }
    const Piece * target = board[to_rank * 8 + to_file];
    return target == nullptr || target->color != color;
 }
 // Queen rule: anything that is a valid bishop move or a valid rook move.
 bool Chessboard::validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    return validateBishopMove(color, from_rank, from_file, to_rank, to_file)
        || validateRookMove(color, from_rank, from_file, to_rank, to_file);
 }
 // King rule: at most one step in rank and in file, onto an empty square or a
 // square held by the other colour.
 bool Chessboard::validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
    const bool too_far = std::abs(from_rank - to_rank) >= 2 || std::abs(from_file - to_file) >= 2;
    if (too_far) return false;
    const Piece * target = board[to_rank * 8 + to_file];
    return target == nullptr || target->color != color;
 }
 // Checks whether `piece` may move to board index `pos` on the current board by
 // dispatching to the rule for its type. Taken pieces and a move onto the
 // piece's own square are always rejected.
 bool Chessboard::validateMove(const Piece& piece, int pos) {
    if (piece.type == Piece::Taken || piece.pos == pos) return false;

    // split both board indices into row (rank) and column (file)
    const int from_rank = piece.pos / 8;
    const int from_file = piece.pos % 8;
    const int to_rank = pos / 8;
    const int to_file = pos % 8;

    switch (piece.type) {
        case Piece::Pawn:
            return validatePawnMove(piece.color, from_rank, from_file, to_rank, to_file);
        case Piece::Knight:
            return validateKnightMove(piece.color, from_rank, from_file, to_rank, to_file);
        case Piece::Bishop:
            return validateBishopMove(piece.color, from_rank, from_file, to_rank, to_file);
        case Piece::Rook:
            return validateRookMove(piece.color, from_rank, from_file, to_rank, to_file);
        case Piece::Queen:
            return validateQueenMove(piece.color, from_rank, from_file, to_rank, to_file);
        case Piece::King:
            return validateKingMove(piece.color, from_rank, from_file, to_rank, to_file);
        default:
            return false;
    }
 }
 bool Chessboard::move(const Move& m) {
    if (!board[m.first] || (board[m.second] && board[m.first]->color == board[m.second]->color)) return false;
    if (board[m.second]) board[m.second]->type = Piece::Taken;
    board[m.second] = board[m.first];
    board[m.first] = nullptr;
    board[m.second]->pos = m.second;
    return true;
 }
--- a/examples/wchess/libwchess/Chessboard.h
+++ b/examples/wchess/libwchess/Chessboard.h
@ -0,0 +1,59 @@
 #pragma once
 #include <string>
 #include <array>
 #include <vector>
 // Chess position plus a text-command interface used by the wchess examples.
 // Squares are addressed by a single index, rank * 8 + file.
 class Chessboard {
 public:
    Chessboard();
    // Applies the move command(s) contained in `t` (e.g. "pawn to d4, e5") and returns the
    // executed moves as space-separated "from-to" pairs (e.g. "d2-d4 e7-e5"); the unit
    // tests expect an empty string when a command is rejected.
    std::string process(const std::string& t);
    // Returns a textual rendering of the current board.
    std::string stringifyBoard();
    // Returns grammar text for the speech recognizer; WChess::run() parses it and uses the
    // rule named "move" as the start rule.
    std::string getRules(const std::string & prompt) const;
    // (source index, destination index)
    using Move = std::pair<int, int>;
 private:
    // Performs the move on the board without validating piece rules.
    bool move(const Move& move);
    struct Piece {
        enum Types {
            Pawn,
            Knight,
            Bishop,
            Rook,
            Queen,
            King,
            Taken, // the piece has been captured
        };
        enum Colors {
            White,
            Black,
        };
        Types type;
        Colors color;
        int pos; // board index of the piece (rank * 8 + file)
    };
    using PieceSet = std::array<Piece, 16>;
    PieceSet blackPieces;
    PieceSet whitePieces;
    int m_moveCounter = 0;
    // One slot per square; nullptr means the square is empty.
    // NOTE(review): the pointers presumably refer to elements of blackPieces/whitePieces, in
    // which case a copied Chessboard would alias the source object's pieces - confirm
    // against the constructor before copying instances.
    using Board = std::array<Piece*, 64>;
    Board board;
    // NOTE(review): presumably the currently available moves per side - confirm in Chessboard.cpp.
    std::vector<Move> whiteMoves;
    std::vector<Move> blackMoves;
    // Returns true when `piece` may move to board index `pos` according to the basic rules below.
    bool validateMove(const Piece& piece, int pos);
    void getValidMoves(const Piece& piece, std::vector<Move>& moves);
    // Per-piece validators: only basic movement rules are checked.
    // FIXME: en passant, castling, promotion, etc. are not handled.
    bool validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
 };
--- a/examples/wchess/libwchess/WChess.cpp
+++ b/examples/wchess/libwchess/WChess.cpp
@ -0,0 +1,220 @@
 #include "WChess.h"
 #include "Chessboard.h"
 #include "grammar-parser.h"
 #include "common.h"
 #include <thread>
 // Keeps the caller's whisper context pointer, copies the decoding parameters, callbacks and
 // settings, and creates a fresh Chessboard.
 WChess::WChess(whisper_context * ctx,
        const whisper_full_params & wparams,
        callbacks cb,
        settings s) :
    m_ctx(ctx),
    m_wparams(wparams),
    m_cb(cb),
    m_settings(s),
    m_board(new Chessboard()) {
 }
 // Defaulted here rather than in the header: WChess.h only forward-declares Chessboard, and
 // destroying the std::unique_ptr<Chessboard> member requires the complete type.
 WChess::~WChess() = default;
 void WChess::set_status(const std::string& msg) const {
    if (m_cb.set_status) (*m_cb.set_status)(msg);
 }
 void WChess::set_moves(const std::string& moves) const {
    if (m_cb.set_moves) (*m_cb.set_moves)(moves);
 }
 bool WChess::check_running() const {
    if (m_cb.check_running) return (*m_cb.check_running)();
    return false;
 }
 void WChess::clear_audio() const {
    if (m_cb.clear_audio) (*m_cb.clear_audio)();
 }
 void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
    if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
 }
 std::string WChess::stringify_board() const {
    return m_board->stringifyBoard();
 }
 // Main loop. While check_running() reports true it:
 //   1. sleeps 100 ms and requests m_settings.vad_ms of audio through get_audio(),
 //   2. when audio was returned, builds the move grammar from the board and transcribes the
 //      audio with that grammar,
 //   3. strips the activation prompt (if any) from the transcription, hands the remaining
 //      command to the chessboard and reports executed moves through set_moves(),
 //   4. clears the audio buffer.
 // Progress is printed to stdout and mirrored through set_status().
 void WChess::run() {
    set_status("loading data ...");
    // The activation-prompt stage is currently switched off: have_prompt starts out true, so
    // k_prompt is empty and every captured chunk is treated as a move command.
    bool have_prompt  = true;
    bool ask_prompt   = !have_prompt;
    float logprob_min  = 0.0f;
    float logprob_sum  = 0.0f;
    int n_tokens  = 0;
    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;
    const std::string k_prompt = have_prompt ? "" : "checkmate";
    while (check_running()) {
        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        if (ask_prompt) {
            fprintf(stdout, "\n");
            fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
            fprintf(stdout, "\n");
            {
                char status[1024];
                snprintf(status, sizeof(status), "Say the following phrase: '%s'", k_prompt.c_str());
                set_status(status);
            }
            ask_prompt = false;
        }
        int64_t t_ms = 0;
        {
            get_audio(m_settings.vad_ms, pcmf32_cur);
            if (!pcmf32_cur.empty()) {
                fprintf(stdout, "%s: Processing ...\n", __func__);
                set_status("Processing ...");
                if (!have_prompt) {
                    const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
                    fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
                    const float sim = similarity(txt, k_prompt);
                    if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
                        fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
                        ask_prompt = true;
                    } else {
                        fprintf(stdout, "\n");
                        fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
                        fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
                        fprintf(stdout, "\n");
                        set_status("Success! Waiting for voice commands ...");
                        // save the audio for the prompt
                        pcmf32_prompt = pcmf32_cur;
                        have_prompt = true;
                    }
                } else {
                    // prepend the recorded prompt audio (if any) to the command audio
                    if (!pcmf32_prompt.empty()) pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
                    // left-pad with silence so that at least 1.2 s of samples are transcribed
                    static const size_t MIN_SIZE = 1.2 * WHISPER_SAMPLE_RATE;
                    if (MIN_SIZE > pcmf32_cur.size()) pcmf32_cur.insert(pcmf32_cur.begin(), MIN_SIZE - pcmf32_cur.size(), 0.0f);
                    std::string rules = m_board->getRules(k_prompt);
                    fprintf(stdout, "%s: grammar rules:\n'%s'\n", __func__, rules.c_str());
                    auto grammar_parsed = grammar_parser::parse(rules.c_str());
                    auto grammar_rules = grammar_parsed.c_rules();
                    // grammar_rules lives only in this scope; the pointer stored below is
                    // valid for the transcribe() call that follows and is reset afterwards
                    m_wparams.grammar_rules   = grammar_rules.data();
                    m_wparams.n_grammar_rules = grammar_rules.size();
                    m_wparams.i_start_rule    = grammar_parsed.symbol_ids.at("move");
                    auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
                    // do not leave a pointer to the soon-to-be-destroyed rule vector behind
                    m_wparams.grammar_rules   = nullptr;
                    m_wparams.n_grammar_rules = 0;
                    const float p = 100.0f * std::exp(logprob_min);
                    fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
                    // find the prompt in the text
                    float best_sim = 0.0f;
                    size_t best_len = 0;
                    for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                        const auto prompt = txt.substr(0, n);
                        const float sim = similarity(prompt, k_prompt);
                        //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
                        if (sim > best_sim) {
                            best_sim = sim;
                            best_len = n;
                        }
                    }
                    // substr(0, n) silently truncates when n exceeds the text length, but
                    // substr(pos) throws std::out_of_range for pos > size(), so clamp first
                    if (best_len > txt.size()) {
                        best_len = txt.size();
                    }
                    fprintf(stdout, "%s:   DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
                    std::string command = ::trim(txt.substr(best_len));
                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                    fprintf(stdout, "\n");
                    {
                        char status[1024];
                        snprintf(status, sizeof(status), "Command '%s', (t = %d ms)", command.c_str(), (int) t_ms);
                        set_status(status);
                    }
                    if (!command.empty()) {
                        const std::string moves = m_board->process(command);
                        if (!moves.empty()) {
                            set_moves(moves);
                        }
                    }
                }
                clear_audio();
            }
        }
    }
 }
 // Runs whisper_full() on pcmf32 with the stored parameters and returns the concatenated
 // segment text.
 // Outputs (all reset to zero on entry):
 //   logprob_min - smallest token plog seen
 //   logprob_sum - sum of all token plog values
 //   n_tokens    - number of tokens visited
 //   t_ms        - elapsed wall-clock time in milliseconds; only set when the call succeeds
 // Returns an empty string when whisper_full() fails or when a token reports plog > 0.
 std::string WChess::transcribe(
                const std::vector<float> & pcmf32,
                float & logprob_min,
                float & logprob_sum,
                int & n_tokens,
                int64_t & t_ms) {
    using clock = std::chrono::high_resolution_clock;
    const clock::time_point started = clock::now();
    t_ms        = 0;
    n_tokens    = 0;
    logprob_sum = 0.0f;
    logprob_min = 0.0f;
    const int rc = whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size());
    if (rc != 0) {
        return std::string();
    }
    std::string transcript;
    const int segment_count = whisper_full_n_segments(m_ctx);
    for (int seg = 0; seg < segment_count; ++seg) {
        transcript += whisper_full_get_segment_text(m_ctx, seg);
        const int token_count = whisper_full_n_tokens(m_ctx, seg);
        for (int tok = 0; tok < token_count; ++tok) {
            const float plog = whisper_full_get_token_data(m_ctx, seg, tok).plog;
            // a positive log-probability is treated as an invalid result
            if (plog > 0.0f) {
                return std::string();
            }
            if (plog < logprob_min) {
                logprob_min = plog;
            }
            logprob_sum += plog;
            ++n_tokens;
        }
    }
    const auto elapsed = clock::now() - started;
    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
    return transcript;
 }
--- a/examples/wchess/libwchess/WChess.h
+++ b/examples/wchess/libwchess/WChess.h
@ -0,0 +1,62 @@
 #pragma once
 #include "whisper.h"
 #include <string>
 #include <vector>
 #include <memory>
 class Chessboard;
 // Voice-driven chess session: transcribes captured audio with whisper, constrained by a
 // grammar generated from the current board, and applies the recognized moves to a Chessboard.
 // All interaction with the host application goes through plain function-pointer callbacks.
 class WChess {
 public:
    using SetStatusCb = void (*)(const std::string &);
    using CheckRunningCb = bool (*)();
    using GetAudioCb = void (*)(int, std::vector<float> &);
    using SetMovesCb = void (*)(const std::string &);
    using ClearAudioCb = void (*)();
    // Host hooks. Every entry is optional; a null entry is skipped, and a null
    // check_running makes run() return immediately.
    struct callbacks {
        SetStatusCb set_status = nullptr;       // receives human-readable status messages
        CheckRunningCb check_running = nullptr; // polled each loop iteration; false stops run()
        GetAudioCb get_audio = nullptr;         // called with (ms, buffer) to fetch captured audio
        SetMovesCb set_moves = nullptr;         // receives the moves executed by a command
        ClearAudioCb clear_audio = nullptr;     // called after a chunk of audio has been processed
    };
    // NOTE(review): the visible WChess.cpp only reads vad_ms; the remaining fields are stored
    // but not consulted there.
    struct settings {
        int32_t vad_ms     = 2000;
        int32_t prompt_ms  = 5000;
        int32_t command_ms = 4000;
        float vad_thold    = 0.2f;
        float freq_thold   = 100.0f;
        bool print_energy  = false;
    };
    // Stores ctx as a raw pointer (the destructor is defaulted, so the caller keeps ownership
    // and must free it) and copies wparams, cb and s.
    WChess(
        whisper_context * ctx,
        const whisper_full_params & wparams,
        callbacks cb,
        settings s
    );
    ~WChess();
    // Runs the listen/transcribe/move loop until check_running reports false.
    void run();
    // Returns the current board as text.
    std::string stringify_board() const;
 private:
    // Thin wrappers that invoke the corresponding callback when it is set.
    void get_audio(int ms, std::vector<float>& pcmf32) const;
    void set_status(const std::string& msg) const;
    void set_moves(const std::string& moves) const;
    bool check_running() const;
    void clear_audio() const;
    // Transcribes pcmf32 and reports token log-probability statistics and the elapsed time
    // through the reference parameters.
    std::string transcribe(
                    const std::vector<float> & pcmf32,
                    float & logprob_min,
                    float & logprob_sum,
                    int & n_tokens,
                    int64_t & t_ms);
    whisper_context * m_ctx;
    // Not const: run() rewrites the grammar fields before every transcription.
    whisper_full_params m_wparams;
    const callbacks m_cb;
    const settings m_settings;
    std::unique_ptr<Chessboard> m_board;
 };
--- a/examples/wchess/libwchess/test-chessboard.cpp
+++ b/examples/wchess/libwchess/test-chessboard.cpp
@ -0,0 +1,88 @@
 #include "Chessboard.h"
 // Minimal assertion helper: when the expression is false, prints the file, line and
 // expression text to stderr, flushes, and terminates the process with exit code 1.
 // Wrapped in do { } while (0) so that it behaves like a single statement.
 #define ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
            fflush(stderr); \
            exit(1); \
        } \
    } while (0)
 // Exercises Chessboard::process() one piece type at a time. Every scope starts from a fresh
 // board, and the assertions inside a scope build on each other, so their order matters.
 // process() is expected to return the executed moves as "from-to" pairs, or an empty string
 // when the command is rejected.
 int main() {
    {
        // pawns
        Chessboard chess;
        ASSERT(chess.process("pawn to d4, e5, e3, pawn to d5") == "d2-d4 e7-e5 e2-e3 d7-d5");
        ASSERT(chess.process("pawn to d4") == ""); // wrong
        ASSERT(chess.process("pawn to c5") == ""); // wrong
        ASSERT(chess.process("pawn to d5") == ""); // wrong
        ASSERT(chess.process("pawn to d3") == ""); // wrong
        ASSERT(chess.process("pawn to f5") == ""); // wrong, white's turn
        ASSERT(chess.process("h4") == "h2-h4");
        ASSERT(chess.process("d4") == "e5-d4");
        ASSERT(chess.process("e4") == "e3-e4");
        ASSERT(chess.process("d4") == ""); // wrong
        ASSERT(chess.process("e4") == "d5-e4");
    }
    {
        // rook
        Chessboard chess;
        ASSERT(chess.process("rook to a3") == ""); // wrong
        ASSERT(chess.process("a4, h5, rook to a3, rook to h6") == "a2-a4 h7-h5 a1-a3 h8-h6");
        ASSERT(chess.process("rook to d3, rook to e6") == "a3-d3 h6-e6");
        ASSERT(chess.process("rook to d4, rook to e5") == "d3-d4 e6-e5");
        ASSERT(chess.process("rook to a4") == ""); // wrong
        ASSERT(chess.process("rook to d8") == ""); // wrong
        ASSERT(chess.process("rook to d3") == "d4-d3");
        ASSERT(chess.process("rook to e2") == "e5-e2");
    }
    {
        // knight
        Chessboard chess;
        ASSERT(chess.process("knight to c3, knight to c6") == "b1-c3 b8-c6");
        ASSERT(chess.process("knight to c3") == ""); // wrong
        ASSERT(chess.process("knight to a2") == ""); // wrong
        ASSERT(chess.process("knight to b4") == ""); // wrong, white's turn
        ASSERT(chess.process("knight to b5") == "c3-b5");
        ASSERT(chess.process("knight to a5") == "c6-a5");
        ASSERT(chess.process("knight to c7") == "b5-c7");
    }
    {
        // bishop
        Chessboard chess;
        ASSERT(chess.process("b3, b6, bishop to b2, bishop to b7") == "b2-b3 b7-b6 c1-b2 c8-b7");
        ASSERT(chess.process("bishop to a1") == ""); // wrong
        ASSERT(chess.process("bishop to h8") == ""); // wrong
        ASSERT(chess.process("bishop to a6") == ""); // wrong, white's turn
        ASSERT(chess.process("bishop to g7") == "b2-g7");
    }
    {
        // queen
        Chessboard chess;
        ASSERT(chess.process("queen to d8") == ""); // wrong
        ASSERT(chess.process("queen to f1") == ""); // wrong
        ASSERT(chess.process("queen to h5") == ""); // wrong
        ASSERT(chess.process("e3, d5, queen to h5, queen to d6") == "e2-e3 d7-d5 d1-h5 d8-d6");
        ASSERT(chess.process("queen to c5") == ""); // wrong, white's turn
        ASSERT(chess.process("queen to f7") == "h5-f7");
    }
    {
        // king
        Chessboard chess;
        ASSERT(chess.process("d3, d6, king to d2, king to d7, king to c3, king to c6, king to c4") == "d2-d3 d7-d6 e1-d2 e8-d7 d2-c3 d7-c6 c3-c4");
        ASSERT(chess.process("bishop to e6") == "c8-e6");
        // !! accepted although b3 stays on the e6 bishop's diagonal: check detection is not implemented
        ASSERT(chess.process("king to b3") == "c4-b3");
    }
 }
--- a/examples/wchess/wchess.cmd/CMakeLists.txt
+++ b/examples/wchess/wchess.cmd/CMakeLists.txt
@ -0,0 +1,8 @@
 # Command-line voice chess example. It links common-sdl, so the target is only defined
 # when WHISPER_SDL2 is enabled.
 if (WHISPER_SDL2)
    set(TARGET wchess)
    add_executable(${TARGET}
        wchess.cmd.cpp
        )
    include(DefaultTargetOptions)
    target_link_libraries(${TARGET} PRIVATE
        libwchess
        common-sdl
        ${CMAKE_THREAD_LIBS_INIT}
        )
 endif()
--- a/examples/wchess/wchess.cmd/wchess.cmd.cpp
+++ b/examples/wchess/wchess.cmd/wchess.cmd.cpp
@ -0,0 +1,207 @@
 // Command line voice assisted chess
 //
 // Speak chess move commands to the microphone.
 // The moves will be translated to chessboard positions.
 //
 //
 #include "WChess.h"
 #include "common-sdl.h"
 #include <memory>
 #include <thread>
 // command-line parameters
 // Command-line parameters with their defaults. whisper_params_parse() overwrites them from
 // argv and main() forwards them to whisper and to WChess::settings.
 struct whisper_params {
    // worker threads for whisper; capped at 4
    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
    // forwarded to WChess::settings::prompt_ms / command_ms
    int32_t prompt_ms  = 5000;
    int32_t command_ms = 8000;
    // capture device passed to the audio initialization; -1 is the default value
    int32_t capture_id = -1;
    int32_t max_tokens = 32;
    int32_t audio_ctx  = 0;
    // forwarded to WChess::settings::vad_thold / freq_thold
    float vad_thold  = 0.6f;
    float freq_thold = 100.0f;
    // set by --grammar-penalty
    float grammar_penalty = 100.0f;
    bool speed_up      = false;
    bool translate     = false;
    bool print_special = false;
    bool print_energy  = false;
    // no command-line switch exists for this one; main() also derives print_timestamps from it
    bool no_timestamps = true;
    bool use_gpu       = true;
    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
    // fname_out, commands and prompt can be set from the command line but are not read by
    // main() in this file
    std::string fname_out;
    std::string commands;
    std::string prompt;
    // passed to whisper as the initial prompt
    std::string context;
    // has no command-line option and is not read by main() in this file
    std::string grammar;
 };
 // Prints the option summary to stderr. The value shown in brackets for each option is the
 // corresponding field of `params` at the time of the call; argv[0] is used as the program name.
 void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h,         --help           [default] show this help message and exit\n");
    fprintf(stderr, "  -t N,       --threads N      [%-7d] number of threads to use during computation\n", params.n_threads);
    fprintf(stderr, "  -pms N,     --prompt-ms N    [%-7d] prompt duration in milliseconds\n",             params.prompt_ms);
    fprintf(stderr, "  -cms N,     --command-ms N   [%-7d] command duration in milliseconds\n",            params.command_ms);
    fprintf(stderr, "  -c ID,      --capture ID     [%-7d] capture device ID\n",                           params.capture_id);
    fprintf(stderr, "  -mt N,      --max-tokens N   [%-7d] maximum number of tokens per audio chunk\n",    params.max_tokens);
    fprintf(stderr, "  -ac N,      --audio-ctx N    [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
    fprintf(stderr, "  -vth N,     --vad-thold N    [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
    fprintf(stderr, "  -fth N,     --freq-thold N   [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
    fprintf(stderr, "  -su,        --speed-up       [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
    fprintf(stderr, "  -tr,        --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,        --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,        --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
    // the flag disables the GPU, hence the inverted value
    fprintf(stderr, "  -ng,        --no-gpu         [%-7s] disable GPU\n",                                 params.use_gpu ? "false" : "true");
    fprintf(stderr, "  -l LANG,    --language LANG  [%-7s] spoken language\n",                             params.language.c_str());
    fprintf(stderr, "  -m FNAME,   --model FNAME    [%-7s] model path\n",                                  params.model.c_str());
    fprintf(stderr, "  -f FNAME,   --file FNAME     [%-7s] text output file name\n",                       params.fname_out.c_str());
    fprintf(stderr, "  -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n",             params.commands.c_str());
    fprintf(stderr, "  -p,         --prompt         [%-7s] the required activation prompt\n",              params.prompt.c_str());
    fprintf(stderr, "  -ctx,       --context        [%-7s] sample text to help the transcription\n",       params.context.c_str());
    fprintf(stderr, "  --grammar-penalty N          [%-7.1f] scales down logits of nongrammar tokens\n",   params.grammar_penalty);
    fprintf(stderr, "\n");
 }
 // Fills `params` from the command line.
 //   -h/--help prints the usage text and exits with status 0.
 //   An unknown argument prints an error plus the usage text and exits with status 0.
 //   An option that needs a value but is the last argument prints an error plus the usage
 //   text and exits with status 1 (previously argv[argc], a null pointer, was read).
 // Returns true when all arguments were consumed.
 // Numeric values go through std::stoi/std::stof, which throw on malformed input.
 bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        // Fetches the value that follows the current option and advances i past it.
        auto next_value = [&]() -> const char * {
            if (i + 1 >= argc) {
                fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
                whisper_print_usage(argc, argv, params);
                exit(1);
            }
            return argv[++i];
        };
        if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params);
            exit(0);
        }
        else if (arg == "-t"   || arg == "--threads")       { params.n_threads     = std::stoi(next_value()); }
        else if (arg == "-pms" || arg == "--prompt-ms")     { params.prompt_ms     = std::stoi(next_value()); }
        else if (arg == "-cms" || arg == "--command-ms")    { params.command_ms    = std::stoi(next_value()); }
        else if (arg == "-c"   || arg == "--capture")       { params.capture_id    = std::stoi(next_value()); }
        else if (arg == "-mt"  || arg == "--max-tokens")    { params.max_tokens    = std::stoi(next_value()); }
        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(next_value()); }
        else if (arg == "-vth" || arg == "--vad-thold")     { params.vad_thold     = std::stof(next_value()); }
        else if (arg == "-fth" || arg == "--freq-thold")    { params.freq_thold    = std::stof(next_value()); }
        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
        else if (arg == "-ng"  || arg == "--no-gpu")        { params.use_gpu       = false; }
        else if (arg == "-l"   || arg == "--language")      { params.language      = next_value(); }
        else if (arg == "-m"   || arg == "--model")         { params.model         = next_value(); }
        else if (arg == "-f"   || arg == "--file")          { params.fname_out     = next_value(); }
        else if (arg == "-cmd" || arg == "--commands")      { params.commands      = next_value(); }
        else if (arg == "-p"   || arg == "--prompt")        { params.prompt        = next_value(); }
        else if (arg == "-ctx" || arg == "--context")       { params.context       = next_value(); }
        else if (                 arg == "--grammar-penalty") { params.grammar_penalty = std::stof(next_value()); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
            exit(0);
        }
    }
    return true;
 }
 // The single WChess instance; created in main() and read by the set_moves callback.
 std::unique_ptr<WChess> g_wchess;
 // SetMoves callback: prints the current board to stdout whenever a non-empty move string
 // is reported. The move string itself is not printed.
 void set_moves(const std::string & moves) {
    if (moves.empty()) {
        return;
    }
    const std::string board_text = g_wchess->stringify_board();
    fprintf(stdout, "%s", board_text.c_str());
 }
 // Shared audio capture object used by the callbacks below and by main().
 // NOTE(review): 30*1000 is presumably the capture buffer length in milliseconds - confirm
 // against common-sdl.h.
 audio_async g_audio(30*1000);
 // GetAudio callback: forwards the request to g_audio.get(), which is expected to fill pcmf32_cur.
 void get_audio(int ms, std::vector<float> & pcmf32_cur) {
    g_audio.get(ms, pcmf32_cur);
 }
 // ClearAudio callback: forwards to g_audio.clear().
 void clear_audio() {
    g_audio.clear();
 }
 int main(int argc, char ** argv) {
    whisper_params params;
    if (whisper_params_parse(argc, argv, params) == false) {
        return 1;
    }
    if (whisper_lang_id(params.language.c_str()) == -1) {
        fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
        whisper_print_usage(argc, argv, params);
        exit(0);
    }
    // whisper init
    struct whisper_context_params cparams;
    cparams.use_gpu = params.use_gpu;
    struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
    // init audio
    if (!g_audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
        fprintf(stderr, "%s: audio.init() failed!\n", __func__);
        return 1;
    }
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
    wparams.print_progress   = false;
    wparams.print_special    = params.print_special;
    wparams.print_realtime   = false;
    wparams.print_timestamps = !params.no_timestamps;
    wparams.translate        = params.translate;
    wparams.no_context       = true;
    wparams.no_timestamps    = params.no_timestamps;
    wparams.single_segment   = true;
    wparams.max_tokens       = params.max_tokens;
    wparams.language         = params.language.c_str();
    wparams.n_threads        = params.n_threads;
    wparams.audio_ctx = params.audio_ctx;
    wparams.speed_up  = params.speed_up;
    wparams.temperature     = 0.4f;
    wparams.temperature_inc = 1.0f;
    wparams.greedy.best_of  = 5;
    wparams.beam_search.beam_size = 5;
    wparams.initial_prompt = params.context.data();
    g_audio.resume();
    // wait for 1 second to avoid any buffered noise
    std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    g_audio.clear();
    WChess::callbacks cb;
    cb.check_running = sdl_poll_events;
    cb.get_audio = get_audio;
    cb.set_moves = set_moves;
    cb.clear_audio = clear_audio;
    WChess::settings s;
    s.vad_ms = 2000;
    s.prompt_ms = params.prompt_ms;
    s.command_ms = params.command_ms;
    s.vad_thold = params.vad_thold;
    s.freq_thold = params.freq_thold;
    s.print_energy = params.print_energy;
    g_wchess.reset(new WChess(ctx, wparams, cb, s));
    set_moves("start");
    g_wchess->run();
    g_audio.pause();
    whisper_print_timings(ctx);
    whisper_free(ctx);
    return 0;
 }
--- a/examples/wchess/wchess.wasm/CMakeLists.txt
+++ b/examples/wchess/wchess.wasm/CMakeLists.txt
@ -0,0 +1,51 @@
 # WebAssembly build of the voice chess example. Besides the executable, the page assets
 # (chessboard.js, jQuery, index.html, helpers.js) are placed in
 # ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/.
 set(TARGET wchess.wasm)
 add_executable(${TARGET}
    wchess.wasm.cpp
    )
 include(DefaultTargetOptions)
 target_link_libraries(${TARGET} PRIVATE
    common
    libwchess
    )
 unset(EXTRA_FLAGS)
 if (WHISPER_WASM_SINGLE_FILE)
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside chess.js")
    # Single-file mode: publish the generated loader script as js/chess.js next to the page.
    # The js/ directory already exists because configure_file() below creates it at
    # configure time.
    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/${TARGET}.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js
        VERBATIM
        )
 endif()
 # Linker settings passed through the LINK_FLAGS property.
 set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")
 # Copy the vendored front-end assets next to the generated page after every build.
 # VERBATIM keeps argument escaping identical across platforms and generators.
 add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        ${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/
        VERBATIM
    )
 # @ONLY: substitute only @VAR@ placeholders so that ${...} inside the HTML/JS stays untouched.
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
 configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY)
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/CHANGELOG.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/CHANGELOG.md
@ -0,0 +1,32 @@
 # chessboard.js Change Log
 All notable changes to this project will be documented in this file.
 ## [1.0.0] - 2019-06-11
 - Orientation methods now return current orientation. [Issue #64]
 - Drop support for IE8
 - Do not check for `window.JSON` (Error #1004)
 - Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however)
 - id query selectors are now supported as the first argument to `Chessboard()`
 - Remove Error #1002
 - Format code according to [StandardJS]
 - Bump minimum jQuery version to 1.8.3
 - Throttle piece drag functions
 ## [0.3.0] - 2013-08-10
 - Added `appearSpeed` animation config property
 - Added `onSnapbackEnd` event
 - Added `onMoveEnd` event
 ## [0.2.0] - 2013-08-05
 - Added `onMouseoverSquare` and `onMouseoutSquare` events
 - Added `onSnapEnd` event
 - Added square code as CSS class on the squares
 - Added [chess.js] integration examples
 ## [0.1.0] - 2013-05-21
 - Initial release
 [chess.js]:https://github.com/jhlywa/chess.js
 [Issue #64]:https://github.com/oakmac/chessboardjs/issues/64
 [StandardJS]:https://standardjs.com/
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/LICENSE.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/LICENSE.md
@ -0,0 +1,20 @@
 Copyright 2019 Chris Oakman
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/README.md
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/README.md
@ -0,0 +1,82 @@
 # chessboard.js
 chessboard.js is a JavaScript chessboard component. It depends on [jQuery].
 Please see [chessboardjs.com] for documentation and examples.
 ## What is chessboard.js?
 chessboard.js is a JavaScript chessboard component with a flexible "just a
 board" API.
 chessboard.js is a standalone JavaScript Chess Board. It is designed to be "just
 a board" and expose a powerful API so that it can be used in different ways.
 Here's a non-exhaustive list of things you can do with chessboard.js:
 - Use chessboard.js to show game positions alongside your expert commentary.
 - Use chessboard.js to have a tactics website where users have to guess the best
  move.
 - Integrate chessboard.js and [chess.js] with a PGN database and allow people to
  search and playback games (see [Example 5000])
 - Build a chess server and have users play their games out using the
  chessboard.js board.
 chessboard.js is flexible enough to handle any of these situations with relative
 ease.
 ## What can chessboard.js **not** do?
 The scope of chessboard.js is limited to "just a board." This is intentional and
 makes chessboard.js flexible for handling a multitude of chess-related problems.
 This is a common source of confusion for new users.
 Specifically, chessboard.js does not understand anything about how the game of
 chess is played: how a knight moves, whose turn it is, whether White is in check, etc.
 Fortunately, the powerful [chess.js] library deals with exactly this sort of
 problem domain and plays nicely with chessboard.js's flexible API. Some examples
 of chessboard.js combined with chess.js: 5000, 5001, 5002
 Please see the powerful [chess.js] library for an API to deal with these sorts
 of questions.
 This logic is distinct from the logic of the board. Please see the powerful
 [chess.js] library for this aspect of your application.
 Here is a list of things that chessboard.js is **not**:
 - A chess engine
 - A legal move validator
 - A PGN parser
 chessboard.js is designed to work well with any of those things, but the idea
 behind chessboard.js is that the logic that controls the board should be
 independent of those other problems.
 ## Docs and Examples
 - Docs - <http://chessboardjs.com/docs>
 - Examples - <http://chessboardjs.com/examples>
 ## Developer Tools
 ```sh
 # create a build in the build/ directory
 npm run build
 # re-build the website
 npm run website
 ```
 ## License
 [MIT License](LICENSE.md)
 [jQuery]:https://jquery.com/
 [chessboardjs.com]:http://chessboardjs.com
 [chess.js]:https://github.com/jhlywa/chess.js
 [Example 5000]:http://chessboardjs.com/examples#5000
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css
@ -0,0 +1,54 @@
 /*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
 .clearfix-7da63 {
  clear: both;
 }
 .board-b72b1 {
  border: 2px solid #404040;
  box-sizing: content-box;
 }
 .square-55d63 {
  float: left;
  position: relative;
  /* disable any native browser highlighting */
  -webkit-touch-callout: none;
    -webkit-user-select: none;
     -khtml-user-select: none;
       -moz-user-select: none;
        -ms-user-select: none;
            user-select: none;
 }
 .white-1e1d7 {
  background-color: #f0d9b5;
  color: #b58863;
 }
 .black-3c85d {
  background-color: #b58863;
  color: #f0d9b5;
 }
 .highlight1-32417, .highlight2-9c5d2 {
  box-shadow: inset 0 0 3px 3px yellow;
 }
 .notation-322f9 {
  cursor: default;
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  position: absolute;
 }
 .alpha-d2270 {
  bottom: 1px;
  right: 3px;
 }
 .numeric-fc462 {
  top: 2px;
  left: 2px;
 }
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
@ -0,0 +1,2 @@
 /*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
 .clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.js
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.js
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
--- a/examples/wchess/wchess.wasm/chessboardjs-1.0.0/package.json
+++ b/examples/wchess/wchess.wasm/chessboardjs-1.0.0/package.json
@ -0,0 +1,29 @@
 {
  "author": "Chris Oakman <chris@oakmac.com> (http://chrisoakman.com/)",
  "name": "@chrisoakman/chessboardjs",
  "description": "JavaScript chessboard widget",
  "homepage": "https://chessboardjs.com",
  "license": "MIT",
  "version": "1.0.0",
  "repository": {
    "type": "git",
    "url": "git://github.com/oakmac/chessboardjs.git"
  },
  "files": ["dist/"],
  "dependencies": {
    "jquery": ">=3.4.1"
  },
  "devDependencies": {
    "csso": "3.5.1",
    "fs-plus": "3.1.1",
    "kidif": "1.1.0",
    "mustache": "2.3.0",
    "standard": "10.0.2",
    "uglify-js": "3.6.0"
  },
  "scripts": {
    "build": "standard lib/chessboard.js && node scripts/build.js",
    "standard": "standard --fix lib/*.js website/js/*.js",
    "website": "node scripts/website.js"
  }
 }
--- a/examples/wchess/wchess.wasm/index-tmpl.html
+++ b/examples/wchess/wchess.wasm/index-tmpl.html
@ -0,0 +1,376 @@
 <!doctype html>
 <html lang="en-us">
    <head>
        <title>wchess : Voice assistant example using Whisper + WebAssembly</title>
        <style>
            #output {
                width: 100%;
                height: 100%;
                margin: 0 auto;
                margin-top: 10px;
                border-left: 0px;
                border-right: 0px;
                padding-left: 0px;
                padding-right: 0px;
                display: block;
                background-color: black;
                color: white;
                font-size: 10px;
                font-family: 'Lucida Console', Monaco, monospace;
                outline: none;
                white-space: pre;
                overflow-wrap: normal;
                overflow-x: scroll;
            }
        </style>
        <link rel="stylesheet" href="css/chessboard-1.0.0.min.css" integrity="sha384-q94+BZtLrkL1/ohfjR8c6L+A6qzNH9R2hBLwyoAfu3i/WCvQjzL2RQJ3uNHDISdU" crossorigin="anonymous">
    </head>
    <body onload="loadWhisper()">
        <div id="main-container">
            <b>wchess : Voice assistant example using Whisper + WebAssembly</b>
            <br><br>
             You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/wchess/wchess.wasm">GitHub</a>.
            <br><br>
            <b>More examples:</b>
                <a href="https://whisper.ggerganov.com/">main</a> |
                <a href="https://whisper.ggerganov.com/bench">bench</a> |
                <a href="https://whisper.ggerganov.com/stream">stream</a> |
                <a href="https://whisper.ggerganov.com/command">command</a> |
                <a href="https://whisper.ggerganov.com/talk">talk</a> |
            <br><br>
            <hr>
            <div id="model-whisper">
                Whisper model: <span id="model-whisper-status"></span>
                <span id="fetch-whisper-progress"></span>
                <button id="clear" onclick="clearCache()">Clear Cache</button>
                <!--
                    <input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
                -->
            </div>
            <br>
            <div id="myBoard" style="width: 400px"></div>
            <script src="js/jquery-3.7.1.min.js"></script>
            <script src="js/chessboard-1.0.0.min.js"></script>
            <script>
                var board = Chessboard('myBoard', 'start')
            </script>
            <br>
            <div id="input">
                <button id="toggler" disabled>Hold</button>
            </div>
            <br>
            <div id="state">
                Status: <b><span id="state-status">not started</span></b>
                <pre id="state-moves">[The moves will be displayed here]</pre>
            </div>
            <hr>
            Debug output:
            <textarea id="output" rows="20"></textarea>
            <br>
            <b>Troubleshooting</b>
            <br><br>
            The page does some heavy computations, so make sure:
            <ul>
                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
            </ul>
            <div class="cell-version">
                <span>
                    |
                    Build time: <span class="nav-link">@GIT_DATE@</span> |
                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
                     <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/wchess/wchess.wasm">Source Code</a> |
                </span>
            </div>
        </div>
        <script type="text/javascript" src="js/helpers.js"></script>
        <script type='text/javascript'>
            // web audio context
            var context = null;
            // the command instance
            var instance = null;
            // model name
            var model_whisper = null;
            var Module = {
                print: printTextarea,
                printErr: printTextarea,
                setStatus: function(text) {
                    printTextarea('js: ' + text);
                },
                monitorRunDependencies: function(left) {
                },
                preRun: function() {
                    printTextarea('js: Preparing ...');
                },
                postRun: function() {
                    printTextarea('js: Module initialized successfully!');
                    instance = Module.init('whisper.bin');
                    if (instance) {
                        printTextarea("js: whisper initialized, instance: " + instance);
                    }
                    else {
                        printTextarea("js: failed to initialize whisper");
                    }
                }
            };
            //
            // fetch models
            //
            let dbVersion = 1
            let dbName    = 'whisper.ggerganov.com';
            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
            function storeFS(fname, buf) {
                // write to WASM file using FS_createDataFile
                // if the file exists, delete it
                try {
                    Module.FS_unlink(fname);
                } catch (e) {
                    // ignore
                }
                Module.FS_createDataFile("/", fname, buf, true, true);
                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
                if (model_whisper != null) {
                    document.getElementById('toggler').disabled = false;
                }
            }
            function loadWhisper() {
                // let urls = {
                //     'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
                //     'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
                //     'tiny-en-q5_1':  'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin',
                //     'base-en-q5_1':  'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin',
                // };
                // let sizes = {
                //     'tiny.en': 75,
                //     'base.en': 142,
                //     'tiny-en-q5_1':   31,
                //     'base-en-q5_1':   57,
                // };
                let url     = 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin';
                let dst     = 'whisper.bin';
                let size_mb = 75;
                model_whisper = 'tiny.en';
                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model_whisper + '" ... ';
                cbProgress = function(p) {
                    let el = document.getElementById('fetch-whisper-progress');
                    el.innerHTML = Math.round(100*p) + '%';
                };
                cbCancel = function() {
                    var el;
                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
                };
                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
            }
            //
            // microphone
            //
            const kSampleRate = 16000;
            const kRestartRecording_s = 120;
            const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
            var mediaRecorder = null;
            var doRecording = false;
            var startTime = 0;
            window.AudioContext = window.AudioContext || window.webkitAudioContext;
            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
            function stopRecording() {
                Module.set_status("paused");
                mediaRecorder.stop();
            }
            function startRecording() {
                if (!context) {
                    context = new AudioContext({
                        sampleRate: kSampleRate,
                        channelCount: 1,
                        echoCancellation: false,
                        autoGainControl:  true,
                        noiseSuppression: true,
                    });
                }
                startTime = Date.now();
                var chunks = [];
                var stream = null;
                navigator.mediaDevices.getUserMedia({audio: true, video: false})
                    .then(function(s) {
                        stream = s;
                        mediaRecorder = new MediaRecorder(stream);
                        mediaRecorder.ondataavailable = function(e) {
                            chunks.push(e.data);
                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
                            var reader = new FileReader();
                            reader.onload = function(event) {
                                var buf = new Uint8Array(reader.result);
                                if (!context) {
                                    return;
                                }
                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
                                    var source = offlineContext.createBufferSource();
                                    source.buffer = audioBuffer;
                                    source.connect(offlineContext.destination);
                                    source.start(0);
                                    offlineContext.startRendering().then(function(renderedBuffer) {
                                        let audio = renderedBuffer.getChannelData(0);
                                        if (instance) {
                                            printTextarea('js: number of samples: ' + audio.length);
                                            Module.set_audio(instance, audio);
                                        }
                                    });
                                    mediaRecorder = null;
                                    context = null;
                                });
                            }
                            reader.readAsArrayBuffer(blob);
                        };
                        mediaRecorder.onstop = function(e) {
                            stream.getTracks().forEach(function(track) {
                                track.stop();
                            });
                        };
                        mediaRecorder.start();
                    })
                    .catch(function(err) {
                        printTextarea('js: error getting audio stream: ' + err);
                    });
            }
            //
            // main
            //
            var nLines = 0;
            var intervalUpdate = null;
            var movesAll = '';
            document.body.addEventListener('keydown', function(event) {
                if (event.keyCode === 32) {
                    document.getElementById('toggler').innerText = "Release";
                    onStart();
                }
            }, true);
            document.body.addEventListener('keyup', function(event) {
                if (event.keyCode === 32) {
                    document.getElementById('toggler').innerText = "Hold";
                    onStop();
                }
            }, true);
            document.getElementById('toggler').addEventListener('mousedown', function(event) {
                this.innerText = "Release";
                onStart();
            }, true);
            document.getElementById('toggler').addEventListener('mouseup', function(event) {
                this.innerText = "Hold";
                onStop();
            }, true);
            function onStart() {
                if (!instance) {
                    return;
                }
                startRecording();
            }
            function onStop() {
                printTextarea('js: stopping recording ...');
                stopRecording();
                var interval = setInterval(function() {
                    var moves = Module.get_moves();
                    if (moves != null && moves.length > 1) {
                        clearInterval(interval);
                        for (move of moves.split(' ')) {
                            board.move(move);
                        }
                        movesAll += moves + '<br>';
                        nLines++;
                        // if more than 10 lines, remove the first line
                        if (nLines > 10) {
                            var i = movesAll.indexOf('<br>');
                            if (i > 0) {
                                movesAll = movesAll.substring(i + 4);
                                nLines--;
                            }
                        }
                        document.getElementById('state-status').innerHTML = Module.get_status();
                        document.getElementById('state-moves').innerHTML = movesAll;
                    }
                }, 100);
            }
        </script>
        <script type="text/javascript" src="js/chess.js"></script>
    </body>
 </html>
--- a/examples/wchess/wchess.wasm/jquery-3.7.1.min.js
+++ b/examples/wchess/wchess.wasm/jquery-3.7.1.min.js
--- a/examples/wchess/wchess.wasm/wchess.wasm.cpp
+++ b/examples/wchess/wchess.wasm/wchess.wasm.cpp
@ -0,0 +1,173 @@
 #include <WChess.h>
 #include <emscripten/bind.h>
 #include <algorithm>
 #include <atomic>
 #include <cstdint>
 #include <cstdio>
 #include <mutex>
 #include <string>
 #include <thread>
 #include <vector>
 constexpr int N_THREAD = 8;
 std::vector<struct whisper_context *> g_contexts(4, nullptr);
 std::mutex  g_mutex;
 std::thread g_worker;
 std::atomic<bool> g_running(false);
 std::string g_status        = "";
 std::string g_status_forced = "";
 std::string g_moves         = "";
 std::vector<float> g_pcmf32;
 // Callbacks handed to WChess (see wchess_main). All shared state is protected by
 // g_mutex, except g_running which is atomic.

 // Stores the status text reported by the worker.
 void set_status(const std::string & status) {
     const std::lock_guard<std::mutex> guard(g_mutex);
     g_status = status;
 }
 // Stores the most recently recognized moves.
 void set_moves(const std::string & moves) {
     const std::lock_guard<std::mutex> guard(g_mutex);
     g_moves = moves;
 }
 // Copies the currently buffered samples into `audio`; the duration argument is unused.
 void get_audio(int /* ms */, std::vector<float> & audio) {
     const std::lock_guard<std::mutex> guard(g_mutex);
     audio = g_pcmf32;
 }
 // Returns whether the worker should keep running.
 bool check_running() {
     return g_running.load();
 }
 // Drops all buffered samples.
 void clear_audio() {
     const std::lock_guard<std::mutex> guard(g_mutex);
     g_pcmf32.clear();
 }
 // Worker entry point: configures whisper for short move commands, runs the
 // WChess loop on context slot `i`, and releases that context when the loop returns.
 //
 // i - index into g_contexts; nothing is done if it is out of range or the slot is empty.
 void wchess_main(size_t i) {
     // validate the slot before touching it (the check used to happen only after
     // g_contexts[i] had already been used)
     if (i >= g_contexts.size() || g_contexts[i] == nullptr) {
         return;
     }
     struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
     wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
     wparams.offset_ms        = 0;
     wparams.translate        = false;
     wparams.no_context       = true;
     wparams.single_segment   = true;
     wparams.print_realtime   = false;
     wparams.print_progress   = false;
     wparams.print_timestamps = true;
     wparams.print_special    = false;
     wparams.no_timestamps    = true;
     wparams.max_tokens       = 32;
     wparams.audio_ctx        = 768; // partial encoder context for better performance
     wparams.temperature     = 0.0f;
     wparams.temperature_inc = 2.0f;
     wparams.greedy.best_of  = 1;
     wparams.beam_search.beam_size = 1;
     wparams.language         = "en";
     wparams.grammar_penalty = 100.0;
     // example phrases in the expected command style, passed as the initial prompt
     wparams.initial_prompt = "bishop to c3, rook to d4, knight to e5, d4 d5, knight to c3, c3, queen to d4, king b1, pawn to a1, bishop to b2, knight to c3,";
     printf("command: using %d threads\n", wparams.n_threads);
     WChess::callbacks cb;
     cb.set_status = set_status;
     cb.check_running = check_running;
     cb.get_audio = get_audio;
     cb.set_moves = set_moves;
     cb.clear_audio = clear_audio;
     WChess(g_contexts[i], wparams, cb, {}).run();
     // the loop has ended: free the context and mark the slot as available again
     whisper_free(g_contexts[i]);
     g_contexts[i] = nullptr;
 }
 // JavaScript bindings exposed on the Emscripten Module object.
 EMSCRIPTEN_BINDINGS(command) {
     // init(path_model): loads the model into the first free context slot and starts
     // the worker thread. Returns the 1-based slot handle, or 0 on failure / no free slot.
     emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
                 g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
                 if (g_contexts[i] != nullptr) {
                     g_running = true;
                     // wait for a previous worker before replacing the thread object
                     // NOTE(review): g_running is already true here, so this join presumably
                     // blocks if an earlier worker is still inside its loop - confirm
                     if (g_worker.joinable()) {
                         g_worker.join();
                     }
                     g_worker = std::thread([i]() {
                         wchess_main(i);
                     });
                     return i + 1;
                 } else {
                     return (size_t) 0;
                 }
             }
         }
         return (size_t) 0;
     }));
     // free(index): requests the worker to stop; the index is ignored.
     emscripten::function("free", emscripten::optional_override([](size_t /* index */) {
         if (g_running) {
             g_running = false;
         }
     }));
     // set_audio(index, audio): replaces the shared sample buffer with the contents of
     // the given typed array. Returns 0 on success, -1 for an invalid handle, -2 if the
     // slot has no context.
     emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
         // handles are 1-based; a handle of 0 wraps around and is rejected below
         --index;
         if (index >= g_contexts.size()) {
             return -1;
         }
         if (g_contexts[index] == nullptr) {
             return -2;
         }
         {
             std::lock_guard<std::mutex> lock(g_mutex);
             const int n = audio["length"].as<int>();
             emscripten::val heap = emscripten::val::module_property("HEAPU8");
             emscripten::val memory = heap["buffer"];
             g_pcmf32.resize(n);
             // create a typed-array view of the same type as `audio` over g_pcmf32's
             // storage in the WASM heap and copy the samples into it
             emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
             memoryView.call<void>("set", audio);
         }
         return 0;
     }));
     // get_moves(): returns the pending moves and clears them, so each result is
     // delivered to the caller only once.
     emscripten::function("get_moves", emscripten::optional_override([]() {
         std::string moves;
         {
             std::lock_guard<std::mutex> lock(g_mutex);
             // swap with an empty string: unlike std::move, this guarantees that
             // g_moves is left empty afterwards
             moves.swap(g_moves);
         }
         return moves;
     }));
     // get_status(): returns the forced status if one is set, otherwise the worker status.
     emscripten::function("get_status", emscripten::optional_override([]() {
         std::string status;
         {
             std::lock_guard<std::mutex> lock(g_mutex);
             status = g_status_forced.empty() ? g_status : g_status_forced;
         }
         return status;
     }));
     // set_status(status): sets the forced status; an empty string removes the override.
     emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
         std::lock_guard<std::mutex> lock(g_mutex);
         g_status_forced = status;
     }));
 }
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@ -446,12 +446,14 @@ static ggml_tallocr_t node_tallocr(ggml_gallocr_t galloc, struct ggml_tensor * n
    return galloc->hash_allocs[ggml_hash_find_or_insert(galloc->hash_set, node)];
 }
-static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view) {
+static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view, bool update_backend) {
    ggml_tallocr_t alloc = node_tallocr(galloc, view);
    //printf("init_view: %s from src %s\n", view->name, view->view_src->name);
    GGML_ASSERT(view->view_src != NULL && view->view_src->data != NULL);
    if (update_backend) {
        view->backend = view->view_src->backend;
    }
    view->buffer  = view->view_src->buffer;
    view->data    = (char *)view->view_src->data + view->view_offs;
@ -469,7 +471,7 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
    if (node->data == NULL) {
        if (ggml_is_view(node)) {
-            init_view(galloc, node);
+            init_view(galloc, node, true);
        } else {
            // see if we can reuse a parent's buffer (inplace)
            if (ggml_op_can_inplace(node->op)) {
@ -499,15 +501,14 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
                                AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
                                node->view_src = view_src;
                                view_src_hn->n_views += 1;
-                                init_view(galloc, node);
+                                init_view(galloc, node, false);
                                return;
                            }
-                        }
+                        } else {
-                        else {
                            AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
                            node->view_src = parent;
                            p_hn->n_views += 1;
-                            init_view(galloc, node);
+                            init_view(galloc, node, false);
                            return;
                        }
                    }
@ -537,7 +538,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
            hash_get(galloc, view_src)->n_views += 1;
            if (node->buffer == NULL && node->data != NULL) {
                // view of a pre-allocated tensor, didn't call init_view() yet
-                init_view(galloc, node);
+                init_view(galloc, node, true);
            }
        }
@ -548,7 +549,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
            }
            hash_get(galloc, parent)->n_children += 1;
            if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
-                init_view(galloc, parent);
+                init_view(galloc, parent, true);
            }
        }
   }
@ -663,7 +664,7 @@ size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, st
    return max_size;
 }
-void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_alloct) {
+void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_talloc) {
    const size_t hash_size = hash_set.size;
    GGML_ASSERT(hash_size >= (size_t)(graph->n_nodes + graph->n_leafs));
@ -686,7 +687,7 @@ void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * grap
    // reset hash values
    memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size);
-    galloc->hash_allocs = hash_node_alloct;
+    galloc->hash_allocs = hash_node_talloc;
    ggml_tallocr_alloc_graph_impl(galloc, graph);
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@ -6142,6 +6142,9 @@ inline void ggml_cuda_op_add(
    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
    const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {
    GGML_ASSERT(ggml_is_contiguous(src0));
    GGML_ASSERT(ggml_is_contiguous(src1));
    GGML_ASSERT(src1->type == GGML_TYPE_F32);
    const int64_t ne10 = src1->ne[0];
--- a/ggml-quants.c
+++ b/ggml-quants.c
@ -1368,7 +1368,12 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
    float max = x[0];
    float sum_w = weights[0];
    float sum_x = sum_w * x[0];
 #ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 1; i < n; ++i) {
 #else
    for (int i = 1; i < n; ++i) {
 #endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
        float w = weights[i];
--- a/ggml.c
+++ b/ggml.c
@ -5024,8 +5024,13 @@ struct ggml_tensor * ggml_rope_back(
        int                   n_dims,
        int                   mode,
        int                   n_ctx,
        int                   n_orig_ctx,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow,
        float                 xpos_base,
        bool                  xpos_down) {
    GGML_ASSERT(ggml_is_vector(b));
@ -5042,11 +5047,15 @@ struct ggml_tensor * ggml_rope_back(
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
-    int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
+    int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
-    memcpy(params + 4, &freq_base,  sizeof(float));
+    memcpy(params +  5, &freq_base,    sizeof(float));
-    memcpy(params + 5, &freq_scale, sizeof(float));
+    memcpy(params +  6, &freq_scale,   sizeof(float));
-    memcpy(params + 6, &xpos_base,  sizeof(float));
+    memcpy(params +  7, &ext_factor,   sizeof(float));
-    memcpy(params + 7, &xpos_down,  sizeof(bool));
+    memcpy(params +  8, &attn_factor,  sizeof(float));
    memcpy(params +  9, &beta_fast,    sizeof(float));
    memcpy(params + 10, &beta_slow,    sizeof(float));
    memcpy(params + 11, &xpos_base,    sizeof(float));
    memcpy(params + 12, &xpos_down,    sizeof(bool));
    ggml_set_op_params(result, params, sizeof(params));
    result->op   = GGML_OP_ROPE_BACK;
@ -9376,7 +9385,6 @@ static bool ggml_compute_forward_mul_mat_use_blas(
 }
 #endif
 static void ggml_compute_forward_mul_mat(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
@ -10946,7 +10954,8 @@ static void ggml_compute_forward_rope_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
        const bool forward) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
@ -11005,6 +11014,11 @@ static void ggml_compute_forward_rope_f32(
    const bool is_neox = mode & 2;
    const bool is_glm  = mode & 4;
    // backward process uses inverse rotation by cos and sin.
    // cos and sin build a rotation matrix, where the inverse is the transpose.
    // this essentially just switches the sign of sin.
    const float sin_sign = forward ? 1.0f : -1.0f;
    const int32_t * pos = (const int32_t *) src1->data;
    for (int64_t i3 = 0; i3 < ne3; i3++) {
@ -11021,9 +11035,9 @@ static void ggml_compute_forward_rope_f32(
                    float block_theta = MAX(p - (n_ctx - 2), 0);
                    for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
+                        const float sin_theta = sinf(theta_base) * sin_sign;
                        const float cos_block_theta = cosf(block_theta);
-                        const float sin_block_theta = sinf(block_theta);
+                        const float sin_block_theta = sinf(block_theta) * sin_sign;
                        theta_base *= theta_scale;
                        block_theta *= theta_scale;
@ -11047,6 +11061,7 @@ static void ggml_compute_forward_rope_f32(
                        rope_yarn(
                            theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                        );
                        sin_theta *= sin_sign;
                        // zeta scaling for xPos only:
                        float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
@ -11077,6 +11092,7 @@ static void ggml_compute_forward_rope_f32(
                                theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                                &cos_theta, &sin_theta
                            );
                            sin_theta *= sin_sign;
                            theta_base *= theta_scale;
@ -11102,7 +11118,8 @@ static void ggml_compute_forward_rope_f16(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
        const bool forward) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
@ -11154,6 +11171,11 @@ static void ggml_compute_forward_rope_f16(
    const bool is_neox = mode & 2;
    const bool is_glm  = mode & 4;
    // backward process uses inverse rotation by cos and sin.
    // cos and sin build a rotation matrix, where the inverse is the transpose.
    // this essentially just switches the sign of sin.
    const float sin_sign = forward ? 1.0f : -1.0f;
    const int32_t * pos = (const int32_t *) src1->data;
    for (int64_t i3 = 0; i3 < ne3; i3++) {
@ -11170,9 +11192,9 @@ static void ggml_compute_forward_rope_f16(
                    float block_theta = MAX(p - (n_ctx - 2), 0);
                    for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
+                        const float sin_theta = sinf(theta_base) * sin_sign;
                        const float cos_block_theta = cosf(block_theta);
-                        const float sin_block_theta = sinf(block_theta);
+                        const float sin_block_theta = sinf(block_theta) * sin_sign;
                        theta_base *= theta_scale;
                        block_theta *= theta_scale;
@ -11196,6 +11218,7 @@ static void ggml_compute_forward_rope_f16(
                        rope_yarn(
                            theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                        );
                        sin_theta *= sin_sign;
                        theta_base *= theta_scale;
@ -11222,6 +11245,7 @@ static void ggml_compute_forward_rope_f16(
                                theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                                &cos_theta, &sin_theta
                            );
                            sin_theta *= sin_sign;
                            theta_base *= theta_scale;
@ -11251,11 +11275,11 @@ static void ggml_compute_forward_rope(
    switch (src0->type) {
        case GGML_TYPE_F16:
            {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
            } break;
        case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
            } break;
        default:
            {
@ -11266,216 +11290,6 @@ static void ggml_compute_forward_rope(
 // ggml_compute_forward_rope_back
 // RoPE backward pass for F32 data: fills dst (dx) from src0 (dy).
 //
 // For every processed element pair (dy0, dy1) the result is
 //     dx0 =  dy0*cos(theta) + dy1*sin(theta)
 //     dx1 = -dy0*sin(theta) + dy1*cos(theta)
 // (additionally multiplied by zeta in the non-neox path).
 //
 //   params - task type plus this thread's index (ith) and the thread count (nth)
 //   src0   - dy, the incoming gradient
 //   src1   - read as an array of int32 positions, indexed by i2
 //   dst    - dx, the output; its op_params supply n_dims, mode, n_ctx, freq_base,
 //            freq_scale, xpos_base and xpos_down
 static void ggml_compute_forward_rope_back_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    // all work happens in the compute phase; init/finalize are no-ops
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
    // y = rope(x, src1)
    // dx = rope_back(dy, src1)
    // src0 is dy, src1 holds the positions; the options are read from dst->op_params
    float freq_base;
    float freq_scale;
    // these two only relevant for xPos RoPE:
    float xpos_base;
    bool xpos_down;
    // op_params is read as an int32 array: slots 1..3 are ints, slots 4..7 are
    // copied out bytewise with memcpy into the float/bool locals above
    //const int n_past = ((int32_t *) dst->op_params)[0];
    const int n_dims = ((int32_t *) dst->op_params)[1];
    const int mode   = ((int32_t *) dst->op_params)[2];
    // n_ctx is read but not used by this function
    const int n_ctx  = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
    memcpy(&freq_base,  (int32_t *) dst->op_params + 4, sizeof(float));
    memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
    memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
    memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
    // presumably declares the ne*/nb* size and stride locals used below - TODO confirm
    GGML_TENSOR_UNARY_OP_LOCALS
    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
    // the innermost dst stride must equal one float
    assert(nb0 == sizeof(float));
    const int ith = params->ith;
    const int nth = params->nth;
    const int nr = ggml_nrows(dst);
    // rows per thread
    const int dr = (nr + nth - 1)/nth;
    // row range for this thread
    const int ir0 = dr*ith;
    const int ir1 = MIN(ir0 + dr, nr);
    // row index used to determine which thread to use
    int ir = 0;
    // theta is multiplied by this factor after every element pair
    const float theta_scale = powf(freq_base, -2.0f/n_dims);
    // bit 1 of mode selects the neox pairing (see the else branch below)
    const bool is_neox = mode & 2;
    const int32_t * pos = (const int32_t *) src1->data;
    for (int64_t i3 = 0; i3 < ne3; i3++) {
        for (int64_t i2 = 0; i2 < ne2; i2++) {
            // position for this i2 index
            const int64_t p = pos[i2];
            for (int64_t i1 = 0; i1 < ne1; i1++) {
                // ir counts every (i3, i2, i1) row visited; only rows whose index
                // falls in [ir0, ir1) are processed by this thread
                if (ir++ < ir0) continue;
                if (ir   > ir1) break;
                // starting angle for the row: scaled position
                float theta_base = freq_scale * (float)p;
                if (!is_neox) {
                    // pairs are adjacent elements (i0, i0 + 1) across the whole row
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta_base);
                        const float sin_theta = sinf(theta_base);
                        // zeta scaling for xPos only:
                        float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
                        if (xpos_down) zeta = 1.0f / zeta;
                        theta_base *= theta_scale;
                        // byte-offset addressing using the per-dimension strides
                        const float * const dy  = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
                              float *       dx  = (float *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
                        // read both inputs before writing either output
                        const float dy0 = dy[0];
                        const float dy1 = dy[1];
                        dx[0] =   dy0*cos_theta*zeta + dy1*sin_theta*zeta;
                        dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
                    }
                } else {
                    // neox pairing: the row is handled in blocks of n_dims elements and
                    // each pair is (i0, i0 + n_dims/2); no zeta factor is applied here
                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
                            const float cos_theta = cosf(theta_base);
                            const float sin_theta = sinf(theta_base);
                            theta_base *= theta_scale;
                            const int64_t i0 = ib*n_dims + ic/2;
                            const float * const dy  = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
                                  float *       dx  = (float *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
                            const float dy0 = dy[0];
                            const float dy1 = dy[n_dims/2];
                            dx[0]        =   dy0*cos_theta + dy1*sin_theta;
                            dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
                        }
                    }
                }
            }
        }
    }
 }
 // RoPE backward pass for F16 data: fills dst (dx) from src0 (dy).
 //
 // Each element pair is converted to float, combined as
 //     dx0 =  dy0*cos(theta) + dy1*sin(theta)
 //     dx1 = -dy0*sin(theta) + dy1*cos(theta)
 // and converted back to F16 on store.
 //
 //   params - task type plus this thread's index (ith) and the thread count (nth)
 //   src0   - dy, the incoming gradient
 //   src1   - read as an array of int32 positions, indexed by i2
 //   dst    - dx, the output; only n_dims and mode are read from its op_params
 //
 // NOTE(review): unlike the F32 variant, this function does not read freq_base,
 // freq_scale or the xPos parameters from op_params; it uses a fixed base of
 // 10000.0 and an unscaled position. Confirm whether that difference is intended.
 static void ggml_compute_forward_rope_back_f16(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    // all work happens in the compute phase; init/finalize are no-ops
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
    // y = rope(x, src1)
    // dx = rope_back(dy, src1)
    // src0 is dy, src1 holds the positions; the options are read from dst->op_params
    //const int n_past = ((int32_t *) dst->op_params)[0];
    const int n_dims = ((int32_t *) dst->op_params)[1];
    const int mode   = ((int32_t *) dst->op_params)[2];
    // presumably declares the ne*/nb* size and stride locals used below - TODO confirm
    GGML_TENSOR_UNARY_OP_LOCALS
    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
    // the innermost dst stride must equal one half-precision element
    assert(nb0 == sizeof(ggml_fp16_t));
    const int ith = params->ith;
    const int nth = params->nth;
    const int nr = ggml_nrows(dst);
    // rows per thread
    const int dr = (nr + nth - 1)/nth;
    // row range for this thread
    const int ir0 = dr*ith;
    const int ir1 = MIN(ir0 + dr, nr);
    // row index used to determine which thread to use
    int ir = 0;
    // theta is multiplied by this factor after every element pair (fixed base 10000.0)
    const float theta_scale = powf(10000.0, -2.0f/n_dims);
    // bit 1 of mode selects the neox pairing (see the else branch below)
    const bool is_neox = mode & 2;
    const int32_t * pos = (const int32_t *) src1->data;
    for (int64_t i3 = 0; i3 < ne3; i3++) {
        for (int64_t i2 = 0; i2 < ne2; i2++) {
            // position for this i2 index
            const int64_t p = pos[i2];
            for (int64_t i1 = 0; i1 < ne1; i1++) {
                // ir counts every (i3, i2, i1) row visited; only rows whose index
                // falls in [ir0, ir1) are processed by this thread
                if (ir++ < ir0) continue;
                if (ir   > ir1) break;
                // starting angle for the row: the raw position, no frequency scaling
                float theta_base = (float)p;
                if (!is_neox) {
                    // pairs are adjacent elements (i0, i0 + 1) across the whole row
                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                        const float cos_theta = cosf(theta_base);
                        const float sin_theta = sinf(theta_base);
                        theta_base *= theta_scale;
                        // byte-offset addressing using the per-dimension strides
                        const ggml_fp16_t * const dy  = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
                              ggml_fp16_t *       dx  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
                        // arithmetic is done in float; both inputs are read before any store
                        const float dy0 = GGML_FP16_TO_FP32(dy[0]);
                        const float dy1 = GGML_FP16_TO_FP32(dy[1]);
                        dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
                        dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
                    }
                } else {
                    // neox pairing: the row is handled in blocks of n_dims elements and
                    // each pair is (i0, i0 + n_dims/2)
                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
                            const float cos_theta = cosf(theta_base);
                            const float sin_theta = sinf(theta_base);
                            theta_base *= theta_scale;
                            const int64_t i0 = ib*n_dims + ic/2;
                            const ggml_fp16_t * const dy  = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
                                  ggml_fp16_t *       dx  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3  + i2*nb2  + i1*nb1  + i0*nb0);
                            const float dy0 = GGML_FP16_TO_FP32(dy[0]);
                            const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
                            dx[0]        = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
                            dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
                        }
                    }
                }
            }
        }
    }
 }
 static void ggml_compute_forward_rope_back(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
@ -11484,11 +11298,11 @@ static void ggml_compute_forward_rope_back(
    switch (src0->type) {
        case GGML_TYPE_F16:
            {
-                ggml_compute_forward_rope_back_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
            } break;
        case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
            } break;
        default:
            {
@ -14926,14 +14740,17 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
                    const int mode       = ((int32_t *) tensor->op_params)[2];
                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
-                    float freq_scale;
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
-                    float xpos_base;
+
-                    bool  xpos_down;
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
                    src0->grad = ggml_add_or_set(ctx,
                            src0->grad,
@ -14943,8 +14760,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                n_dims,
                                mode,
                                n_ctx,
                                n_orig_ctx,
                                freq_base,
                                freq_scale,
                                ext_factor,
                                attn_factor,
                                beta_fast,
                                beta_slow,
                                xpos_base,
                                xpos_down),
                            zero_table);
@ -14957,14 +14779,17 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
                    const int mode       = ((int32_t *) tensor->op_params)[2];
                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
-                    float freq_scale;
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
-                    float xpos_base;
+
-                    bool  xpos_down;
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
                    src0->grad = ggml_add_or_set(ctx,
                            src0->grad,
@ -14973,14 +14798,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                src1,
                                n_dims,
                                mode,
                                0,
                                n_ctx,
                                n_orig_ctx,
                                freq_base,
                                freq_scale,
-                                0.0f,
+                                ext_factor,
-                                1.0f,
+                                attn_factor,
-                                0.0f,
+                                beta_fast,
-                                0.0f,
+                                beta_slow,
                                xpos_base,
                                xpos_down,
                                false),
@ -18248,7 +18073,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
    {
        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
-        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
            struct gguf_kv * kv = &ctx->kv[i];
            //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
@ -18295,7 +18120,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                            case GGUF_TYPE_STRING:
                                {
                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
-                                    for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                                    for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                                        ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                                    }
                                } break;
@ -18323,7 +18148,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
    {
        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
@ -18370,7 +18195,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
    // compute the total size of the data section, taking into account the alignment
    {
        ctx->size = 0;
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
            const int64_t ne =
@ -18439,7 +18264,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
        ggml_set_no_alloc(ctx_data, true);
        // create the tensors
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            const int64_t ne[GGML_MAX_DIMS] = {
                ctx->infos[i].ne[0],
                ctx->infos[i].ne[1],
--- a/ggml.h
+++ b/ggml.h
@ -1371,8 +1371,13 @@ extern "C" {
            int                   n_dims,
            int                   mode,
            int                   n_ctx,
            int                   n_orig_ctx,
            float                 freq_base,
            float                 freq_scale,
            float                 ext_factor,
            float                 attn_factor,
            float                 beta_fast,
            float                 beta_slow,
            float                 xpos_base,
            bool                  xpos_down);
--- a/whisper.cpp
+++ b/whisper.cpp
@ -1732,22 +1732,20 @@ static struct ggml_cgraph * whisper_build_graph_conv(
        // convolution + gelu
        {
            cur = ggml_conv_1d_ph(ctx0, model.e_conv_1_w, mel, 1, 1);
            if (n_ctx == hparams.n_audio_ctx) {
                cur = ggml_add(ctx0, cur, model.e_conv_1_b);
-            //cur = ggml_add(ctx0,
+            } else {
-            //        ggml_repeat(ctx0,
+                cur = ggml_add(ctx0, cur, ggml_cont(ctx0, ggml_view_2d(ctx0, model.e_conv_1_b, cur->ne[0], cur->ne[1], model.e_conv_1_b->nb[1], 0)));
-            //            model.e_conv_1_b,
+            }
            //            cur),
            //        cur);
            cur = ggml_gelu(ctx0, cur);
            cur = ggml_conv_1d_ph(ctx0, model.e_conv_2_w, cur, 2, 1);
            if (n_ctx == hparams.n_audio_ctx) {
                cur = ggml_add(ctx0, cur, model.e_conv_2_b);
-            //cur = ggml_add(ctx0,
+            } else {
-            //        ggml_repeat(ctx0,
+                cur = ggml_add(ctx0, cur, ggml_cont(ctx0, ggml_view_2d(ctx0, model.e_conv_2_b, cur->ne[0], cur->ne[1], model.e_conv_2_b->nb[1], 0)));
-            //            model.e_conv_2_b,
+            }
            //            cur),
            //        cur);
            cur = ggml_gelu(ctx0, cur);
        }
@ -3527,7 +3525,7 @@ int whisper_encode(struct whisper_context * ctx, int offset, int n_threads) {
 int whisper_decode_with_state(struct whisper_context * ctx, struct whisper_state * state, const whisper_token * tokens, int n_tokens, int n_past, int n_threads) {
    whisper_batch_prep_legacy(state->batch, tokens, n_tokens, n_past, 0);
-    whisper_kv_cache_seq_rm(ctx->state->kv_self, 0, n_past, -1);
+    whisper_kv_cache_seq_rm(state->kv_self, 0, n_past, -1);
    if (!whisper_decode_internal(*ctx, *state, state->batch, n_threads, nullptr, nullptr)) {
        WHISPER_LOG_ERROR("%s: failed to eval\n", __func__);
@ -3540,19 +3538,10 @@ int whisper_decode_with_state(struct whisper_context * ctx, struct whisper_state
 int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, int n_tokens, int n_past, int n_threads) {
    if (ctx->state == nullptr) {
        WHISPER_LOG_ERROR("%s: ERROR state was not loaded.\n", __func__);
-        return false;
+        return -1;
    }
-    whisper_kv_cache_seq_rm(ctx->state->kv_self, 0, n_past, -1);
+    return whisper_decode_with_state(ctx, ctx->state, tokens, n_tokens, n_past, n_threads);
    whisper_batch_prep_legacy(ctx->state->batch, tokens, n_tokens, n_past, 0);
    if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->batch, n_threads, nullptr, nullptr)) {
        WHISPER_LOG_ERROR("%s: failed to eval\n", __func__);
        return 1;
    }
    return 0;
 }
 int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, int n_max_tokens) {
@ -5188,7 +5177,7 @@ int whisper_full_with_state(
            const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
            params.progress_callback(
-                ctx, ctx->state, progress_cur, params.progress_callback_user_data);
+                ctx, state, progress_cur, params.progress_callback_user_data);
        }
        // if only 1 second left, then stop
--- a/whisper.h
+++ b/whisper.h
@ -50,7 +50,9 @@ extern "C" {
    //
    //     ...
    //
-    //     struct whisper_context * ctx = whisper_init_from_file("/path/to/ggml-base.en.bin");
+    //     whisper_context_params cparams = whisper_context_default_params();
    // 
    //     struct whisper_context * ctx = whisper_init_from_file_with_params("/path/to/ggml-base.en.bin", cparams);
    //
    //     if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
    //         fprintf(stderr, "failed to process audio\n");
Author	SHA1	Message	Date
Georgi Gerganov	15c4fdce45	chess : tuning performance	2023-11-30 10:50:47 +02:00
Fraxy V	70741ba794	wchess: c++17 -> c++11	2023-11-30 08:37:54 +02:00
Fraxy V	bb723282cc	wchess: off/on prompt	2023-11-30 01:17:29 +02:00
Fraxy V	dc5513a709	wchess: prompt	2023-11-29 19:30:57 +02:00
Fraxy V	ffc244845b	wchess : dynamic grammar	2023-11-29 18:53:28 +02:00
Fraxy V	8962a6bd67	wchess: preparing dyn grammar	2023-11-29 15:29:16 +02:00
Fraxy V	d313034b9c	wchess grammar tweaks	2023-11-29 09:25:45 +02:00
Fraxy V	8b0b0acff3	wchess : remove vad	2023-11-28 19:03:17 +02:00
Fraxy V	02ade14f67	wchess minor	2023-11-28 16:21:46 +02:00
fraxy-v	8dba8204eb	Merge pull request #1 from ggerganov/gg/wchess wchess : add clear_audio callback	2023-11-28 15:45:17 +02:00
Georgi Gerganov	4260d4fc70	wchess : minor	2023-11-28 15:10:18 +02:00
Georgi Gerganov	ee65df7982	wchess : add clear_audio callback	2023-11-28 13:37:26 +02:00
Fraxy V	03f254193b	wchess: hardcoded rules	2023-11-27 10:51:20 +02:00
Fraxy V	8f2d8eae10	wchess: basic chess rules	2023-11-27 10:41:04 +02:00
Fraxy V	a44b21bce0	wchess: tidy up entry files	2023-11-25 11:34:06 +02:00
Fraxy V	f07ff2aa6a	chess -> wchess	2023-11-25 10:16:48 +02:00
Fraxy V	280e631bcf	chess.wasm: poc of chess rules	2023-11-23 16:09:00 +02:00
Fraxy V	2f86da0d09	chess.wasm: add chessboard	2023-11-23 08:49:47 +02:00
Fraxy V	a787f7f85c	chess.wasm: encoder context value resulting in echoing	2023-11-21 20:42:20 +02:00
Fraxy V	c83a38e89d	chess.wasm: go back to greedy	2023-11-21 16:56:22 +02:00
Fraxy V	758c951729	chess.wasm: grammar in emscripten	2023-11-21 16:30:44 +02:00
Felix	eff3570f78	server : add a REST Whisper server example with OAI-like API (#1380 ) * Add first draft of server * Added json support and base funcs for server.cpp * Add more user input via api-request also some clean up * Add reqest params and load post function Also some general clean up * Remove unused function * Add readme * Add exception handlers * Update examples/server/server.cpp * make : add server target * Add magic curl syntax Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>	2023-11-20 21:40:24 +02:00
M. A. Ali	fa19bc4195	whisper : update example in whisper.h (#1529 ) update the example in the header, previous examples deprecated.	2023-11-20 20:52:27 +02:00
Georgi Gerganov	a01b2e0971	sdl : fix audio callback (#1523 )	2023-11-20 13:16:38 +02:00
Georgi Gerganov	8159a9ab99	whisper : reuse whisper_decode_with_state (#1521 )	2023-11-20 13:16:11 +02:00
Tamotsu Takahashi	7516d9c16d	ci : redistribute CUDA DLLs (#1522 ) see https://docs.nvidia.com/cuda/eula/index.html#attachment-a	2023-11-19 12:43:22 +02:00
sandrohanea	46cc26d1b9	whisper : fix with_state methods to use the correct state (#1519 ) Co-authored-by: Sandro Hanea <sandrohanea@microsoft.com>	2023-11-19 11:25:30 +02:00
Georgi Gerganov	f784f9fa12	whisper : fix overriding the audio context	2023-11-19 10:32:32 +02:00
Georgi Gerganov	ca23f8ee6d	cuda : assert ggml_add sources to be contiguous	2023-11-19 10:32:08 +02:00
Georgi Gerganov	e2f0eba2d4	ios : sync submodule	2023-11-17 10:42:04 +02:00
Georgi Gerganov	d4353e48f7	sync : ggml (ggml-alloc + linker + gguf fixes) (#1501 )	2023-11-17 10:00:07 +02:00
		`@ -0,0 +1,2 @@`
							`/! chessboard.js v1.0.0 \| (c) 2019 Chris Oakman \| MIT License chessboardjs.com/license /`
							.clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}