Compare commits


19 Commits

Author SHA1 Message Date
c8b3bc6a0d cuda : use CUBLAS_COMPTE_F32 insted of CUBLAS_COMPUTE_F16 2023-11-27 11:57:07 +02:00
f52e74d4dc CI : Rectify the Clang-Related workflow issues (#1551)
* fix bugs in workflow

* fix missing clang in workflow

* Update build.yml
2023-11-27 11:35:37 +02:00
23c21e92eb server : automatically convert audio on the server (#1539)
* server : automatically convert audio on the server

* server : remove rebundant comments

* server : automatic conversion refactor

* server : update server readme

* server : remove unnecessary comments and tabs

* server : put back remove calling

* server : apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server : check ffmpeg before the server lunch

* server : fix indentation

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server : fix function typo calling

* server : fix function typo calling

* server : add warning in readme

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-11-27 11:28:34 +02:00
447d49530c whisper : remove trailing whitespaces 2023-11-24 13:13:21 +02:00
9d6ebd877c release : v1.5.1 2023-11-24 12:41:55 +02:00
0ba365f958 metal : add backend function to check device family support (#1547) 2023-11-24 12:37:08 +02:00
010c8ec3ab cuda : sync some minor stuff from llama.cpp (#1548) 2023-11-24 12:36:21 +02:00
ffdb5c4735 whisper : fix typo 2023-11-24 09:45:10 +02:00
a5881d619c server : add --print-realtime param (#1541)
* server : add --print-realtime param

* Fix duplicate realtime output
2023-11-24 09:35:02 +02:00
34f70b3a56 whisper : add whisper_lang_str_full (#1546)
* Update whisper.h

add whisper_lang_fullstr to retrieve the full language name

* Update whisper.cpp

add whisper_lang_fullstr to return the full language name

* fullstr -> str_full

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-11-24 09:33:13 +02:00
8328d1900f fix(server): typo in temperature parameter (#1545)
Also fixed another typo in comments.
2023-11-23 20:59:36 +02:00
d2bd5f0bdc metal : fix build (#1544) 2023-11-23 20:20:53 +02:00
34209a37a2 readme : add server example 2023-11-23 17:20:33 +02:00
180e062eda go : fixed Makefile for MacOS ARM 64 (#1530)
* Fixed Makefile for MacOS ARM 64 based on https://github.com/ggerganov/whisper.cpp/issues/1344 + proper ggml-metal env var setting

* conditional to fix broken non-macos compilation

* spaces -> tab

* make : fix whitespaces

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-11-22 18:08:11 +02:00
5c7be85fdc Change temp file name for server application (#1535)
Avoid issue of removing file if it exists in the current working
directory
2023-11-22 09:23:36 +01:00
146169ec38 bench : pass memcpy threads from cli 2023-11-21 22:27:22 +02:00
9befab5ab9 bench : multi-thread memcpy (#1534) 2023-11-21 22:07:30 +02:00
9ac88f2b57 Close file after writing in server application (#1533)
Fix of mistake leaving file open while reading it again as wav
2023-11-21 20:36:10 +01:00
46f5b6cb08 server : add video to readme 2023-11-21 17:30:43 +02:00
47 changed files with 332 additions and 4109 deletions

View File

@ -25,6 +25,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev
make
@ -86,6 +87,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
@ -113,8 +115,10 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
apt install -y clang
apt install -y clang build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
ctest -L gh --output-on-failure'
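Taken together, the clang job's container script amounts to roughly the following (a sketch only; `${{ matrix.build }}` is assumed here to be `Release`):

```
set -e
apt update
apt install -y clang build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
ctest -L gh --output-on-failure
```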
@ -140,6 +144,7 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
@ -217,10 +222,10 @@ jobs:
sdl2: [ON]
include:
- arch: Win32
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x86.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
s2arc: x86
- arch: x64
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x64.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
s2arc: x64
- sdl2: ON
s2ver: 2.26.0

View File

@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.5)
project(whisper.cpp VERSION 1.5.0)
project(whisper.cpp VERSION 1.5.1)
# Add path to modules
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")

View File

@ -6,7 +6,7 @@
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
Stable: [v1.5.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
Stable: [v1.5.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
@ -777,6 +777,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)

View File

@ -1,9 +1,26 @@
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif
ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
BUILD_DIR := build
MODELS_DIR := models
EXAMPLES_DIR := $(wildcard examples/*)
INCLUDE_PATH := $(abspath ../..)
LIBRARY_PATH := $(abspath ../..)
ifeq ($(UNAME_S),Darwin)
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
endif
all: clean whisper examples
whisper: mkdir
@ -11,8 +28,13 @@ whisper: mkdir
@${MAKE} -C ../.. libwhisper.a
test: model-small whisper modtidy
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
endif
examples: $(EXAMPLES_DIR)
@ -21,7 +43,11 @@ model-small: mkdir examples/go-model-download
$(EXAMPLES_DIR): mkdir whisper modtidy
@echo Build example $(notdir $@)
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
endif
mkdir:
@echo Mkdir ${BUILD_DIR}
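
As a usage sketch of the Makefile above (the `bindings/go` location is an assumption; the `whisper`, `test`, and `examples` targets are the ones defined in this diff):

```
cd bindings/go     # assumed path of this Makefile in the repository
make whisper       # builds ../../libwhisper.a
make test          # fetches the small model first; on Darwin links Foundation/Metal/MetalKit
make examples      # builds each example into build/
```

On macOS the Metal framework flags and `GGML_METAL_PATH_RESOURCES` are applied only inside the `ifeq ($(UNAME_S),Darwin)` branches, so non-macOS builds are unaffected.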

View File

@ -1,6 +1,6 @@
{
"name": "whisper.cpp",
"version": "1.5.0",
"version": "1.5.1",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {

View File

@ -73,5 +73,3 @@ else()
add_subdirectory(talk-llama)
add_subdirectory(lsp)
endif()
add_subdirectory(wchess)

View File

@ -2,6 +2,10 @@
Simple http server. WAV Files are passed to the inference model via http requests.
https://github.com/ggerganov/whisper.cpp/assets/1991296/e983ee53-8741-4eb5-9048-afe5e4594b8f
## Usage
```
./server -h
@ -29,6 +33,7 @@ options:
-nf, --no-fallback [false ] do not use temperature fallback while decoding
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pr, --print-realtime [false ] print output in realtime
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
@ -38,8 +43,12 @@ options:
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
--port PORT, [8080 ] Port number for the server
--convert, [false ] Convert audio to WAV, requires ffmpeg on the server
```
> [!WARNING]
> **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
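
When `--convert` is enabled, the server shells out to ffmpeg to normalize the uploaded audio to 16 kHz mono 16-bit PCM WAV before inference (see `convert_to_wav` in the server diff below). A manual equivalent, as a sketch assuming `ffmpeg` is on the PATH and `input.mp3` stands in for an uploaded file:

```
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le input.wav
```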
## request examples
**/inference**
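
A minimal request sketch (the multipart field name `file` is an assumption; `response-format` and `temperature` match the fields read in the handler shown below):

```
curl 127.0.0.1:8080/inference \
  -F file=@audio.mp3 \
  -F response-format=json \
  -F temperature=0.0
```

With `--convert` the upload does not have to be WAV; without it, the upload is read directly as a WAV file.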

View File

@ -43,6 +43,8 @@ struct server_params
int32_t port = 8080;
int32_t read_timeout = 600;
int32_t write_timeout = 600;
bool ffmpeg_converter = false;
};
struct whisper_params {
@ -72,6 +74,7 @@ struct whisper_params {
bool no_fallback = false;
bool print_special = false;
bool print_colors = false;
bool print_realtime = false;
bool print_progress = false;
bool no_timestamps = false;
bool use_gpu = true;
@ -144,6 +147,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
fprintf(stderr, " -pr, --print-realtime [%-7s] print output in realtime\n", params.print_realtime ? "true" : "false");
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
@ -155,6 +159,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
fprintf(stderr, "\n");
}
@ -188,6 +193,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
else if (arg == "-pr" || arg == "--print-realtime") { params.print_realtime = true; }
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
@ -200,6 +206,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params, sparams);
@ -217,6 +224,45 @@ struct whisper_print_user_data {
int progress_prev;
};
void check_ffmpeg_availibility() {
int result = system("ffmpeg -version");
if (result == 0) {
std::cout << "ffmpeg is available." << std::endl;
} else {
// ffmpeg is not available
std::cout << "ffmpeg is not found. Please ensure that ffmpeg is installed ";
std::cout << "and that its executable is included in your system's PATH. ";
exit(0);
}
}
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
std::ostringstream cmd_stream;
std::string converted_filename_temp = temp_filename + "_temp.wav";
cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
std::string cmd = cmd_stream.str();
int status = std::system(cmd.c_str());
if (status != 0) {
error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
return false;
}
// Remove the original file
if (remove(temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to remove the original file.\"}";
return false;
}
// Rename the temporary file to match the original filename
if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
return false;
}
return true;
}
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
std::string speaker = "";
const int64_t n_samples = pcmf32s[0].size();
@ -373,7 +419,7 @@ void get_req_parameters(const Request & req, whisper_params & params)
{
params.response_format = req.get_file_value("response-format").content;
}
if (req.has_file("temerature"))
if (req.has_file("temperature"))
{
params.userdef_temp = std::stof(req.get_file_value("temperature").content);
}
@ -404,6 +450,9 @@ int main(int argc, char ** argv) {
exit(0);
}
if (sparams.ffmpeg_converter) {
check_ffmpeg_availibility();
}
// whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
@ -429,7 +478,7 @@ int main(int argc, char ** argv) {
});
svr.Post("/inference", [&](const Request &req, Response &res){
// aquire whisper model mutex lock
// acquire whisper model mutex lock
whisper_mutex.lock();
// first check user requested fields of the request
@ -453,20 +502,35 @@ int main(int argc, char ** argv) {
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
// write file to temporary file
std::ofstream temp_file{filename, std::ios::binary};
// write to temporary file
const std::string temp_filename = "whisper_server_temp_file.wav";
std::ofstream temp_file{temp_filename, std::ios::binary};
temp_file << audio_file.content;
temp_file.close();
// if file is not wav, convert to wav
if (sparams.ffmpeg_converter) {
std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
const bool is_converted = convert_to_wav(temp_filename, error_resp);
if (!is_converted) {
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
}
// read wav content into pcmf32
if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
res.set_content(error_resp, "application/json");
std::remove(temp_filename.c_str());
whisper_mutex.unlock();
return;
}
// remove temp file
std::remove(filename.c_str());
std::remove(temp_filename.c_str());
printf("Successfully loaded %s\n", filename.c_str());
@ -503,7 +567,6 @@ int main(int argc, char ** argv) {
// run the inference
{
printf("Running whisper.cpp inference on %s\n", filename.c_str());
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
@ -541,7 +604,7 @@ int main(int argc, char ** argv) {
whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
// this callback is called on each new segment
if (!wparams.print_realtime) {
if (params.print_realtime) {
wparams.new_segment_callback = whisper_print_segment_callback;
wparams.new_segment_callback_user_data = &user_data;
}

View File

@ -1,9 +0,0 @@
set(CMAKE_CXX_STANDARD 11)
add_subdirectory(libwchess)
if (EMSCRIPTEN)
add_subdirectory(wchess.wasm)
else()
add_subdirectory(wchess.cmd)
endif()

View File

@ -1,19 +0,0 @@
add_library(libwchess
WChess.cpp
WChess.h
Chessboard.cpp
Chessboard.h
)
target_link_libraries(libwchess
PUBLIC
whisper
common
)
target_include_directories(libwchess
PUBLIC
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
)
add_executable(test-chessboard test-chessboard.cpp Chessboard.cpp)

View File

@ -1,714 +0,0 @@
#include "Chessboard.h"
#include <vector>
#include <algorithm>
#include <cstring>
#include <set>
namespace {
// remove std::string_view, c++17 -> c++11
constexpr std::array<const char*, 64> positions = {
"a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1",
"a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2",
"a3", "b3", "c3", "d3", "e3", "f3", "g3", "h3",
"a4", "b4", "c4", "d4", "e4", "f4", "g4", "h4",
"a5", "b5", "c5", "d5", "e5", "f5", "g5", "h5",
"a6", "b6", "c6", "d6", "e6", "f6", "g6", "h6",
"a7", "b7", "c7", "d7", "e7", "f7", "g7", "h7",
"a8", "b8", "c8", "d8", "e8", "f8", "g8", "h8",
};
constexpr int INVALID_POS = positions.size();
constexpr int R = 0; // rank index
constexpr int F = 1; // file index
#define POS ((c[F] - '1') * 8 + (c[R] - 'a'))
constexpr int operator ""_P(const char * c, size_t size) {
return size < 2 || POS < 0 || POS > INVALID_POS ? INVALID_POS : POS;
}
#undef POS
struct sview {
const char * ptr = nullptr;
size_t size = 0;
sview() = default;
sview(const char * p, size_t s) : ptr(p), size(s) {}
sview(const std::string& s) : ptr(s.data()), size(s.size()) {}
size_t find(char del, size_t pos) {
while (pos < size && ptr[pos] != del) ++pos;
return pos < size ? pos : std::string::npos;
}
};
std::vector<sview> split(sview str, char del) {
std::vector<sview> res;
size_t cur = 0;
size_t last = 0;
while (cur != std::string::npos) {
if (str.ptr[last] == ' ') {
++last;
continue;
}
cur = str.find(del, last);
size_t len = cur == std::string::npos ? str.size - last : cur - last;
res.emplace_back(str.ptr + last, len);
last = cur + 1;
}
return res;
}
size_t strToPos(sview str) {
return operator ""_P(str.ptr, str.size);
}
constexpr std::array<const char*, 6> pieceNames = {
"pawn", "knight", "bishop", "rook", "queen", "king",
};
int strToType(sview str) {
auto it = std::find_if(pieceNames.begin(), pieceNames.end(), [str] (const char* name) { return strncmp(name, str.ptr, str.size) == 0; });
return it != pieceNames.end() ? int(it - pieceNames.begin()) : pieceNames.size();
}
}
Chessboard::Chessboard()
: blackPieces {{
{Piece::Pawn, Piece::Black, "a7"_P },
{Piece::Pawn, Piece::Black, "b7"_P },
{Piece::Pawn, Piece::Black, "c7"_P },
{Piece::Pawn, Piece::Black, "d7"_P },
{Piece::Pawn, Piece::Black, "e7"_P },
{Piece::Pawn, Piece::Black, "f7"_P },
{Piece::Pawn, Piece::Black, "g7"_P },
{Piece::Pawn, Piece::Black, "h7"_P },
{Piece::Rook, Piece::Black, "a8"_P },
{Piece::Knight, Piece::Black, "b8"_P },
{Piece::Bishop, Piece::Black, "c8"_P },
{Piece::Queen, Piece::Black, "d8"_P },
{Piece::King, Piece::Black, "e8"_P },
{Piece::Bishop, Piece::Black, "f8"_P },
{Piece::Knight, Piece::Black, "g8"_P },
{Piece::Rook, Piece::Black, "h8"_P },
}}
, whitePieces {{
{Piece::Pawn, Piece::White, "a2"_P },
{Piece::Pawn, Piece::White, "b2"_P },
{Piece::Pawn, Piece::White, "c2"_P },
{Piece::Pawn, Piece::White, "d2"_P },
{Piece::Pawn, Piece::White, "e2"_P },
{Piece::Pawn, Piece::White, "f2"_P },
{Piece::Pawn, Piece::White, "g2"_P },
{Piece::Pawn, Piece::White, "h2"_P },
{Piece::Rook, Piece::White, "a1"_P },
{Piece::Knight, Piece::White, "b1"_P },
{Piece::Bishop, Piece::White, "c1"_P },
{Piece::Queen, Piece::White, "d1"_P },
{Piece::King, Piece::White, "e1"_P },
{Piece::Bishop, Piece::White, "f1"_P },
{Piece::Knight, Piece::White, "g1"_P },
{Piece::Rook, Piece::White, "h1"_P },
}}
, board {{
&whitePieces[ 8], &whitePieces[ 9], &whitePieces[10], &whitePieces[11], &whitePieces[12], &whitePieces[13], &whitePieces[14], &whitePieces[15],
&whitePieces[ 0], &whitePieces[ 1], &whitePieces[ 2], &whitePieces[ 3], &whitePieces[ 4], &whitePieces[ 5], &whitePieces[ 6], &whitePieces[ 7],
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
&blackPieces[ 0], &blackPieces[ 1], &blackPieces[ 2], &blackPieces[ 3], &blackPieces[ 4], &blackPieces[ 5], &blackPieces[ 6], &blackPieces[ 7],
&blackPieces[ 8], &blackPieces[ 9], &blackPieces[10], &blackPieces[11], &blackPieces[12], &blackPieces[13], &blackPieces[14], &blackPieces[15],
}}
, whiteMoves {
{"b1"_P, "a3"_P}, {"b1"_P, "c3"_P},
{"g1"_P, "f3"_P}, {"g1"_P, "h3"_P},
{"a2"_P, "a3"_P}, {"a2"_P, "a4"_P},
{"b2"_P, "b3"_P}, {"b2"_P, "b4"_P},
{"c2"_P, "c3"_P}, {"c2"_P, "c4"_P},
{"d2"_P, "d3"_P}, {"d2"_P, "d4"_P},
{"e2"_P, "e3"_P}, {"e2"_P, "e4"_P},
{"f2"_P, "f3"_P}, {"f2"_P, "f4"_P},
{"g2"_P, "g3"_P}, {"g2"_P, "g4"_P},
{"h2"_P, "h3"_P}, {"h2"_P, "h4"_P},
}
, blackMoves {
{"a7"_P, "a5"_P}, {"a7"_P, "a6"_P},
{"b7"_P, "b5"_P}, {"b7"_P, "b6"_P},
{"c7"_P, "c5"_P}, {"c7"_P, "c6"_P},
{"d7"_P, "d5"_P}, {"d7"_P, "d6"_P},
{"e7"_P, "e5"_P}, {"e7"_P, "e6"_P},
{"f7"_P, "f5"_P}, {"f7"_P, "f6"_P},
{"g7"_P, "g5"_P}, {"g7"_P, "g6"_P},
{"h7"_P, "h5"_P}, {"h7"_P, "h6"_P},
{"b8"_P, "a6"_P}, {"b8"_P, "c6"_P},
{"g8"_P, "f6"_P}, {"g8"_P, "h6"_P},
}
{
static_assert(pieceNames.size() == Chessboard::Piece::Taken, "Mismatch between piece names and types");
std::sort(whiteMoves.begin(), whiteMoves.end());
std::sort(blackMoves.begin(), blackMoves.end());
}
std::string Chessboard::getRules(const std::string& prompt) const {
// leading space is very important!
std::string result =
"\n"
"# leading space is very important!\n"
"\n";
if (prompt.empty()) {
result += "move ::= \" \" ((piece | frompos) \" \" \"to \"?)? topos\n";
//result += "move ::= \" \" frompos \" \" \"to \"? topos\n";
}
else {
// result += "move ::= prompt \" \" ((piece | frompos) \" \" \"to \"?)? topos\n"
result += "move ::= prompt \" \" frompos \" \" \"to \"? topos\n"
"\n"
"prompt ::= \" " + prompt + "\"\n";
}
std::set<std::string> pieces;
std::set<std::string> from_pos;
std::set<std::string> to_pos;
auto& allowed_moves = m_moveCounter % 2 ? blackMoves : whiteMoves;
for (auto& m : allowed_moves) {
if (board[m.first]->type != Piece::Taken) pieces.insert(pieceNames[board[m.first]->type]);
from_pos.insert(positions[m.first]);
to_pos.insert(positions[m.second]);
}
if (!pieces.empty()) {
result += "piece ::= (";
for (auto& p : pieces) result += " \"" + p + "\" |";
result.pop_back();
result += ")\n\n";
}
if (!from_pos.empty()) {
result += "frompos ::= (";
for (auto& p : from_pos) result += " \"" + p + "\" |";
result.pop_back();
result += ")\n";
}
if (!to_pos.empty()) {
result += "topos ::= (";
for (auto& p : to_pos) result += " \"" + p + "\" |";
result.pop_back();
result += ")\n";
}
return result;
}
std::string Chessboard::stringifyBoard() {
static constexpr std::array<char, 6> blackShort = {
'p', 'n', 'b', 'r', 'q', 'k',
};
static constexpr std::array<char, 6> whiteShort = {
'P', 'N', 'B', 'R', 'Q', 'K',
};
std::string result;
result.reserve(16 + 2 * 64 + 16);
for (char rank = 'a'; rank <= 'h'; ++rank) {
result.push_back(rank);
result.push_back(' ');
}
result.back() = '\n';
for (int i = 7; i >= 0; --i) {
for (int j = 0; j < 8; ++j) {
auto p = board[i * 8 + j];
if (p) result.push_back(p->color == Piece::White ? whiteShort[p->type] : blackShort[p->type]);
else result.push_back((i + j) % 2 ? '.' : '*');
result.push_back(' ');
}
result.push_back('0' + i + 1);
result.push_back('\n');
}
return result;
}
std::string Chessboard::process(const std::string& command) {
auto color = Piece::Colors(m_moveCounter % 2);
fprintf(stdout, "%s: Command to %s: '%s%.*s%s'\n", __func__, (color ? "Black" : "White"), "\033[1m", int(command.size()), command.data(), "\033[0m");
if (command.empty()) return "";
auto tokens = split(command, ' ');
for (auto& t : tokens) fprintf(stdout, "%s: Token %.*s\n", __func__, int(t.size), t.ptr);
auto pos_from = INVALID_POS;
auto type = Piece::Types::Taken;
auto pos_to = INVALID_POS;
if (tokens.size() == 1) {
type = Piece::Types::Pawn;
pos_to = strToPos(tokens.front());
}
else {
pos_from = strToPos(tokens.front());
if (pos_from == INVALID_POS) type = Piece::Types(strToType(tokens.front()));
pos_to = strToPos(tokens.back());
}
if (pos_to == INVALID_POS) return "";
if (pos_from == INVALID_POS) {
if (type == Piece::Types::Taken) return "";
auto& pieces = color ? blackPieces : whitePieces;
auto pieceIndex = 0u;
for (; pieceIndex < pieces.size(); ++pieceIndex) {
if (pieces[pieceIndex].type == type && validateMove(pieces[pieceIndex], pos_to)) break;
}
if (pieceIndex == pieces.size()) return "";
pos_from = pieces[pieceIndex].pos;
}
if (board[pos_from] == nullptr) return "";
if (board[pos_from]->color != color) return "";
Move m = {pos_from, pos_to};
auto& allowed_moves = color ? blackMoves : whiteMoves;
fprintf(stdout, "%s:allowed size %d :\n", __func__, int(allowed_moves.size()));
for (auto& m : allowed_moves) fprintf(stdout, " %s %s; ", positions[m.first], positions[m.second]);
fprintf(stdout, "\n");
if (!std::binary_search(allowed_moves.begin(), allowed_moves.end(), m)) return "";
move(m);
{
auto it = std::remove_if(allowed_moves.begin(), allowed_moves.end(), [m] (const Move& move) { return move.first == m.first; });
allowed_moves.erase(it, allowed_moves.end());
}
std::vector<Piece*> affected = { board[m.second] };
for (auto& p : whitePieces) {
if (&p == board[m.second]
|| validateMove(p, m.first)
|| validateMove(p, m.second)
|| std::binary_search(whiteMoves.begin(), whiteMoves.end(), Move(p.pos, m.second))
) {
auto it = std::remove_if(whiteMoves.begin(), whiteMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
whiteMoves.erase(it, whiteMoves.end());
affected.push_back(&p);
}
}
for (auto& p : blackPieces) {
if (&p == board[m.second]
|| validateMove(p, m.first)
|| validateMove(p, m.second)
|| std::binary_search(blackMoves.begin(), blackMoves.end(), Move(p.pos, m.second))
) {
auto it = std::remove_if(blackMoves.begin(), blackMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
blackMoves.erase(it, blackMoves.end());
affected.push_back(&p);
}
}
for (auto& p : affected) getValidMoves(*p, p->color ? blackMoves : whiteMoves);
std::sort(blackMoves.begin(), blackMoves.end());
std::sort(whiteMoves.begin(), whiteMoves.end());
std::string result = positions[m.first];
result += "-";
result += positions[m.second];
++m_moveCounter;
fprintf(stdout, "%s: Move '%s%s%s'\n", __func__, "\033[1m", result.data(), "\033[0m");
return result;
}
void Chessboard::getValidMoves(const Piece& piece, std::vector<Move>& result) {
std::string cur = positions[piece.pos];
switch (piece.type) {
case Piece::Pawn: {
std::string next = cur;
piece.color ? --next[F] : ++next[F]; // one down / up
std::string left = { char(next[R] - 1), next[F]};
auto pos = strToPos(left);
if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
std::string right = { char(next[R] + 1), next[F]};
pos = strToPos(right);
if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
pos = strToPos(next);
if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
else break;
if (piece.color ? cur[F] != '7' : cur[F] != '2') break;
piece.color ? --next[F] : ++next[F]; // one down / up
pos = strToPos(next);
if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
break;
}
case Piece::Knight: {
std::string next = cur;
--next[F]; --next[F]; --next[R];
auto pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[F]; --next[F]; ++next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[F]; ++next[F]; --next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[F]; ++next[F]; ++next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[F]; --next[R]; --next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[F]; --next[R]; --next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[F]; ++next[R]; ++next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[F]; ++next[R]; ++next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
break;
}
case Piece::Bishop: {
std::string next = cur;
while (true) {
--next[R]; --next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
--next[R]; ++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R]; --next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R]; ++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
break;
}
case Piece::Rook: {
std::string next = cur;
while (true) {
--next[R];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
--next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
break;
}
case Piece::Queen: {
std::string next = cur;
while (true) {
--next[R]; --next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
--next[R]; ++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R]; --next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R]; ++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
--next[R];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[R];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
--next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
next = cur;
while (true) {
++next[F];
auto pos = strToPos(next);
if (pos == INVALID_POS) break;
else if (board[pos]) {
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
break;
}
result.emplace_back(piece.pos, pos);
}
break;
}
case Piece::King: {
std::string next = cur;
--next[R]; --next[F];
auto pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[R]; ++next[F];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[R]; --next[F];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[R]; ++next[F];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[R];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
--next[F];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
next = cur;
++next[F];
pos = strToPos(next);
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
break;
}
case Piece::Taken: break;
default: break;
}
}
bool Chessboard::validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
int direction = color == Piece::White ? 1 : -1;
bool two_ranks = color == Piece::White ? from_rank == 1 : from_rank == 6;
if (from_file == to_file) {
if (from_rank == to_rank - direction) return board[to_rank * 8 + to_file] == nullptr;
if (two_ranks && from_rank == to_rank - direction * 2) return board[(to_rank - direction) * 8 + to_file] == nullptr && board[to_rank * 8 + to_file] == nullptr;
}
else if (from_file + 1 == to_file || from_file - 1 == to_file) {
if (from_rank == to_rank - direction) return board[to_rank * 8 + to_file] != nullptr && board[to_rank * 8 + to_file]->color != color;
}
return false;
}
bool Chessboard::validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
int dr = std::abs(from_rank - to_rank);
int df = std::abs(from_file - to_file);
if ((dr == 2 && df == 1) || (dr == 1 && df == 2)) return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
return false;
}
bool Chessboard::validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
if (from_rank - from_file == to_rank - to_file) {
int direction = from_rank < to_rank ? 1 : -1;
from_rank += direction;
from_file += direction;
while (from_rank != to_rank) {
if (board[from_rank * 8 + from_file]) return false;
from_rank += direction;
from_file += direction;
}
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
}
if (from_rank + from_file == to_rank + to_file) {
int direction = from_rank < to_rank ? 1 : -1;
from_rank += direction;
from_file -= direction;
while (from_rank != to_rank) {
if (board[from_rank * 8 + from_file]) return false;
from_rank += direction;
from_file -= direction;
}
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
}
return false;
}
bool Chessboard::validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
if (from_rank == to_rank) {
int direction = from_file < to_file ? 1 : -1;
from_file += direction;
while (from_file != to_file) {
if (board[from_rank * 8 + from_file]) return false;
from_file += direction;
}
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
}
if (from_file == to_file) {
int direction = from_rank < to_rank ? 1 : -1;
from_rank += direction;
while (from_rank != to_rank) {
if (board[from_rank * 8 + from_file]) return false;
from_rank += direction;
}
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
}
return false;
}
bool Chessboard::validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
if (validateBishopMove(color, from_rank, from_file, to_rank, to_file)) return true;
return validateRookMove(color, from_rank, from_file, to_rank, to_file);
}
bool Chessboard::validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
if (std::abs(from_rank - to_rank) < 2 && std::abs(from_file - to_file) < 2) {
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
}
return false;
}
bool Chessboard::validateMove(const Piece& piece, int pos) {
if (piece.type == Piece::Taken) return false;
if (piece.pos == pos) return false;
int i = piece.pos / 8;
int j = piece.pos - i * 8;
int ii = pos / 8;
int jj = pos - ii * 8;
switch (piece.type) {
case Piece::Pawn: return validatePawnMove(piece.color, i, j, ii, jj);
case Piece::Knight: return validateKnightMove(piece.color, i, j, ii, jj);
case Piece::Bishop: return validateBishopMove(piece.color, i, j, ii, jj);
case Piece::Rook: return validateRookMove(piece.color, i, j, ii, jj);
case Piece::Queen: return validateQueenMove(piece.color, i, j, ii, jj);
case Piece::King: return validateKingMove(piece.color, i, j, ii, jj);
default: break;
}
return false;
}
bool Chessboard::move(const Move& m) {
if (!board[m.first] || (board[m.second] && board[m.first]->color == board[m.second]->color)) return false;
if (board[m.second]) board[m.second]->type = Piece::Taken;
board[m.second] = board[m.first];
board[m.first] = nullptr;
board[m.second]->pos = m.second;
return true;
}

View File

@ -1,59 +0,0 @@
#pragma once
#include <string>
#include <array>
#include <vector>
class Chessboard {
public:
Chessboard();
std::string process(const std::string& t);
std::string stringifyBoard();
std::string getRules(const std::string & prompt) const;
using Move = std::pair<int, int>;
private:
bool move(const Move& move);
struct Piece {
enum Types {
Pawn,
Knight,
Bishop,
Rook,
Queen,
King,
Taken,
};
enum Colors {
White,
Black,
};
Types type;
Colors color;
int pos;
};
using PieceSet = std::array<Piece, 16>;
PieceSet blackPieces;
PieceSet whitePieces;
int m_moveCounter = 0;
using Board = std::array<Piece*, 64>;
Board board;
std::vector<Move> whiteMoves;
std::vector<Move> blackMoves;
bool validateMove(const Piece& piece, int pos);
void getValidMoves(const Piece& piece, std::vector<Move>& moves);
// just basic validation
// fixme: missing en passant, castling, promotion, etc.
bool validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
bool validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
bool validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
bool validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
bool validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
bool validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
};

View File

@ -1,220 +0,0 @@
#include "WChess.h"
#include "Chessboard.h"
#include "grammar-parser.h"
#include "common.h"
#include <thread>
WChess::WChess(whisper_context * ctx,
const whisper_full_params & wparams,
callbacks cb,
settings s)
: m_ctx(ctx)
, m_wparams(wparams)
, m_cb(cb)
, m_settings(s)
, m_board(new Chessboard())
{}
WChess::~WChess() = default;
void WChess::set_status(const std::string& msg) const {
if (m_cb.set_status) (*m_cb.set_status)(msg);
}
void WChess::set_moves(const std::string& moves) const {
if (m_cb.set_moves) (*m_cb.set_moves)(moves);
}
bool WChess::check_running() const {
if (m_cb.check_running) return (*m_cb.check_running)();
return false;
}
void WChess::clear_audio() const {
if (m_cb.clear_audio) (*m_cb.clear_audio)();
}
void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
}
std::string WChess::stringify_board() const {
return m_board->stringifyBoard();
}
void WChess::run() {
set_status("loading data ...");
bool have_prompt = true;
bool ask_prompt = !have_prompt;
float logprob_min0 = 0.0f;
float logprob_min = 0.0f;
float logprob_sum0 = 0.0f;
float logprob_sum = 0.0f;
int n_tokens0 = 0;
int n_tokens = 0;
std::vector<float> pcmf32_cur;
std::vector<float> pcmf32_prompt;
const std::string k_prompt = have_prompt ? "" : "checkmate";
while (check_running()) {
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (ask_prompt) {
fprintf(stdout, "\n");
fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
fprintf(stdout, "\n");
{
char txt[1024];
snprintf(txt, sizeof(txt), "Say the following phrase: '%s'", k_prompt.c_str());
set_status(txt);
}
ask_prompt = false;
}
int64_t t_ms = 0;
{
get_audio(m_settings.vad_ms, pcmf32_cur);
if (!pcmf32_cur.empty()) {
fprintf(stdout, "%s: Processing ...\n", __func__);
set_status("Processing ...");
if (!have_prompt) {
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
const float sim = similarity(txt, k_prompt);
if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
ask_prompt = true;
} else {
fprintf(stdout, "\n");
fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
fprintf(stdout, "\n");
{
char txt[1024];
snprintf(txt, sizeof(txt), "Success! Waiting for voice commands ...");
set_status(txt);
}
// save the audio for the prompt
pcmf32_prompt = pcmf32_cur;
have_prompt = true;
}
} else {
if (!pcmf32_prompt.empty()) pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
static const size_t MIN_SIZE = 1.2 * WHISPER_SAMPLE_RATE;
if (MIN_SIZE > pcmf32_cur.size()) pcmf32_cur.insert(pcmf32_cur.begin(), MIN_SIZE - pcmf32_cur.size(), 0.0f);
std::string rules = m_board->getRules(k_prompt);
fprintf(stdout, "%s: grammar rules:\n'%s'\n", __func__, rules.c_str());
auto grammar_parsed = grammar_parser::parse(rules.c_str());
auto grammar_rules = grammar_parsed.c_rules();
m_wparams.grammar_rules = grammar_rules.data();
m_wparams.n_grammar_rules = grammar_rules.size();
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("move");
auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
const float p = 100.0f * std::exp(logprob_min);
fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
// find the prompt in the text
float best_sim = 0.0f;
size_t best_len = 0;
for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
const auto prompt = txt.substr(0, n);
const float sim = similarity(prompt, k_prompt);
//fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
if (sim > best_sim) {
best_sim = sim;
best_len = n;
}
}
fprintf(stdout, "%s: DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
std::string command = ::trim(txt.substr(best_len));
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
fprintf(stdout, "\n");
{
char txt[1024];
snprintf(txt, sizeof(txt), "Command '%s', (t = %d ms)", command.c_str(), (int) t_ms);
set_status(txt);
}
if (!command.empty()) {
auto move = m_board->process(command);
if (!move.empty()) {
set_moves(std::move(move));
}
}
}
clear_audio();
}
}
}
}
std::string WChess::transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
int & n_tokens,
int64_t & t_ms) {
const auto t_start = std::chrono::high_resolution_clock::now();
logprob_min = 0.0f;
logprob_sum = 0.0f;
n_tokens = 0;
t_ms = 0;
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
return {};
}
std::string result;
const int n_segments = whisper_full_n_segments(m_ctx);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(m_ctx, i);
result += text;
const int n = whisper_full_n_tokens(m_ctx, i);
for (int j = 0; j < n; ++j) {
const auto token = whisper_full_get_token_data(m_ctx, i, j);
if(token.plog > 0.0f) return {};
logprob_min = std::min(logprob_min, token.plog);
logprob_sum += token.plog;
++n_tokens;
}
}
const auto t_end = std::chrono::high_resolution_clock::now();
t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
return result;
}

View File

@ -1,62 +0,0 @@
#pragma once
#include "whisper.h"
#include <string>
#include <vector>
#include <memory>
class Chessboard;
class WChess {
public:
using SetStatusCb = void (*)(const std::string &);
using CheckRunningCb = bool (*)();
using GetAudioCb = void (*)(int, std::vector<float> &);
using SetMovesCb = void (*)(const std::string &);
using ClearAudioCb = void (*)();
struct callbacks {
SetStatusCb set_status = nullptr;
CheckRunningCb check_running = nullptr;
GetAudioCb get_audio = nullptr;
SetMovesCb set_moves = nullptr;
ClearAudioCb clear_audio = nullptr;
};
struct settings {
int32_t vad_ms = 2000;
int32_t prompt_ms = 5000;
int32_t command_ms = 4000;
float vad_thold = 0.2f;
float freq_thold = 100.0f;
bool print_energy = false;
};
WChess(
whisper_context * ctx,
const whisper_full_params & wparams,
callbacks cb,
settings s
);
~WChess();
void run();
std::string stringify_board() const;
private:
void get_audio(int ms, std::vector<float>& pcmf32) const;
void set_status(const std::string& msg) const;
void set_moves(const std::string& moves) const;
bool check_running() const;
void clear_audio() const;
std::string transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
int & n_tokens,
int64_t & t_ms);
whisper_context * m_ctx;
whisper_full_params m_wparams;
const callbacks m_cb;
const settings m_settings;
std::unique_ptr<Chessboard> m_board;
};

View File

@ -1,88 +0,0 @@
#include "Chessboard.h"
#define ASSERT(x) \
do { \
if (!(x)) { \
fprintf(stderr, "ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
fflush(stderr); \
exit(1); \
} \
} while (0)
int main() {
{
// pawns
Chessboard chess;
ASSERT(chess.process("pawn to d4, e5, e3, pawn to d5") == "d2-d4 e7-e5 e2-e3 d7-d5");
ASSERT(chess.process("pawn to d4") == ""); // wrong
ASSERT(chess.process("pawn to c5") == ""); // wrong
ASSERT(chess.process("pawn to d5") == ""); // wrong
ASSERT(chess.process("pawn to d3") == ""); // wrong
ASSERT(chess.process("pawn to f5") == ""); // wrong, white's turn
ASSERT(chess.process("h4") == "h2-h4");
ASSERT(chess.process("d4") == "e5-d4");
ASSERT(chess.process("e4") == "e3-e4");
ASSERT(chess.process("d4") == ""); // wrong
ASSERT(chess.process("e4") == "d5-e4");
}
{
// rook
Chessboard chess;
ASSERT(chess.process("rook to a3") == ""); // wrong
ASSERT(chess.process("a4, h5, rook to a3, rook to h6") == "a2-a4 h7-h5 a1-a3 h8-h6");
ASSERT(chess.process("rook to d3, rook to e6") == "a3-d3 h6-e6");
ASSERT(chess.process("rook to d4, rook to e5") == "d3-d4 e6-e5");
ASSERT(chess.process("rook to a4") == ""); // wrong
ASSERT(chess.process("rook to d8") == ""); // wrong
ASSERT(chess.process("rook to d3") == "d4-d3");
ASSERT(chess.process("rook to e2") == "e5-e2");
}
{
// knight
Chessboard chess;
ASSERT(chess.process("knight to c3, knight to c6") == "b1-c3 b8-c6");
ASSERT(chess.process("knight to c3") == ""); // wrong
ASSERT(chess.process("knight to a2") == ""); // wrong
ASSERT(chess.process("knight to b4") == ""); // wrong, white's turn
ASSERT(chess.process("knight to b5") == "c3-b5");
ASSERT(chess.process("knight to a5") == "c6-a5");
ASSERT(chess.process("knight to c7") == "b5-c7");
}
{
// bishop
Chessboard chess;
ASSERT(chess.process("b3, b6, bishop to b2, bishop to b7") == "b2-b3 b7-b6 c1-b2 c8-b7");
ASSERT(chess.process("bishop to a1") == ""); // wrong
ASSERT(chess.process("bishop to h8") == ""); // wrong
ASSERT(chess.process("bishop to a6") == ""); // wrong, white's turn
ASSERT(chess.process("bishop to g7") == "b2-g7");
}
{
// queen
Chessboard chess;
ASSERT(chess.process("queen to d8") == ""); // wrong
ASSERT(chess.process("queen to f1") == ""); // wrong
ASSERT(chess.process("queen to h5") == ""); // wrong
ASSERT(chess.process("e3, d5, queen to h5, queen to d6") == "e2-e3 d7-d5 d1-h5 d8-d6");
ASSERT(chess.process("queen to c5") == ""); // wrong, white's turn
ASSERT(chess.process("queen to f7") == "h5-f7");
}
{
// king
Chessboard chess;
ASSERT(chess.process("d3, d6, king to d2, king to d7, king to c3, king to c6, king to c4") == "d2-d3 d7-d6 e1-d2 e8-d7 d2-c3 d7-c6 c3-c4");
ASSERT(chess.process("bishop to e6") == "c8-e6");
ASSERT(chess.process("king to b3") == "c4-b3"); // !! check check not implemented
}
}

View File

@ -1,8 +0,0 @@
if (WHISPER_SDL2)
set(TARGET wchess)
add_executable(${TARGET} wchess.cmd.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT})
endif ()

View File

@ -1,207 +0,0 @@
// Command line voice assisted chess
//
// Speak chess move commands to the microphone.
// The moves will translated to chessboard positions.
//
//
#include "WChess.h"
#include "common-sdl.h"
#include <memory>
#include <thread>
// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t prompt_ms = 5000;
int32_t command_ms = 8000;
int32_t capture_id = -1;
int32_t max_tokens = 32;
int32_t audio_ctx = 0;
float vad_thold = 0.6f;
float freq_thold = 100.0f;
float grammar_penalty = 100.0f;
bool speed_up = false;
bool translate = false;
bool print_special = false;
bool print_energy = false;
bool no_timestamps = true;
bool use_gpu = true;
std::string language = "en";
std::string model = "models/ggml-base.en.bin";
std::string fname_out;
std::string commands;
std::string prompt;
std::string context;
std::string grammar;
};
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -pms N, --prompt-ms N [%-7d] prompt duration in milliseconds\n", params.prompt_ms);
fprintf(stderr, " -cms N, --command-ms N [%-7d] command duration in milliseconds\n", params.command_ms);
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
fprintf(stderr, "\n");
}
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params);
exit(0);
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-pms" || arg == "--prompt-ms") { params.prompt_ms = std::stoi(argv[++i]); }
else if (arg == "-cms" || arg == "--command-ms") { params.command_ms = std::stoi(argv[++i]); }
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
else if (arg == "-cmd" || arg == "--commands") { params.commands = argv[++i]; }
else if (arg == "-p" || arg == "--prompt") { params.prompt = argv[++i]; }
else if (arg == "-ctx" || arg == "--context") { params.context = argv[++i]; }
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
}
}
return true;
}
std::unique_ptr<WChess> g_wchess;
void set_moves(const std::string & moves) {
if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str());
}
audio_async g_audio(30*1000);
void get_audio(int ms, std::vector<float> & pcmf32_cur) {
g_audio.get(ms, pcmf32_cur);
}
void clear_audio() {
g_audio.clear();
}
int main(int argc, char ** argv) {
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {
return 1;
}
if (whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
}
// whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
// init audio
if (!g_audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
return 1;
}
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
wparams.print_progress = false;
wparams.print_special = params.print_special;
wparams.print_realtime = false;
wparams.print_timestamps = !params.no_timestamps;
wparams.translate = params.translate;
wparams.no_context = true;
wparams.no_timestamps = params.no_timestamps;
wparams.single_segment = true;
wparams.max_tokens = params.max_tokens;
wparams.language = params.language.c_str();
wparams.n_threads = params.n_threads;
wparams.audio_ctx = params.audio_ctx;
wparams.speed_up = params.speed_up;
wparams.temperature = 0.4f;
wparams.temperature_inc = 1.0f;
wparams.greedy.best_of = 5;
wparams.beam_search.beam_size = 5;
wparams.initial_prompt = params.context.data();
g_audio.resume();
// wait for 1 second to avoid any buffered noise
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
g_audio.clear();
WChess::callbacks cb;
cb.check_running = sdl_poll_events;
cb.get_audio = get_audio;
cb.set_moves = set_moves;
cb.clear_audio = clear_audio;
WChess::settings s;
s.vad_ms = 2000;
s.prompt_ms = params.prompt_ms;
s.command_ms = params.command_ms;
s.vad_thold = params.vad_thold;
s.freq_thold = params.freq_thold;
s.print_energy = params.print_energy;
g_wchess.reset(new WChess(ctx, wparams, cb, s));
set_moves("start");
g_wchess->run();
g_audio.pause();
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}
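The example above wires whisper.cpp together with SDL audio capture and the WChess move logic. For orientation, here is a minimal, self-contained sketch of the underlying whisper.cpp transcription flow on its own (the model path, thread count and the source of the PCM samples are placeholder assumptions, and error handling is trimmed):

```cpp
#include "whisper.h"

#include <cstdio>
#include <vector>

// Minimal sketch: load a model, run whisper_full() on 16 kHz mono float PCM,
// then print the decoded segments.
int transcribe(const std::vector<float> & pcmf32) {
    struct whisper_context_params cparams = whisper_context_default_params();

    struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
    if (ctx == nullptr) {
        return 1;
    }

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.n_threads = 4;
    wparams.language  = "en";

    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        whisper_free(ctx);
        return 1;
    }

    for (int i = 0; i < whisper_full_n_segments(ctx); ++i) {
        printf("%s\n", whisper_full_get_segment_text(ctx, i));
    }

    whisper_free(ctx);
    return 0;
}
```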

View File

@ -1,51 +0,0 @@
set(TARGET wchess.wasm)
add_executable(${TARGET}
wchess.wasm.cpp
)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE
common
libwchess
)
unset(EXTRA_FLAGS)
if (WHISPER_WASM_SINGLE_FILE)
set(EXTRA_FLAGS "-s SINGLE_FILE=1")
message(STATUS "Embedding WASM inside chess.js")
add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_BINARY_DIR}/bin/${TARGET}.js
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js
)
endif()
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1024MB \
-s TOTAL_MEMORY=1024MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \
")
add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/
)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY)

View File

@ -1,32 +0,0 @@
# chessboard.js Change Log
All notable changes to this project will be documented in this file.
## [1.0.0] - 2019-06-11
- Orientation methods now return current orientation. [Issue #64]
- Drop support for IE8
- Do not check for `window.JSON` (Error #1004)
- Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however)
- id query selectors are now supported as the first argument to `Chessboard()`
- Remove Error #1002
- Format code according to [StandardJS]
- Bump minimum jQuery version to 1.8.3
- Throttle piece drag functions
## [0.3.0] - 2013-08-10
- Added `appearSpeed` animation config property
- Added `onSnapbackEnd` event
- Added `onMoveEnd` event
## [0.2.0] - 2013-08-05
- Added `onMouseoverSquare` and `onMouseoutSquare` events
- Added `onSnapEnd` event
- Added square code as CSS class on the squares
- Added [chess.js] integration examples
## [0.1.0] - 2013-05-21
- Initial release
[chess.js]:https://github.com/jhlywa/chess.js
[Issue #64]:https://github.com/oakmac/chessboardjs/issues/64
[StandardJS]:https://standardjs.com/

View File

@ -1,20 +0,0 @@
Copyright 2019 Chris Oakman
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,82 +0,0 @@
# chessboard.js
chessboard.js is a JavaScript chessboard component. It depends on [jQuery].
Please see [chessboardjs.com] for documentation and examples.
## What is chessboard.js?
chessboard.js is a standalone JavaScript chessboard component. It is designed to be
"just a board" and to expose a flexible, powerful API so that it can be used in
different ways.
Here's a non-exhaustive list of things you can do with chessboard.js:
- Use chessboard.js to show game positions alongside your expert commentary.
- Use chessboard.js to have a tactics website where users have to guess the best
move.
- Integrate chessboard.js and [chess.js] with a PGN database and allow people to
search and playback games (see [Example 5000])
- Build a chess server and have users play their games out using the
chessboard.js board.
chessboard.js is flexible enough to handle any of these situations with relative
ease.
## What can chessboard.js **not** do?
The scope of chessboard.js is limited to "just a board." This is intentional and
makes chessboard.js flexible for handling a multitude of chess-related problems.
Specifically, chessboard.js does not understand anything about how the game of
chess is played: how a knight moves, whose turn it is, whether White is in check, etc.
Fortunately, the powerful [chess.js] library deals with exactly this sort of
problem domain and plays nicely with chessboard.js's flexible API. Some examples
of chessboard.js combined with chess.js: 5000, 5001, 5002.
Here is a list of things that chessboard.js is **not**:
- A chess engine
- A legal move validator
- A PGN parser
chessboard.js is designed to work well with any of those things, but the idea
behind chessboard.js is that the logic that controls the board should be
independent of those other problems.
## Docs and Examples
- Docs - <http://chessboardjs.com/docs>
- Examples - <http://chessboardjs.com/examples>
## Developer Tools
```sh
# create a build in the build/ directory
npm run build
# re-build the website
npm run website
```
## License
[MIT License](LICENSE.md)
[jQuery]:https://jquery.com/
[chessboardjs.com]:http://chessboardjs.com
[chess.js]:https://github.com/jhlywa/chess.js
[Example 5000]:http://chessboardjs.com/examples#5000

View File

@ -1,54 +0,0 @@
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
.clearfix-7da63 {
clear: both;
}
.board-b72b1 {
border: 2px solid #404040;
box-sizing: content-box;
}
.square-55d63 {
float: left;
position: relative;
/* disable any native browser highlighting */
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.white-1e1d7 {
background-color: #f0d9b5;
color: #b58863;
}
.black-3c85d {
background-color: #b58863;
color: #f0d9b5;
}
.highlight1-32417, .highlight2-9c5d2 {
box-shadow: inset 0 0 3px 3px yellow;
}
.notation-322f9 {
cursor: default;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
position: absolute;
}
.alpha-d2270 {
bottom: 1px;
right: 3px;
}
.numeric-fc462 {
top: 2px;
left: 2px;
}

View File

@ -1,2 +0,0 @@
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
.clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}

Binary files not shown (12 image files removed: 1.4 KiB, 2.9 KiB, 1.8 KiB, 777 B, 2.6 KiB, 748 B, 2.3 KiB, 2.8 KiB, 2.3 KiB, 1.5 KiB, 3.7 KiB, 1.1 KiB).

File diff suppressed because one or more lines are too long

View File

@ -1,29 +0,0 @@
{
"author": "Chris Oakman <chris@oakmac.com> (http://chrisoakman.com/)",
"name": "@chrisoakman/chessboardjs",
"description": "JavaScript chessboard widget",
"homepage": "https://chessboardjs.com",
"license": "MIT",
"version": "1.0.0",
"repository": {
"type": "git",
"url": "git://github.com/oakmac/chessboardjs.git"
},
"files": ["dist/"],
"dependencies": {
"jquery": ">=3.4.1"
},
"devDependencies": {
"csso": "3.5.1",
"fs-plus": "3.1.1",
"kidif": "1.1.0",
"mustache": "2.3.0",
"standard": "10.0.2",
"uglify-js": "3.6.0"
},
"scripts": {
"build": "standard lib/chessboard.js && node scripts/build.js",
"standard": "standard --fix lib/*.js website/js/*.js",
"website": "node scripts/website.js"
}
}

View File

@ -1,376 +0,0 @@
<!doctype html>
<html lang="en-us">
<head>
<title>wchess : Voice assistant example using Whisper + WebAssembly</title>
<style>
#output {
width: 100%;
height: 100%;
margin: 0 auto;
margin-top: 10px;
border-left: 0px;
border-right: 0px;
padding-left: 0px;
padding-right: 0px;
display: block;
background-color: black;
color: white;
font-size: 10px;
font-family: 'Lucida Console', Monaco, monospace;
outline: none;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;
}
</style>
<link rel="stylesheet" href="css/chessboard-1.0.0.min.css" integrity="sha384-q94+BZtLrkL1/ohfjR8c6L+A6qzNH9R2hBLwyoAfu3i/WCvQjzL2RQJ3uNHDISdU" crossorigin="anonymous">
</head>
<body onload="loadWhisper()">
<div id="main-container">
<b>wchess : Voice assistant example using Whisper + WebAssembly</b>
<br><br>
You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">GitHub</a>.
<br><br>
<b>More examples:</b>
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<br><br>
<hr>
<div id="model-whisper">
Whisper model: <span id="model-whisper-status"></span>
<span id="fetch-whisper-progress"></span>
<button id="clear" onclick="clearCache()">Clear Cache</button>
<!--
<input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-->
</div>
<br>
<div id="myBoard" style="width: 400px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('myBoard', 'start')
</script>
<br>
<div id="input">
<button id="toggler" disabled>Hold</button>
</div>
<br>
<div id="state">
Status: <b><span id="state-status">not started</span></b>
<pre id="state-moves">[The moves will be displayed here]</pre>
</div>
<hr>
Debug output:
<textarea id="output" rows="20"></textarea>
<br>
<b>Troubleshooting</b>
<br><br>
The page does some heavy computations, so make sure:
<ul>
<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
<li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
<li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
</ul>
<div class="cell-version">
<span>
|
Build time: <span class="nav-link">@GIT_DATE@</span> |
Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
<a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">Source Code</a> |
</span>
</div>
</div>
<script type="text/javascript" src="js/helpers.js"></script>
<script type='text/javascript'>
// web audio context
var context = null;
// the command instance
var instance = null;
// model name
var model_whisper = null;
var Module = {
print: printTextarea,
printErr: printTextarea,
setStatus: function(text) {
printTextarea('js: ' + text);
},
monitorRunDependencies: function(left) {
},
preRun: function() {
printTextarea('js: Preparing ...');
},
postRun: function() {
printTextarea('js: Module initialized successfully!');
instance = Module.init('whisper.bin');
if (instance) {
printTextarea("js: whisper initialized, instance: " + instance);
}
else {
printTextarea("js: failed to initialize whisper");
}
}
};
//
// fetch models
//
let dbVersion = 1
let dbName = 'whisper.ggerganov.com';
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
function storeFS(fname, buf) {
// write to WASM file using FS_createDataFile
// if the file exists, delete it
try {
Module.FS_unlink(fname);
} catch (e) {
// ignore
}
Module.FS_createDataFile("/", fname, buf, true, true);
printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
if (model_whisper != null) {
document.getElementById('toggler').disabled = false;
}
}
function loadWhisper() {
// let urls = {
// 'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
// 'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
// 'tiny-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin',
// 'base-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin',
// };
// let sizes = {
// 'tiny.en': 75,
// 'base.en': 142,
// 'tiny-en-q5_1': 31,
// 'base-en-q5_1': 57,
// };
let url = 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin';
let dst = 'whisper.bin';
let size_mb = 75;
model_whisper = 'tiny.en';
document.getElementById('model-whisper-status').innerHTML = 'loading "' + model_whisper + '" ... ';
cbProgress = function(p) {
let el = document.getElementById('fetch-whisper-progress');
el.innerHTML = Math.round(100*p) + '%';
};
cbCancel = function() {
var el;
el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
};
loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
}
//
// microphone
//
const kSampleRate = 16000;
const kRestartRecording_s = 120;
const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
var mediaRecorder = null;
var doRecording = false;
var startTime = 0;
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
function stopRecording() {
Module.set_status("paused");
mediaRecorder.stop();
}
function startRecording() {
if (!context) {
context = new AudioContext({
sampleRate: kSampleRate,
channelCount: 1,
echoCancellation: false,
autoGainControl: true,
noiseSuppression: true,
});
}
startTime = Date.now();
var chunks = [];
var stream = null;
navigator.mediaDevices.getUserMedia({audio: true, video: false})
.then(function(s) {
stream = s;
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = function(e) {
chunks.push(e.data);
var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
var reader = new FileReader();
reader.onload = function(event) {
var buf = new Uint8Array(reader.result);
if (!context) {
return;
}
context.decodeAudioData(buf.buffer, function(audioBuffer) {
var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
var source = offlineContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(offlineContext.destination);
source.start(0);
offlineContext.startRendering().then(function(renderedBuffer) {
let audio = renderedBuffer.getChannelData(0);
if (instance) {
printTextarea('js: number of samples: ' + audio.length);
Module.set_audio(instance, audio);
}
});
mediaRecorder = null;
context = null;
});
}
reader.readAsArrayBuffer(blob);
};
mediaRecorder.onstop = function(e) {
stream.getTracks().forEach(function(track) {
track.stop();
});
};
mediaRecorder.start();
})
.catch(function(err) {
printTextarea('js: error getting audio stream: ' + err);
});
}
//
// main
//
var nLines = 0;
var intervalUpdate = null;
var movesAll = '';
document.body.addEventListener('keydown', function(event) {
if (event.keyCode === 32) {
document.getElementById('toggler').innerText = "Release";
onStart();
}
}, true);
document.body.addEventListener('keyup', function(event) {
if (event.keyCode === 32) {
document.getElementById('toggler').innerText = "Hold";
onStop();
}
}, true);
document.getElementById('toggler').addEventListener('mousedown', function(event) {
this.innerText = "Release";
onStart();
}, true);
document.getElementById('toggler').addEventListener('mouseup', function(event) {
this.innerText = "Hold";
onStop();
}, true);
function onStart() {
if (!instance) {
return;
}
startRecording();
}
function onStop() {
printTextarea('js: stopping recording ...');
stopRecording();
var interval = setInterval(function() {
var moves = Module.get_moves();
if (moves != null && moves.length > 1) {
clearInterval(interval);
for (move of moves.split(' ')) {
board.move(move);
}
movesAll += moves + '<br>';
nLines++;
// if more than 10 lines, remove the first line
if (nLines > 10) {
var i = movesAll.indexOf('<br>');
if (i > 0) {
movesAll = movesAll.substring(i + 4);
nLines--;
}
}
document.getElementById('state-status').innerHTML = Module.get_status();
document.getElementById('state-moves').innerHTML = movesAll;
}
}, 100);
}
</script>
<script type="text/javascript" src="js/chess.js"></script>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,173 +0,0 @@
#include <WChess.h>
#include <emscripten/bind.h>
#include <atomic>
#include <thread>
constexpr int N_THREAD = 8;
std::vector<struct whisper_context *> g_contexts(4, nullptr);
std::mutex g_mutex;
std::thread g_worker;
std::atomic<bool> g_running(false);
std::string g_status = "";
std::string g_status_forced = "";
std::string g_moves = "";
std::vector<float> g_pcmf32;
void set_status(const std::string & status) {
std::lock_guard<std::mutex> lock(g_mutex);
g_status = status;
}
void set_moves(const std::string & moves) {
std::lock_guard<std::mutex> lock(g_mutex);
g_moves = moves;
}
void get_audio(int /* ms */, std::vector<float> & audio) {
std::lock_guard<std::mutex> lock(g_mutex);
audio = g_pcmf32;
}
bool check_running() {
return g_running;
}
void clear_audio() {
std::lock_guard<std::mutex> lock(g_mutex);
g_pcmf32.clear();
}
void wchess_main(size_t i) {
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special = false;
wparams.no_timestamps = true;
wparams.max_tokens = 32;
wparams.audio_ctx = 768; // partial encoder context for better performance
wparams.temperature = 0.0f;
wparams.temperature_inc = 2.0f;
wparams.greedy.best_of = 1;
wparams.beam_search.beam_size = 1;
wparams.language = "en";
wparams.grammar_penalty = 100.0;
wparams.initial_prompt = "bishop to c3, rook to d4, knight to e5, d4 d5, knight to c3, c3, queen to d4, king b1, pawn to a1, bishop to b2, knight to c3,";
printf("command: using %d threads\n", wparams.n_threads);
WChess::callbacks cb;
cb.set_status = set_status;
cb.check_running = check_running;
cb.get_audio = get_audio;
cb.set_moves = set_moves;
cb.clear_audio = clear_audio;
WChess(g_contexts[i], wparams, cb, {}).run();
if (i < g_contexts.size()) {
whisper_free(g_contexts[i]);
g_contexts[i] = nullptr;
}
}
EMSCRIPTEN_BINDINGS(command) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
if (g_contexts[i] != nullptr) {
g_running = true;
if (g_worker.joinable()) {
g_worker.join();
}
g_worker = std::thread([i]() {
wchess_main(i);
});
return i + 1;
} else {
return (size_t) 0;
}
}
}
return (size_t) 0;
}));
emscripten::function("free", emscripten::optional_override([](size_t /* index */) {
if (g_running) {
g_running = false;
}
}));
emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
--index;
if (index >= g_contexts.size()) {
return -1;
}
if (g_contexts[index] == nullptr) {
return -2;
}
{
std::lock_guard<std::mutex> lock(g_mutex);
const int n = audio["length"].as<int>();
emscripten::val heap = emscripten::val::module_property("HEAPU8");
emscripten::val memory = heap["buffer"];
g_pcmf32.resize(n);
emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
memoryView.call<void>("set", audio);
}
return 0;
}));
emscripten::function("get_moves", emscripten::optional_override([]() {
std::string moves;
{
std::lock_guard<std::mutex> lock(g_mutex);
moves = std::move(g_moves);
}
return moves;
}));
emscripten::function("get_status", emscripten::optional_override([]() {
std::string status;
{
std::lock_guard<std::mutex> lock(g_mutex);
status = g_status_forced.empty() ? g_status : g_status_forced;
}
return status;
}));
emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
std::lock_guard<std::mutex> lock(g_mutex);
g_status_forced = status;
}));
}

View File

@ -1,4 +1,5 @@
#include <algorithm>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <limits>
@ -235,7 +236,7 @@ typedef float2 dfloat2;
#endif //GGML_CUDA_F16
static __device__ __forceinline__ int get_int_from_int8(const int8_t * x8, const int & i32) {
const uint16_t * x16 = (uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
const uint16_t * x16 = (const uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
int x32 = 0;
x32 |= x16[0] << 0;
@ -245,7 +246,7 @@ static __device__ __forceinline__ int get_int_from_int8(const int8_t * x8, const
}
static __device__ __forceinline__ int get_int_from_uint8(const uint8_t * x8, const int & i32) {
const uint16_t * x16 = (uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
const uint16_t * x16 = (const uint16_t *) (x8 + sizeof(int) * i32); // assume at least 2 byte alignment
int x32 = 0;
x32 |= x16[0] << 0;
@ -255,11 +256,11 @@ static __device__ __forceinline__ int get_int_from_uint8(const uint8_t * x8, con
}
static __device__ __forceinline__ int get_int_from_int8_aligned(const int8_t * x8, const int & i32) {
return *((int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
}
static __device__ __forceinline__ int get_int_from_uint8_aligned(const uint8_t * x8, const int & i32) {
return *((int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
return *((const int *) (x8 + sizeof(int) * i32)); // assume at least 4 byte alignment
}
template<typename T>
@ -469,7 +470,7 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
#define MUL_MAT_SRC1_COL_STRIDE 128
#define MAX_STREAMS 8
static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_DEVICES][MAX_STREAMS] = { nullptr };
static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_DEVICES][MAX_STREAMS] = { { nullptr } };
struct ggml_tensor_extra_gpu {
void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
@ -2248,6 +2249,7 @@ static __device__ __forceinline__ float vec_dot_q4_0_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_0(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh; (void)x_sc;
__shared__ int tile_x_qs[mmq_y * (WARP_SIZE) + mmq_y];
__shared__ float tile_x_d[mmq_y * (WARP_SIZE/QI4_0) + mmq_y/QI4_0];
@ -2259,7 +2261,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_0(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q4_0(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh; (void)x_sc;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
GGML_CUDA_ASSUME(k >= 0);
@ -2268,7 +2270,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI4_0;
const int kqsx = k % QI4_0;
const block_q4_0 * bx0 = (block_q4_0 *) vx;
const block_q4_0 * bx0 = (const block_q4_0 *) vx;
float * x_dmf = (float *) x_dm;
@ -2306,9 +2308,10 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q4_0_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh; (void)x_sc;
const int kyqs = k % (QI8_1/2) + QI8_1 * (k / (QI8_1/2));
const float * x_dmf = (float *) x_dm;
const float * x_dmf = (const float *) x_dm;
int u[2*VDR_Q4_0_Q8_1_MMQ];
@ -2342,6 +2345,7 @@ static __device__ __forceinline__ float vec_dot_q4_1_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_1(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh; (void)x_sc;
__shared__ int tile_x_qs[mmq_y * (WARP_SIZE) + + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI4_1) + mmq_y/QI4_1];
@ -2353,6 +2357,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_1(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q4_1(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh; (void)x_sc;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -2362,7 +2367,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI4_1;
const int kqsx = k % QI4_1;
const block_q4_1 * bx0 = (block_q4_1 *) vx;
const block_q4_1 * bx0 = (const block_q4_1 *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2397,6 +2402,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q4_1_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh; (void)x_sc;
const int kyqs = k % (QI8_1/2) + QI8_1 * (k / (QI8_1/2));
@ -2434,6 +2440,7 @@ static __device__ __forceinline__ float vec_dot_q5_0_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_0(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh; (void)x_sc;
__shared__ int tile_x_ql[mmq_y * (2*WARP_SIZE) + mmq_y];
__shared__ float tile_x_d[mmq_y * (WARP_SIZE/QI5_0) + mmq_y/QI5_0];
@ -2445,6 +2452,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_0(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q5_0(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh; (void)x_sc;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -2454,7 +2462,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI5_0;
const int kqsx = k % QI5_0;
const block_q5_0 * bx0 = (block_q5_0 *) vx;
const block_q5_0 * bx0 = (const block_q5_0 *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2509,6 +2517,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q5_0_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh; (void)x_sc;
const int kyqs = k % (QI8_1/2) + QI8_1 * (k / (QI8_1/2));
const int index_bx = i * (WARP_SIZE/QI5_0) + i/QI5_0 + k/QI5_0;
@ -2548,6 +2557,7 @@ static __device__ __forceinline__ float vec_dot_q5_1_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_1(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh; (void)x_sc;
__shared__ int tile_x_ql[mmq_y * (2*WARP_SIZE) + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI5_1) + mmq_y/QI5_1];
@ -2559,6 +2569,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_1(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q5_1(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh; (void)x_sc;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -2568,7 +2579,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI5_1;
const int kqsx = k % QI5_1;
const block_q5_1 * bx0 = (block_q5_1 *) vx;
const block_q5_1 * bx0 = (const block_q5_1 *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2620,6 +2631,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q5_1_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh; (void)x_sc;
const int kyqs = k % (QI8_1/2) + QI8_1 * (k / (QI8_1/2));
const int index_bx = i * (WARP_SIZE/QI5_1) + + i/QI5_1 + k/QI5_1;
@ -2654,6 +2666,7 @@ static __device__ __forceinline__ float vec_dot_q8_0_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q8_0(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh; (void)x_sc;
__shared__ int tile_x_qs[mmq_y * (WARP_SIZE) + mmq_y];
__shared__ float tile_x_d[mmq_y * (WARP_SIZE/QI8_0) + mmq_y/QI8_0];
@ -2665,6 +2678,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q8_0(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q8_0(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh; (void)x_sc;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -2675,7 +2689,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kqsx = k % QI8_0;
float * x_dmf = (float *) x_dm;
const block_q8_0 * bx0 = (block_q8_0 *) vx;
const block_q8_0 * bx0 = (const block_q8_0 *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2710,6 +2724,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q8_0_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh; (void)x_sc;
const float * x_dmf = (const float *) x_dm;
const float * y_df = (const float *) y_ds;
@ -2743,6 +2758,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q2_K(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh;
__shared__ int tile_x_ql[mmq_y * (WARP_SIZE) + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI2_K) + mmq_y/QI2_K];
@ -2756,6 +2772,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q2_K(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q2_K(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -2765,7 +2782,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI2_K;
const int kqsx = k % QI2_K;
const block_q2_K * bx0 = (block_q2_K *) vx;
const block_q2_K * bx0 = (const block_q2_K *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2813,6 +2830,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q2_K_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh;
const int kbx = k / QI2_K;
const int ky = (k % QI2_K) * QR2_K;
@ -2886,7 +2904,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI3_K;
const int kqsx = k % QI3_K;
const block_q3_K * bx0 = (block_q3_K *) vx;
const block_q3_K * bx0 = (const block_q3_K *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -2967,7 +2985,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1_mul_mat(
const float * x_dmf = (const float *) x_dm;
const float * y_df = (const float *) y_ds;
const int8_t * scales = ((int8_t *) (x_sc + i * (WARP_SIZE/4) + i/4 + kbx*4)) + ky/4;
const int8_t * scales = ((const int8_t *) (x_sc + i * (WARP_SIZE/4) + i/4 + kbx*4)) + ky/4;
int v[QR3_K*VDR_Q3_K_Q8_1_MMQ];
@ -3082,6 +3100,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_K(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh;
__shared__ int tile_x_ql[mmq_y * (WARP_SIZE) + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI4_K) + mmq_y/QI4_K];
@ -3095,6 +3114,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q4_K(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q4_K(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -3104,7 +3124,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI4_K; // == 0 if QK_K == 256
const int kqsx = k % QI4_K; // == k if QK_K == 256
const block_q4_K * bx0 = (block_q4_K *) vx;
const block_q4_K * bx0 = (const block_q4_K *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -3149,7 +3169,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const block_q4_K * bxi = bx0 + i*blocks_per_row + (k % (WARP_SIZE/8)) / (QI4_K/8);
const int * scales = (int *) bxi->scales;
const int * scales = (const int *) bxi->scales;
const int ksc = k % (WARP_SIZE/8);
@ -3164,6 +3184,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q4_K_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh;
const uint8_t * sc = ((const uint8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k/16]) + 2*((k % 16) / 8);
@ -3263,6 +3284,7 @@ static __device__ __forceinline__ float vec_dot_q5_K_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_K(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh;
__shared__ int tile_x_ql[mmq_y * (2*WARP_SIZE) + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI5_K) + mmq_y/QI5_K];
@ -3276,6 +3298,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q5_K(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q5_K(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -3285,7 +3308,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI5_K; // == 0 if QK_K == 256
const int kqsx = k % QI5_K; // == k if QK_K == 256
const block_q5_K * bx0 = (block_q5_K *) vx;
const block_q5_K * bx0 = (const block_q5_K *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -3341,7 +3364,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const block_q5_K * bxi = bx0 + i*blocks_per_row + (k % (WARP_SIZE/8)) / (QI5_K/8);
const int * scales = (int *) bxi->scales;
const int * scales = (const int *) bxi->scales;
const int ksc = k % (WARP_SIZE/8);
@ -3356,6 +3379,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q5_K_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh;
const uint8_t * sc = ((const uint8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k/16]) + 2 * ((k % 16) / 8);
@ -3392,6 +3416,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
}
template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q6_K(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc) {
(void)x_qh;
__shared__ int tile_x_ql[mmq_y * (2*WARP_SIZE) + mmq_y];
__shared__ half2 tile_x_dm[mmq_y * (WARP_SIZE/QI6_K) + mmq_y/QI6_K];
@ -3405,6 +3430,7 @@ template <int mmq_y> static __device__ __forceinline__ void allocate_tiles_q6_K(
template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinline__ void load_tiles_q6_K(
const void * __restrict__ vx, int * __restrict__ x_ql, half2 * __restrict__ x_dm, int * __restrict__ x_qh,
int * __restrict__ x_sc, const int & i_offset, const int & i_max, const int & k, const int & blocks_per_row) {
(void)x_qh;
GGML_CUDA_ASSUME(i_offset >= 0);
GGML_CUDA_ASSUME(i_offset < nwarps);
@ -3414,7 +3440,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
const int kbx = k / QI6_K; // == 0 if QK_K == 256
const int kqsx = k % QI6_K; // == k if QK_K == 256
const block_q6_K * bx0 = (block_q6_K *) vx;
const block_q6_K * bx0 = (const block_q6_K *) vx;
#pragma unroll
for (int i0 = 0; i0 < mmq_y; i0 += nwarps) {
@ -3476,6 +3502,7 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
static __device__ __forceinline__ float vec_dot_q6_K_q8_1_mul_mat(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {
(void)x_qh;
const float * x_dmf = (const float *) x_dm;
const float * y_df = (const float *) y_ds;
@ -3518,7 +3545,7 @@ static __device__ __forceinline__ void mul_mat_q(
__shared__ int tile_y_qs[mmq_x * WARP_SIZE];
__shared__ half2 tile_y_ds[mmq_x * WARP_SIZE/QI8_1];
float sum[mmq_y/WARP_SIZE][mmq_x/nwarps] = {0.0f};
float sum[mmq_y/WARP_SIZE][mmq_x/nwarps] = {{0.0f}};
for (int ib0 = 0; ib0 < blocks_per_row_x; ib0 += blocks_per_warp) {
@ -5840,7 +5867,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
return ptr;
}
#ifdef DEBUG_CUDA_MALLOC
fprintf(stderr, "%s: %d buffers, max_size = %u MB, tot_size = %u MB, requested %u MB\n", __func__, nnz,
fprintf(stderr, "%s: %d buffers, max_size = %u MiB, tot_size = %u MiB, requested %u MiB\n", __func__, nnz,
(uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
#endif
void * ptr;
@ -5978,7 +6005,7 @@ void * ggml_cuda_host_malloc(size_t size) {
// The allocation error can be bypassed. A null ptr will be assigned out of this function.
// This can fix the OOM error in WSL.
cudaGetLastError();
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
fprintf(stderr, "WARNING: failed to allocate %.2f MiB of pinned memory: %s\n",
size/1024.0/1024.0, cudaGetErrorString(err));
return nullptr;
}
@ -6359,6 +6386,7 @@ static int64_t get_row_rounding(ggml_type type) {
case GGML_TYPE_Q8_0:
return max_compute_capability >= CC_RDNA2 ? 128 : 64;
case GGML_TYPE_F16:
case GGML_TYPE_F32:
return 1;
case GGML_TYPE_Q2_K:
return max_compute_capability >= CC_RDNA2 ? 128 : 32;
@ -6381,6 +6409,7 @@ static int64_t get_row_rounding(ggml_type type) {
case GGML_TYPE_Q8_0:
return 64;
case GGML_TYPE_F16:
case GGML_TYPE_F32:
return 1;
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q3_K:
@ -6990,7 +7019,7 @@ static void ggml_cuda_op_mul_mat(
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
const int64_t ne03 = src0->ne[3];
const int64_t nrows0 = ggml_nrows(src0);
// const int64_t nrows0 = ggml_nrows(src0);
const int64_t ne10 = src1->ne[0];
const int64_t ne11 = src1->ne[1];
@ -7091,7 +7120,7 @@ static void ggml_cuda_op_mul_mat(
if (src0_on_device && src0_is_contiguous) {
src0_dd[id] = (char *) src0_extra->data_device[id];
} else {
const size_t size_src0_ddq = split ? (row_high[id]-row_low[id])*ne00 * src0_ts/src0_bs : ggml_nbytes(src0);
// const size_t size_src0_ddq = split ? (row_high[id]-row_low[id])*ne00 * src0_ts/src0_bs : ggml_nbytes(src0);
src0_dd[id] = (char *) ggml_cuda_pool_malloc(ggml_nbytes(src0), &src0_as[id]);
}
@ -7324,7 +7353,7 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
}
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
if (!g_cublas_loaded) return false;
if (!g_cublas_loaded) { return false; }
const int64_t ne10 = src1->ne[0];
@ -7402,8 +7431,8 @@ static void ggml_cuda_mul_mat_vec_nc(const ggml_tensor * src0, const ggml_tensor
ggml_mul_mat_vec_nc_f16_f32_cuda(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, row_stride_x, ne02, ne12, channel_stride_x, main_stream);
}
__global__ void k_compute_batched_ptrs(
const half * src0_as_f16, const half * src1_as_f16, half * dst_f16,
__global__ static void k_compute_batched_ptrs(
const half * src0_as_f16, const half * src1_as_f16, float * dst_f32,
const void ** ptrs_src, void ** ptrs_dst,
int ne12, int ne13,
int ne23,
@ -7423,7 +7452,7 @@ __global__ void k_compute_batched_ptrs(
ptrs_src[0*ne23 + i12 + i13*ne12] = (const char *) src0_as_f16 + i02*nb02 + i03*nb03;
ptrs_src[1*ne23 + i12 + i13*ne12] = (const char *) src1_as_f16 + i12*nb12/2 + i13*nb13/2;
ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst_f16 + i12* nb2/2 + i13* nb3/2;
ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst_f32 + i12* nb2 + i13* nb3 ;
}
static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@ -7480,9 +7509,6 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
half * src1_as_f16 = (half *) ggml_cuda_pool_malloc(ne1 * sizeof(half), &src1_as);
to_fp16_cuda(src1_ddf, src1_as_f16, ne1, main_stream);
size_t dst_as = 0;
half * dst_f16 = (half *) ggml_cuda_pool_malloc(ne * sizeof(half), &dst_as);
GGML_ASSERT(ne12 % ne02 == 0);
GGML_ASSERT(ne13 % ne03 == 0);
@ -7490,8 +7516,8 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
const int64_t r2 = ne12/ne02;
const int64_t r3 = ne13/ne03;
const half alpha_f16 = 1.0f;
const half beta_f16 = 0.0f;
const float alpha = 1.0f;
const float beta = 0.0f;
#if 0
// use cublasGemmEx
@ -7504,10 +7530,10 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const char *) src0_as_f16 + i02*src0->nb[2] + i03*src0->nb[3] , CUDA_R_16F, nb01/sizeof(half),
(const char *) src1_as_f16 + i12*src1->nb[2]/2 + i13*src1->nb[3]/2, CUDA_R_16F, nb11/sizeof(float),
&beta_f16, ( char *) dst_f16 + i12* dst->nb[2]/2 + i13* dst->nb[3]/2, CUDA_R_16F, ne01,
CUBLAS_COMPUTE_16F,
&alpha, (const char *) src0_as_f16 + i02*src0->nb[2] + i03*src0->nb[3] , CUDA_R_16F, nb01/sizeof(half),
(const char *) src1_as_f16 + i12*src1->nb[2]/2 + i13*src1->nb[3]/2, CUDA_R_16F, nb11/sizeof(float),
&beta, ( char *) dst_ddf + i12* dst->nb[2] + i13* dst->nb[3] , CUDA_R_32F, ne01,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
}
}
@ -7519,11 +7545,11 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmStridedBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const char *) src0_as_f16, CUDA_R_16F, nb01/sizeof(half), src0->nb[2]/sizeof(half), // strideA
(const char *) src1_as_f16, CUDA_R_16F, nb11/sizeof(float), src1->nb[2]/sizeof(float), // strideB
&beta_f16, ( char *) dst_f16, CUDA_R_16F, ne01, dst->nb[2]/sizeof(float), // strideC
&alpha, (const char *) src0_as_f16, CUDA_R_16F, nb01/sizeof(half), src0->nb[2]/sizeof(half), // strideA
(const char *) src1_as_f16, CUDA_R_16F, nb11/sizeof(float), src1->nb[2]/sizeof(float), // strideB
&beta, ( char *) dst_ddf, CUDA_R_32F, ne01, dst->nb[2]/sizeof(float), // strideC
ne12*ne13,
CUBLAS_COMPUTE_16F,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
} else {
// use cublasGemmBatchedEx
@ -7540,7 +7566,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
dim3 block_dims(ne13, ne12);
k_compute_batched_ptrs<<<1, block_dims, 0, main_stream>>>(
src0_as_f16, src1_as_f16, dst_f16,
src0_as_f16, src1_as_f16, dst_ddf,
ptrs_src, ptrs_dst,
ne12, ne13,
ne23,
@ -7553,11 +7579,11 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
CUBLAS_CHECK(
cublasGemmBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
ne01, ne11, ne10,
&alpha_f16, (const void **) (ptrs_src + 0*ne23), CUDA_R_16F, nb01/sizeof(half),
(const void **) (ptrs_src + 1*ne23), CUDA_R_16F, nb11/sizeof(float),
&beta_f16, ( void **) (ptrs_dst + 0*ne23), CUDA_R_16F, ne01,
&alpha, (const void **) (ptrs_src + 0*ne23), CUDA_R_16F, nb01/sizeof(half),
(const void **) (ptrs_src + 1*ne23), CUDA_R_16F, nb11/sizeof(float),
&beta, ( void **) (ptrs_dst + 0*ne23), CUDA_R_32F, ne01,
ne23,
CUBLAS_COMPUTE_16F,
CUBLAS_COMPUTE_32F,
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
if (ptrs_src_s != 0) {
@ -7569,11 +7595,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
}
#endif
const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_F16);
to_fp32_cuda(dst_f16, dst_ddf, ne, main_stream);
ggml_cuda_pool_free(src1_as_f16, src1_as);
ggml_cuda_pool_free(dst_f16, dst_as);
}
static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@ -8018,7 +8040,7 @@ void ggml_cuda_free_scratch() {
}
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
if (!g_cublas_loaded) return false;
if (!g_cublas_loaded) { return false; }
ggml_cuda_func_t func;
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
@ -8032,7 +8054,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
if (tensor->op == GGML_OP_MUL_MAT) {
if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
#ifndef NDEBUG
fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = %d, src1->ne[3] = %d - fallback to CPU\n", __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);
fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = " PRId64 ", src1->ne[3] = " PRId64 " - fallback to CPU\n", __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);
#endif
return false;
}
@ -8317,14 +8339,14 @@ static ggml_backend_graph_plan_t ggml_backend_cuda_graph_plan_create(ggml_backen
UNUSED(cgraph);
}
static void ggml_backend_cuda_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
[[noreturn]] static void ggml_backend_cuda_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
GGML_ASSERT(!"not implemented");
UNUSED(backend);
UNUSED(plan);
}
static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
[[noreturn]] static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
GGML_ASSERT(!"not implemented");
UNUSED(backend);
@ -8340,8 +8362,9 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor * node = cgraph->nodes[i];
if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE)
if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE) {
continue;
}
assert(node->backend == GGML_BACKEND_GPU);
for (int j = 0; j < GGML_MAX_SRC; j++) {
if (node->src[j] != nullptr) {
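The batched matrix-multiplication changes above swap the half-precision alpha/beta and the intermediate F16 destination buffer for plain floats, requesting CUBLAS_COMPUTE_32F and writing directly into the F32 output. A rough standalone sketch of that call pattern with cublasGemmEx, assuming a valid cuBLAS handle and device buffers created elsewhere (an illustration, not whisper.cpp code):

```cpp
#include <cublas_v2.h>
#include <cuda_fp16.h>

// Sketch: FP16 inputs, FP32 accumulation, FP32 output.
// `handle`, `d_A`, `d_B` (half) and `d_C` (float) are assumed to be valid
// device-side resources; matrices are column-major, no transposes.
cublasStatus_t gemm_f16_in_f32_out(cublasHandle_t handle,
                                   const half * d_A, const half * d_B, float * d_C,
                                   int m, int n, int k) {
    const float alpha = 1.0f;
    const float beta  = 0.0f;

    return cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                        m, n, k,
                        &alpha, d_A, CUDA_R_16F, m,
                                d_B, CUDA_R_16F, k,
                        &beta,  d_C, CUDA_R_32F, m,
                        CUBLAS_COMPUTE_32F,
                        CUBLAS_GEMM_DEFAULT_TENSOR_OP);
}
```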

View File

@ -52,6 +52,11 @@ void ggml_metal_free(struct ggml_metal_context * ctx);
void * ggml_metal_host_malloc(size_t n);
void ggml_metal_host_free (void * data);
// helper to check if the device supports a specific family
// ideally, the user code should be doing these checks
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
bool ggml_metal_supports_family(struct ggml_metal_context * ctx, int family);
// set the number of command buffers to use
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
@ -100,6 +105,8 @@ GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
#ifdef __cplusplus
}
#endif
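As the header comment above suggests, user code can verify GPU family support before committing to the Metal backend. A minimal sketch of such a check, mirroring the Apple7 (family 7) requirement applied later in this change; the wrapper function itself is just an illustration:

```cpp
#include "ggml-backend.h"
#include "ggml-metal.h"

// Sketch: create the Metal backend only if the device supports the required
// GPU family (here: Apple7, i.e. family 7), otherwise fall back to CPU.
static ggml_backend_t init_metal_backend_or_null(void) {
    ggml_backend_t backend = ggml_backend_metal_init();

    if (backend && !ggml_backend_metal_supports_family(backend, 7)) {
        ggml_backend_free(backend);
        backend = NULL;
    }

    return backend; // NULL means: use the CPU backend instead
}
```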

View File

@ -459,6 +459,10 @@ void ggml_metal_host_free(void * data) {
free(data);
}
bool ggml_metal_supports_family(struct ggml_metal_context * ctx, int family) {
return [ctx->device supportsFamily:(MTLGPUFamilyApple1 + family - 1)];
}
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
}
@ -1072,7 +1076,7 @@ void ggml_metal_graph_compute(
GGML_ASSERT(ne00 == ne10);
GGML_ASSERT(ne03 == ne13);
const uint gqa = ne12/ne02;
const unsigned int gqa = ne12/ne02;
// find the break-even point where the matrix-matrix kernel becomes more efficient compared
// to the matrix-vector kernel
@ -1751,3 +1755,9 @@ void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
ggml_metal_set_n_cb(ctx, n_cb);
}
bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) {
struct ggml_metal_context * ctx = (struct ggml_metal_context *)backend->context;
return ggml_metal_supports_family(ctx, family);
}

View File

@ -1078,6 +1078,11 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
if (!backend_gpu) {
WHISPER_LOG_ERROR("%s: ggml_backend_metal_init() failed\n", __func__);
}
if (!ggml_backend_metal_supports_family(backend_gpu, 7)) {
WHISPER_LOG_ERROR("%s: Metal GPU does not support family 7 - falling back to CPU\n", __func__);
ggml_backend_free(backend_gpu);
backend_gpu = NULL;
}
}
#endif
@ -3593,6 +3598,17 @@ const char * whisper_lang_str(int id) {
return nullptr;
}
const char * whisper_lang_str_full(int id) {
for (const auto & kv : g_lang) {
if (kv.second.first == id) {
return kv.second.second.c_str();
}
}
WHISPER_LOG_ERROR("%s: unknown language id %d\n", __func__, id);
return nullptr;
}
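A short sketch of how the new accessor might be used next to the existing helpers, e.g. to report both the short code and the full name of a language id (purely illustrative):

```cpp
#include "whisper.h"

#include <cstdio>

// Print both the short code and the full name for a language id,
// e.g. id 2 -> "de" / "german".
void print_lang(int id) {
    const char * code = whisper_lang_str(id);
    const char * name = whisper_lang_str_full(id);

    if (code && name) {
        printf("language %d: %s (%s)\n", id, code, name);
    }
}
```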
int whisper_lang_auto_detect_with_state(
struct whisper_context * ctx,
struct whisper_state * state,
@ -5180,7 +5196,7 @@ int whisper_full_with_state(
ctx, state, progress_cur, params.progress_callback_user_data);
}
// of only 1 second left, then stop
// if only 1 second left, then stop
if (seek + 100 >= seek_end) {
break;
}
@ -6064,7 +6080,9 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
// 1GB array
const size_t size = arr*1e6;
// single-thread
double sum = 0.0;
// heat-up
{
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);
@ -6074,7 +6092,6 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
double sum = 0.0;
for (size_t i = 0; i < n; i++) {
const int64_t t0 = ggml_time_us();
@ -6088,21 +6105,108 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
src[rand() % size] = rand() % 256;
}
snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (heat-up)\n", (double) (n*size)/(tsum*1e9));
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
snprintf(strbuf, sizeof(strbuf), "sum: %f\n", sum);
s += strbuf;
}
free(src);
free(dst);
}
// single-thread
{
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);
for (size_t i = 0; i < size; i++) src[i] = i;
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
for (size_t i = 0; i < n; i++) {
const int64_t t0 = ggml_time_us();
memcpy(dst, src, size);
const int64_t t1 = ggml_time_us();
tsum += (t1 - t0)*1e-6;
src[rand() % size] = rand() % 256;
}
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s ( 1 thread)\n", (double) (n*size)/(tsum*1e9));
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
}
free(src);
free(dst);
}
// multi-thread
for (uint32_t k = 1; k <= n_threads; k++) {
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);
for (size_t i = 0; i < size; i++) src[i] = i;
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
auto helper = [&](int th) {
const int64_t i0 = (th + 0)*size/k;
const int64_t i1 = (th + 1)*size/k;
for (size_t i = 0; i < n; i++) {
memcpy(dst + i0, src + i0, i1 - i0);
src[i0 + rand() % (i1 - i0)] = rand() % 256;
};
};
const int64_t t0 = ggml_time_us();
std::vector<std::thread> threads(k - 1);
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th] = std::thread(helper, th);
}
helper(k - 1);
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th].join();
}
const int64_t t1 = ggml_time_us();
tsum += (t1 - t0)*1e-6;
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (%2d thread)\n", (double) (n*size)/(tsum*1e9), k);
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
}
free(src);
free(dst);
}
snprintf(strbuf, sizeof(strbuf), "sum: %f\n", sum);
s += strbuf;
return s.c_str();
}
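The report built above is returned as a C string, so a caller (for example a benchmarking tool) only needs to print it; the thread count used below is an arbitrary illustration:

```cpp
#include "whisper.h"

#include <cstdio>

int main() {
    // Run the heat-up, single-thread and multi-thread memcpy benchmarks
    // with up to 8 threads and print the resulting report.
    fprintf(stderr, "%s\n", whisper_bench_memcpy_str(8));
    return 0;
}
```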

View File

@ -51,7 +51,7 @@ extern "C" {
// ...
//
// whisper_context_params cparams = whisper_context_default_params();
//
//
// struct whisper_context * ctx = whisper_init_from_file_with_params("/path/to/ggml-base.en.bin", cparams);
//
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
@ -315,6 +315,9 @@ extern "C" {
// Return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found
WHISPER_API const char * whisper_lang_str(int id);
// Return the full string of the specified language name (e.g. 2 -> "german"), returns nullptr if not found
WHISPER_API const char * whisper_lang_str_full(int id);
// Use mel data at offset_ms to try and auto-detect the spoken language
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
// Returns the top language id or negative on failure