Compare commits


1 Commit

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| | ec96d68402 | whisper : quantize encoder only | 2023-11-16 16:19:02 +02:00 |
76 changed files with 2010 additions and 46368 deletions


@@ -25,7 +25,6 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential libsdl2-dev
make
@@ -87,7 +86,6 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
@@ -115,10 +113,8 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y clang
apt install -y clang build-essential cmake libsdl2-dev
apt install -y build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
ctest -L gh --output-on-failure'
@@ -144,7 +140,6 @@ jobs:
docker run --platform ${{ matrix.arch }} --rm \
-v ${{ github.workspace }}:/workspace \
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
apt install -y build-essential cmake
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
@@ -222,10 +217,10 @@ jobs:
sdl2: [ON]
include:
- arch: Win32
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x86.zip
s2arc: x86
- arch: x64
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.24/OpenBLAS-0.3.24-x64.zip
s2arc: x64
- sdl2: ON
s2ver: 2.26.0
@@ -290,7 +285,6 @@ jobs:
arch: [x64]
cublas: [ON]
sdl2: [ON]
cuda-toolkit: [12.2.0, 11.8.0]
include:
- arch: x64
s2arc: x64
@@ -306,9 +300,7 @@ jobs:
- name: Install CUDA Toolkit
id: cuda-toolkit
uses: Jimver/cuda-toolkit@v0.2.11
with:
cuda: '${{ matrix.cuda-toolkit }}'
uses: Jimver/cuda-toolkit@v0.2.10
- name: Fetch SDL2 and set SDL2_DIR
if: matrix.sdl2 == 'ON'
@@ -323,17 +315,10 @@ jobs:
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
-DWHISPER_CUBLAS=1
- name: Build ${{ matrix.cuda-toolkit }}
- name: Build
run: |
cd ./build
cmake --build . --config ${{ matrix.build }}
- name: Copy CUDA DLLs
run: >
Copy-Item -PassThru
-Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
-Include cudart64_*,cublas64_*,cublasLt64_*
-Destination build/bin/${{ matrix.build }}
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
- name: Copy SDL2.dll
if: matrix.sdl2 == 'ON'
@@ -343,7 +328,7 @@ jobs:
if: matrix.sdl2 == 'ON'
uses: actions/upload-artifact@v1
with:
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
name: whisper-cublas-bin-${{ matrix.arch }}
path: build/bin/${{ matrix.build }}
emscripten:

.gitignore vendored

@@ -31,7 +31,6 @@ build-sanitize-thread/
/talk-llama
/bench
/quantize
/server
/lsp
arm_neon.h


@@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.5)
project(whisper.cpp VERSION 1.5.2)
project(whisper.cpp VERSION 1.5.0)
# Add path to modules
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
@@ -533,7 +533,7 @@ target_compile_definitions(${TARGET} PUBLIC
${WHISPER_EXTRA_FLAGS}
)
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "ggml.h;whisper.h")
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
include(GNUInstallDirs)


@@ -1,4 +1,4 @@
default: main bench quantize server
default: main bench quantize
ifndef UNAME_S
UNAME_S := $(shell uname -s)
@@ -338,7 +338,7 @@ libwhisper.so: $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
clean:
rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so
rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
#
# Examples
@@ -359,9 +356,6 @@ bench: examples/bench/bench.cpp $(WHISPER_OBJ)
quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)


@@ -2,14 +2,33 @@
import PackageDescription
#if arch(arm) || arch(arm64)
let platforms: [SupportedPlatform]? = [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
]
let exclude: [String] = []
let resources: [Resource] = [
.process("ggml-metal.metal")
]
let additionalSources: [String] = ["ggml-metal.m"]
let additionalSettings: [CSetting] = [
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_USE_METAL")
]
#else
let platforms: [SupportedPlatform]? = nil
let exclude: [String] = ["ggml-metal.metal"]
let resources: [Resource] = []
let additionalSources: [String] = []
let additionalSettings: [CSetting] = []
#endif
let package = Package(
name: "whisper",
platforms: [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
],
platforms: platforms,
products: [
.library(name: "whisper", targets: ["whisper"]),
],
@@ -17,7 +36,7 @@ let package = Package(
.target(
name: "whisper",
path: ".",
exclude: [
exclude: exclude + [
"bindings",
"cmake",
"coreml",
@@ -36,22 +55,19 @@
"whisper.cpp",
"ggml-alloc.c",
"ggml-backend.c",
"ggml-quants.c",
"ggml-metal.m"
],
resources: [.process("ggml-metal.metal")],
"ggml-quants.c"
] + additionalSources,
resources: resources,
publicHeadersPath: "spm-headers",
cSettings: [
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
.define("GGML_USE_ACCELERATE"),
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_USE_METAL")
.define("GGML_USE_ACCELERATE")
// NOTE: NEW_LAPACK will required iOS version 16.4+
// We should consider add this in the future when we drop support for iOS 14
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
// .define("ACCELERATE_NEW_LAPACK"),
// .define("ACCELERATE_LAPACK_ILP64")
],
] + additionalSettings,
linkerSettings: [
.linkedFramework("Accelerate")
]


@@ -6,7 +6,7 @@
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
Stable: [v1.5.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
Stable: [v1.5.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
@@ -110,8 +110,8 @@ options:
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [5 ] number of best candidates to keep
-bs N, --beam-size N [5 ] beam size for beam search
-bo N, --best-of N [2 ] number of best candidates to keep
-bs N, --beam-size N [-1 ] beam size for beam search
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
@@ -128,7 +128,6 @@ options:
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
-ocsv, --output-csv [false ] output result in a CSV file
-oj, --output-json [false ] output result in a JSON file
-ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
@@ -140,8 +139,7 @@ options:
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
-ls, --log-score [false ] log best decoder scores of tokens
-ng, --no-gpu [false ] disable GPU
-ls, --log-score [false ] log best decoder scores of token
bash ./models/download-ggml-model.sh base.en
@@ -770,7 +768,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
@@ -780,7 +777,6 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)


@@ -1,26 +1,9 @@
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif
ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
BUILD_DIR := build
MODELS_DIR := models
EXAMPLES_DIR := $(wildcard examples/*)
INCLUDE_PATH := $(abspath ../..)
LIBRARY_PATH := $(abspath ../..)
ifeq ($(UNAME_S),Darwin)
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
endif
all: clean whisper examples
whisper: mkdir
@@ -28,13 +11,8 @@ whisper: mkdir
@${MAKE} -C ../.. libwhisper.a
test: model-small whisper modtidy
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
endif
examples: $(EXAMPLES_DIR)
@@ -43,11 +21,7 @@ model-small: mkdir examples/go-model-download
$(EXAMPLES_DIR): mkdir whisper modtidy
@echo Build example $(notdir $@)
ifeq ($(UNAME_S),Darwin)
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
endif
mkdir:
@echo Mkdir ${BUILD_DIR}


@@ -1,6 +1,6 @@
{
"name": "whisper.cpp",
"version": "1.5.2",
"version": "1.5.0",
"description": "Whisper speech recognition",
"main": "whisper.js",
"scripts": {

File diff suppressed because one or more lines are too long


@@ -65,7 +65,6 @@ elseif(CMAKE_JS_VERSION)
else()
add_subdirectory(main)
add_subdirectory(stream)
add_subdirectory(server)
add_subdirectory(command)
add_subdirectory(bench)
add_subdirectory(quantize)
@@ -73,5 +72,3 @@ else()
add_subdirectory(talk-llama)
add_subdirectory(lsp)
endif()
add_subdirectory(wchess)


@@ -139,13 +139,10 @@ void audio_async::callback(uint8_t * stream, int len) {
return;
}
size_t n_samples = len / sizeof(float);
const size_t n_samples = len / sizeof(float);
if (n_samples > m_audio.size()) {
n_samples = m_audio.size();
stream += (len - (n_samples * sizeof(float)));
}
m_audio_new.resize(n_samples);
memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
//fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
@@ -156,7 +153,7 @@ void audio_async::callback(uint8_t * stream, int len) {
const size_t n0 = m_audio.size() - m_audio_pos;
memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = m_audio.size();
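The two copy lines shown against each other above differ only in how the wrapped-around part of the source buffer is addressed: `stream` is a `uint8_t *`, so skipping the first `n0` float samples means advancing the pointer by `n0 * sizeof(float)` bytes, while `&stream[n0]` advances by only `n0` bytes. A self-contained sketch of this ring-buffer write follows; the function and variable names are illustrative, not the actual `audio_async` members.

```cpp
// Illustrative ring-buffer write: append n_samples floats taken from a byte
// stream into a circular float buffer, wrapping around at the end.
// Assumes n_samples <= ring.size() (the real callback clamps this first).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

static void write_wrapped(std::vector<float> & ring, size_t & pos, const uint8_t * stream, size_t n_samples) {
    const size_t n0 = ring.size() - pos; // free slots before the end of the ring
    if (n_samples <= n0) {
        memcpy(&ring[pos], stream, n_samples * sizeof(float));
    } else {
        memcpy(&ring[pos], stream, n0 * sizeof(float));
        // skip the n0 samples already copied: that is n0 * sizeof(float) BYTES,
        // because stream is a byte pointer (&stream[n0] would skip only n0 bytes)
        memcpy(&ring[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
    }
    pos = (pos + n_samples) % ring.size();
}

int main() {
    std::vector<float> ring(8, 0.0f);
    std::vector<float> samples = {1, 2, 3, 4, 5, 6};
    size_t pos = 6; // force a wrap-around
    write_wrapped(ring, pos, reinterpret_cast<const uint8_t *>(samples.data()), samples.size());
    printf("pos after write: %zu\n", pos); // -> 4
    return 0;
}
```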


@@ -41,6 +41,7 @@ private:
std::mutex m_mutex;
std::vector<float> m_audio;
std::vector<float> m_audio_new;
size_t m_audio_pos = 0;
size_t m_audio_len = 0;
};


@@ -22,7 +22,6 @@ var printTextarea = (function() {
async function clearCache() {
if (confirm('Are you sure you want to clear the cache?\nAll the models will be downloaded again.')) {
indexedDB.deleteDatabase(dbName);
location.reload();
}
}


@@ -17,37 +17,28 @@ options:
-d N, --duration N [0 ] duration of audio to process in milliseconds
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [5 ] number of best candidates to keep
-bs N, --beam-size N [5 ] beam size for beam search
-bs N, --beam-size N [-1 ] beam size for beam search
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
-su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
-tr, --translate [false ] translate from source language to english
-di, --diarize [false ] stereo audio diarization
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
-nf, --no-fallback [false ] do not use temperature fallback while decoding
-otxt, --output-txt [false ] output result in a text file
-ovtt, --output-vtt [false ] output result in a vtt file
-osrt, --output-srt [false ] output result in a srt file
-olrc, --output-lrc [false ] output result in a lrc file
-owts, --output-words [false ] output script for generating karaoke video
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
-ocsv, --output-csv [false ] output result in a CSV file
-oj, --output-json [false ] output result in a JSON file
-ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-nt, --no-timestamps [true ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
-dl, --detect-language [false ] exit after automatically detecting language
--prompt PROMPT [ ] initial prompt
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
-ls, --log-score [false ] log best decoder scores of tokens
-ng, --no-gpu [false ] disable GPU
```


@@ -165,8 +165,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);


@@ -162,6 +162,7 @@ bool whisper_model_quantize(const std::string & fname_inp, const std::string & f
"encoder.conv2.bias",
"encoder.positional_embedding",
"decoder.positional_embedding",
"decoder.*",
};
if (!ggml_common_quantize_0(finp, fout, ftype, { ".*" }, to_skip)) {
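For context, this hunk is the substance of the compare: adding `"decoder.*"` to the `to_skip` list makes the quantizer leave every decoder tensor at its original precision, so only the encoder weights are quantized, matching the commit message "whisper : quantize encoder only". The entries are regular expressions matched against tensor names. Below is a minimal sketch of that kind of pattern matching; it is illustrative only, not the actual `ggml_common_quantize_0` implementation, and the tensor names used are assumptions.

```cpp
// Sketch: decide per tensor whether to quantize it, given regex skip patterns
// such as the to_skip list above. Names and structure here are illustrative.
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

static bool matches_any(const std::string & name, const std::vector<std::string> & patterns) {
    for (const auto & p : patterns) {
        if (std::regex_match(name, std::regex(p))) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::vector<std::string> to_skip = {
        "encoder.conv2.bias",
        "encoder.positional_embedding",
        "decoder.positional_embedding",
        "decoder.*", // added by this commit: keep every decoder tensor unquantized
    };

    const char * names[] = {
        "encoder.blocks.0.attn.query.weight", // would be quantized
        "decoder.blocks.0.attn.query.weight", // skipped -> encoder-only quantization
    };

    for (const auto * name : names) {
        printf("%-40s %s\n", name, matches_any(name, to_skip) ? "skip" : "quantize");
    }
    return 0;
}
```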


@@ -1,12 +0,0 @@
set(TARGET server)
add_executable(${TARGET} server.cpp httplib.h json.hpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
# Check if the compiler is MinGW
if(MINGW)
# Link the necessary libraries for SSL and Winsock
target_link_libraries(${TARGET} PRIVATE -lcrypt32 -lssl -lcrypto -lws2_32)
endif()


@@ -1,68 +0,0 @@
# whisper.cpp http server
Simple http server. WAV Files are passed to the inference model via http requests.
https://github.com/ggerganov/whisper.cpp/assets/1991296/e983ee53-8741-4eb5-9048-afe5e4594b8f
## Usage
```
./server -h
usage: ./bin/server [options]
options:
-h, --help [default] show this help message and exit
-t N, --threads N [4 ] number of threads to use during computation
-p N, --processors N [1 ] number of processors to use during computation
-ot N, --offset-t N [0 ] time offset in milliseconds
-on N, --offset-n N [0 ] segment index offset
-d N, --duration N [0 ] duration of audio to process in milliseconds
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [2 ] number of best candidates to keep
-bs N, --beam-size N [-1 ] beam size for beam search
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
-tr, --translate [false ] translate from source language to english
-di, --diarize [false ] stereo audio diarization
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
-nf, --no-fallback [false ] do not use temperature fallback while decoding
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pr, --print-realtime [false ] print output in realtime
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
-dl, --detect-language [false ] exit after automatically detecting language
--prompt PROMPT [ ] initial prompt
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
--host HOST, [127.0.0.1] Hostname/ip-adress for the server
--port PORT, [8080 ] Port number for the server
--convert, [false ] Convert audio to WAV, requires ffmpeg on the server
```
> [!WARNING]
> **Do not run the server example with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads and using ffmpeg for format conversions. Always validate and sanitize inputs to guard against potential security threats.**
## request examples
**/inference**
```
curl 127.0.0.1:8080/inference \
-H "Content-Type: multipart/form-data" \
-F file="@<file-path>" \
-F temperature="0.2" \
-F response-format="json"
```
**/load**
```
curl 127.0.0.1:8080/load \
-H "Content-Type: multipart/form-data" \
-F model="<path-to-model-file>"
```
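For reference, here is a minimal C++ client for the `/inference` endpoint using the same cpp-httplib header (`httplib.h`) that this example bundles. The host, port, and form field names mirror the curl examples above; the audio file path is a placeholder, and the sketch assumes a server running with default settings.

```cpp
// Minimal client sketch for the /inference endpoint (illustrative only).
#include "httplib.h"
#include <fstream>
#include <iostream>
#include <sstream>

int main() {
    // read the WAV file into memory (path is a placeholder)
    std::ifstream f("samples/jfk.wav", std::ios::binary);
    std::stringstream ss;
    ss << f.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);

    // each item: { name, content, filename, content_type }
    httplib::MultipartFormDataItems items = {
        { "file",            ss.str(), "jfk.wav", "audio/wav" },
        { "temperature",     "0.2",    "",        ""          },
        { "response-format", "json",   "",        ""          },
    };

    auto res = cli.Post("/inference", items);
    if (res && res->status == 200) {
        std::cout << res->body << std::endl; // e.g. {"text":"..."}
    } else {
        std::cerr << "request failed" << std::endl;
    }
    return 0;
}
```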

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -1,811 +0,0 @@
#include "common.h"
#include "whisper.h"
#include "httplib.h"
#include "json.hpp"
#include <cmath>
#include <fstream>
#include <cstdio>
#include <string>
#include <thread>
#include <vector>
#include <cstring>
#include <sstream>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
using namespace httplib;
using json = nlohmann::json;
namespace {
// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
// Lowest is red, middle is yellow, highest is green.
const std::vector<std::string> k_colors = {
"\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
"\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
};
// output formats
const std::string json_format = "json";
const std::string text_format = "text";
const std::string srt_format = "srt";
const std::string vjson_format = "verbose_json";
const std::string vtt_format = "vtt";
struct server_params
{
std::string hostname = "127.0.0.1";
std::string public_path = "examples/server/public";
int32_t port = 8080;
int32_t read_timeout = 600;
int32_t write_timeout = 600;
bool ffmpeg_converter = false;
};
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_processors = 1;
int32_t offset_t_ms = 0;
int32_t offset_n = 0;
int32_t duration_ms = 0;
int32_t progress_step = 5;
int32_t max_context = -1;
int32_t max_len = 0;
int32_t best_of = 2;
int32_t beam_size = -1;
float word_thold = 0.01f;
float entropy_thold = 2.40f;
float logprob_thold = -1.00f;
float userdef_temp = 0.20f;
bool speed_up = false;
bool debug_mode = false;
bool translate = false;
bool detect_language = false;
bool diarize = false;
bool tinydiarize = false;
bool split_on_word = false;
bool no_fallback = false;
bool print_special = false;
bool print_colors = false;
bool print_realtime = false;
bool print_progress = false;
bool no_timestamps = false;
bool use_gpu = true;
std::string language = "en";
std::string prompt = "";
std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
std::string model = "models/ggml-base.en.bin";
std::string response_format = json_format;
// [TDRZ] speaker turn string
std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
std::string openvino_encode_device = "CPU";
};
// 500 -> 00:05.000
// 6000 -> 01:00.000
std::string to_timestamp(int64_t t, bool comma = false) {
int64_t msec = t * 10;
int64_t hr = msec / (1000 * 60 * 60);
msec = msec - hr * (1000 * 60 * 60);
int64_t min = msec / (1000 * 60);
msec = msec - min * (1000 * 60);
int64_t sec = msec / 1000;
msec = msec - sec * 1000;
char buf[32];
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
return std::string(buf);
}
int timestamp_to_sample(int64_t t, int n_samples) {
return std::max(0, std::min((int) n_samples - 1, (int) ((t*WHISPER_SAMPLE_RATE)/100)));
}
bool is_file_exist(const char *fileName)
{
std::ifstream infile(fileName);
return infile.good();
}
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params,
const server_params& sparams) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options] \n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors);
fprintf(stderr, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms);
fprintf(stderr, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n);
fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
// fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
fprintf(stderr, " -pr, --print-realtime [%-7s] print output in realtime\n", params.print_realtime ? "true" : "false");
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
// server params
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
fprintf(stderr, "\n");
}
bool whisper_params_parse(int argc, char ** argv, whisper_params & params, server_params & sparams) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params, sparams);
exit(0);
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(argv[++i]); }
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
// else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
else if (arg == "-sow" || arg == "--split-on-word") { params.split_on_word = true; }
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
else if (arg == "-pr" || arg == "--print-realtime") { params.print_realtime = true; }
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-dl" || arg == "--detect-language") { params.detect_language = true; }
else if ( arg == "--prompt") { params.prompt = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
// server params
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params, sparams);
exit(0);
}
}
return true;
}
struct whisper_print_user_data {
const whisper_params * params;
const std::vector<std::vector<float>> * pcmf32s;
int progress_prev;
};
void check_ffmpeg_availibility() {
int result = system("ffmpeg -version");
if (result == 0) {
std::cout << "ffmpeg is available." << std::endl;
} else {
// ffmpeg is not available
std::cout << "ffmpeg is not found. Please ensure that ffmpeg is installed ";
std::cout << "and that its executable is included in your system's PATH. ";
exit(0);
}
}
bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
std::ostringstream cmd_stream;
std::string converted_filename_temp = temp_filename + "_temp.wav";
cmd_stream << "ffmpeg -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
std::string cmd = cmd_stream.str();
int status = std::system(cmd.c_str());
if (status != 0) {
error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
return false;
}
// Remove the original file
if (remove(temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to remove the original file.\"}";
return false;
}
// Rename the temporary file to match the original filename
if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
return false;
}
return true;
}
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
std::string speaker = "";
const int64_t n_samples = pcmf32s[0].size();
const int64_t is0 = timestamp_to_sample(t0, n_samples);
const int64_t is1 = timestamp_to_sample(t1, n_samples);
double energy0 = 0.0f;
double energy1 = 0.0f;
for (int64_t j = is0; j < is1; j++) {
energy0 += fabs(pcmf32s[0][j]);
energy1 += fabs(pcmf32s[1][j]);
}
if (energy0 > 1.1*energy1) {
speaker = "0";
} else if (energy1 > 1.1*energy0) {
speaker = "1";
} else {
speaker = "?";
}
//printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, speaker = %s\n", is0, is1, energy0, energy1, speaker.c_str());
if (!id_only) {
speaker.insert(0, "(speaker ");
speaker.append(")");
}
return speaker;
}
void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
if (progress >= *progress_prev + progress_step) {
*progress_prev += progress_step;
fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress);
}
}
void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
const auto & params = *((whisper_print_user_data *) user_data)->params;
const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
const int n_segments = whisper_full_n_segments(ctx);
std::string speaker = "";
int64_t t0 = 0;
int64_t t1 = 0;
// print the last n_new segments
const int s0 = n_segments - n_new;
if (s0 == 0) {
printf("\n");
}
for (int i = s0; i < n_segments; i++) {
if (!params.no_timestamps || params.diarize) {
t0 = whisper_full_get_segment_t0(ctx, i);
t1 = whisper_full_get_segment_t1(ctx, i);
}
if (!params.no_timestamps) {
printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
}
if (params.diarize && pcmf32s.size() == 2) {
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
}
if (params.print_colors) {
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
if (params.print_special == false) {
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
if (id >= whisper_token_eot(ctx)) {
continue;
}
}
const char * text = whisper_full_get_token_text(ctx, i, j);
const float p = whisper_full_get_token_p (ctx, i, j);
const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
}
} else {
const char * text = whisper_full_get_segment_text(ctx, i);
printf("%s%s", speaker.c_str(), text);
}
if (params.tinydiarize) {
if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
printf("%s", params.tdrz_speaker_turn.c_str());
}
}
// with timestamps or speakers: each segment on new line
if (!params.no_timestamps || params.diarize) {
printf("\n");
}
fflush(stdout);
}
}
std::string output_str(struct whisper_context * ctx, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
std::stringstream result;
const int n_segments = whisper_full_n_segments(ctx);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
std::string speaker = "";
if (params.diarize && pcmf32s.size() == 2)
{
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
}
result << speaker << text << "\n";
}
return result.str();
}
void get_req_parameters(const Request & req, whisper_params & params)
{
// user model configu.has_fileion
if (req.has_file("offset-t"))
{
params.offset_t_ms = std::stoi(req.get_file_value("offset-t").content);
}
if (req.has_file("offset-n"))
{
params.offset_n = std::stoi(req.get_file_value("offset-n").content);
}
if (req.has_file("duration"))
{
params.duration_ms = std::stoi(req.get_file_value("duration").content);
}
if (req.has_file("max-context"))
{
params.max_context = std::stoi(req.get_file_value("max-context").content);
}
if (req.has_file("prompt"))
{
params.prompt = req.get_file_value("prompt").content;
}
if (req.has_file("response-format"))
{
params.response_format = req.get_file_value("response-format").content;
}
if (req.has_file("temperature"))
{
params.userdef_temp = std::stof(req.get_file_value("temperature").content);
}
}
} // namespace
int main(int argc, char ** argv) {
whisper_params params;
server_params sparams;
std::mutex whisper_mutex;
if (whisper_params_parse(argc, argv, params, sparams) == false) {
whisper_print_usage(argc, argv, params, sparams);
return 1;
}
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
whisper_print_usage(argc, argv, params, sparams);
exit(0);
}
if (params.diarize && params.tinydiarize) {
fprintf(stderr, "error: cannot use both --diarize and --tinydiarize\n");
whisper_print_usage(argc, argv, params, sparams);
exit(0);
}
if (sparams.ffmpeg_converter) {
check_ffmpeg_availibility();
}
// whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
return 3;
}
// initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
Server svr;
svr.set_default_headers({{"Server", "whisper.cpp"},
{"Access-Control-Allow-Origin", "*"},
{"Access-Control-Allow-Headers", "content-type"}});
std::string const default_content = "<html>hello</html>";
// this is only called if no index.html is found in the public --path
svr.Get("/", [&default_content](const Request &, Response &res){
res.set_content(default_content, "text/html");
return false;
});
svr.Post("/inference", [&](const Request &req, Response &res){
// acquire whisper model mutex lock
whisper_mutex.lock();
// first check user requested fields of the request
if (!req.has_file("file"))
{
fprintf(stderr, "error: no 'file' field in the request\n");
const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}";
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
auto audio_file = req.get_file_value("file");
// check non-required fields
get_req_parameters(req, params);
std::string filename{audio_file.filename};
printf("Received request: %s\n", filename.c_str());
// audio arrays
std::vector<float> pcmf32; // mono-channel F32 PCM
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
// write to temporary file
const std::string temp_filename = "whisper_server_temp_file.wav";
std::ofstream temp_file{temp_filename, std::ios::binary};
temp_file << audio_file.content;
temp_file.close();
// if file is not wav, convert to wav
if (sparams.ffmpeg_converter) {
std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
const bool is_converted = convert_to_wav(temp_filename, error_resp);
if (!is_converted) {
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
}
// read wav content into pcmf32
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
res.set_content(error_resp, "application/json");
std::remove(temp_filename.c_str());
whisper_mutex.unlock();
return;
}
// remove temp file
std::remove(temp_filename.c_str());
printf("Successfully loaded %s\n", filename.c_str());
// print system information
{
fprintf(stderr, "\n");
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
}
// print some info about the processing
{
fprintf(stderr, "\n");
if (!whisper_is_multilingual(ctx)) {
if (params.language != "en" || params.translate) {
params.language = "en";
params.translate = false;
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
}
}
if (params.detect_language) {
params.language = "auto";
}
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
__func__, filename.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
params.n_threads, params.n_processors,
params.language.c_str(),
params.translate ? "translate" : "transcribe",
params.tinydiarize ? "tdrz = 1, " : "",
params.no_timestamps ? 0 : 1);
fprintf(stderr, "\n");
}
// run the inference
{
printf("Running whisper.cpp inference on %s\n", filename.c_str());
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
wparams.print_realtime = false;
wparams.print_progress = params.print_progress;
wparams.print_timestamps = !params.no_timestamps;
wparams.print_special = params.print_special;
wparams.translate = params.translate;
wparams.language = params.language.c_str();
wparams.detect_language = params.detect_language;
wparams.n_threads = params.n_threads;
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
wparams.offset_ms = params.offset_t_ms;
wparams.duration_ms = params.duration_ms;
wparams.thold_pt = params.word_thold;
wparams.max_len = params.max_len == 0 ? 60 : params.max_len;
wparams.split_on_word = params.split_on_word;
wparams.speed_up = params.speed_up;
wparams.debug_mode = params.debug_mode;
wparams.tdrz_enable = params.tinydiarize; // [TDRZ]
wparams.initial_prompt = params.prompt.c_str();
wparams.greedy.best_of = params.best_of;
wparams.beam_search.beam_size = params.beam_size;
wparams.temperature_inc = params.userdef_temp;
wparams.entropy_thold = params.entropy_thold;
wparams.logprob_thold = params.logprob_thold;
whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
// this callback is called on each new segment
if (params.print_realtime) {
wparams.new_segment_callback = whisper_print_segment_callback;
wparams.new_segment_callback_user_data = &user_data;
}
if (wparams.print_progress) {
wparams.progress_callback = whisper_print_progress_callback;
wparams.progress_callback_user_data = &user_data;
}
// examples for abort mechanism
// in examples below, we do not abort the processing, but we could if the flag is set to true
// the callback is called before every encoder run - if it returns false, the processing is aborted
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
bool is_aborted = *(bool*)user_data;
return !is_aborted;
};
wparams.encoder_begin_callback_user_data = &is_aborted;
}
// the callback is called before every computation - if it returns true, the computation is aborted
{
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
wparams.abort_callback = [](void * user_data) {
bool is_aborted = *(bool*)user_data;
return is_aborted;
};
wparams.abort_callback_user_data = &is_aborted;
}
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
fprintf(stderr, "%s: failed to process audio\n", argv[0]);
const std::string error_resp = "{\"error\":\"failed to process audio\"}";
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
}
// return results to user
if (params.response_format == text_format)
{
std::string results = output_str(ctx, params, pcmf32s);
res.set_content(results.c_str(), "text/html");
}
else if (params.response_format == srt_format)
{
std::stringstream ss;
const int n_segments = whisper_full_n_segments(ctx);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
std::string speaker = "";
if (params.diarize && pcmf32s.size() == 2)
{
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
}
ss << i + 1 + params.offset_n << "\n";
ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
ss << speaker << text << "\n\n";
}
res.set_content(ss.str(), "application/x-subrip");
} else if (params.response_format == vtt_format) {
std::stringstream ss;
ss << "WEBVTT\n\n";
const int n_segments = whisper_full_n_segments(ctx);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
std::string speaker = "";
if (params.diarize && pcmf32s.size() == 2)
{
speaker = estimate_diarization_speaker(pcmf32s, t0, t1, true);
speaker.insert(0, "<v Speaker");
speaker.append(">");
}
ss << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
ss << speaker << text << "\n\n";
}
res.set_content(ss.str(), "text/vtt");
}
// TODO add more output formats
else
{
std::string results = output_str(ctx, params, pcmf32s);
json jres = json{
{"text", results}
};
res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
"application/json");
}
// return whisper model mutex lock
whisper_mutex.unlock();
});
svr.Post("/load", [&](const Request &req, Response &res){
whisper_mutex.lock();
if (!req.has_file("model"))
{
fprintf(stderr, "error: no 'model' field in the request\n");
const std::string error_resp = "{\"error\":\"no 'model' field in the request\"}";
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
std::string model = req.get_file_value("model").content;
if (!is_file_exist(model.c_str()))
{
fprintf(stderr, "error: 'model': %s not found!\n", model.c_str());
const std::string error_resp = "{\"error\":\"model not found!\"}";
res.set_content(error_resp, "application/json");
whisper_mutex.unlock();
return;
}
// clean up
whisper_free(ctx);
// whisper init
ctx = whisper_init_from_file_with_params(model.c_str(), cparams);
// TODO perhaps load prior model here instead of exit
if (ctx == nullptr) {
fprintf(stderr, "error: model init failed, no model loaded must exit\n");
exit(1);
}
// initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
const std::string success = "Load was successful!";
res.set_content(success, "application/text");
// check if the model is in the file system
whisper_mutex.unlock();
});
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
const char fmt[] = "500 Internal Server Error\n%s";
char buf[BUFSIZ];
try {
std::rethrow_exception(std::move(ep));
} catch (std::exception &e) {
snprintf(buf, sizeof(buf), fmt, e.what());
} catch (...) {
snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
}
res.set_content(buf, "text/plain");
res.status = 500;
});
svr.set_error_handler([](const Request &, Response &res) {
if (res.status == 400) {
res.set_content("Invalid request", "text/plain");
} else if (res.status != 500) {
res.set_content("File Not Found", "text/plain");
res.status = 404;
}
});
// set timeouts and change hostname and port
svr.set_read_timeout(sparams.read_timeout);
svr.set_write_timeout(sparams.write_timeout);
if (!svr.bind_to_port(sparams.hostname, sparams.port))
{
fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n",
sparams.hostname.c_str(), sparams.port);
return 1;
}
// Set the base directory for serving static files
svr.set_base_dir(sparams.public_path);
// to make it ctrl+clickable:
printf("\nwhisper server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
if (!svr.listen_after_bind())
{
return 1;
}
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}


@@ -18,11 +18,6 @@ if (WHISPER_SDL2)
../../ggml-quants.c
../../whisper.cpp)
if(WIN32)
# It requires Windows 8.1 or later for PrefetchVirtualMemory
target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
endif()
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})


@@ -1,9 +0,0 @@
set(CMAKE_CXX_STANDARD 11)
add_subdirectory(libwchess)
if (EMSCRIPTEN)
add_subdirectory(wchess.wasm)
else()
add_subdirectory(wchess.cmd)
endif()


@@ -1,40 +0,0 @@
# wchess
Voice-controlled chess using Whisper
Online demo: https://whisper.ggerganov.com/wchess/
https://github.com/ggerganov/whisper.cpp/assets/1991296/c2b2f03c-9684-49f3-8106-357d2d4e67fa
## Command-line tool
```bash
mkdir build && cd build
cmake -DWHISPER_SDL2=1 ..
make -j
./bin/wchess -m ../models/ggml-base.en.bin
Move: start
a b c d e f g h
r n b q k b n r 8
p p p p p p p p 7
. * . * . * . * 6
* . * . * . * . 5
. * . * . * . * 4
* . * . * . * . 3
P P P P P P P P 2
R N B Q K B N R 1
White's turn
[(l)isten/(p)ause/(q)uit]:
```
## TODO
- Improve web-browser audio capture - sometimes it does not record the voice properly
- Add support for more languages by making the generated grammar string multi-lingual
- Fix bugs in the chess moves logic
PRs welcome!


@@ -1,19 +0,0 @@
add_library(wchess-core STATIC
WChess.cpp
WChess.h
Chessboard.cpp
Chessboard.h
)
target_link_libraries(wchess-core
PUBLIC
whisper
common
)
target_include_directories(wchess-core
PUBLIC
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
)
# add_executable(test-chessboard test-chessboard.cpp Chessboard.cpp)


@@ -1,803 +0,0 @@
#include "Chessboard.h"
#include <array>
#include <vector>
#include <algorithm>
#include <cstring>
#include <set>
#include <list>
#include <chrono>
namespace {
constexpr std::array<const char*, 64> positions = {
"a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1",
"a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2",
"a3", "b3", "c3", "d3", "e3", "f3", "g3", "h3",
"a4", "b4", "c4", "d4", "e4", "f4", "g4", "h4",
"a5", "b5", "c5", "d5", "e5", "f5", "g5", "h5",
"a6", "b6", "c6", "d6", "e6", "f6", "g6", "h6",
"a7", "b7", "c7", "d7", "e7", "f7", "g7", "h7",
"a8", "b8", "c8", "d8", "e8", "f8", "g8", "h8",
};
constexpr char INVALID_POS = positions.size();
constexpr int R = 0; // rank index
constexpr int F = 1; // file index
#define FILE (c[F] - '1')
#define RANK (c[R] - 'a')
constexpr char operator ""_P(const char * c, size_t size) {
return size < 2 || RANK < 0 || RANK > 7 ||
FILE < 0 || FILE > 7 ? INVALID_POS : FILE * 8 + RANK;
}
#undef FILE
#undef RANK
struct sview {
const char * ptr = nullptr;
size_t size = 0;
sview() = default;
sview(const char * p, size_t s) : ptr(p), size(s) {}
sview(const std::string& s) : ptr(s.data()), size(s.size()) {}
size_t find(char del, size_t pos) {
while (pos < size && ptr[pos] != del) ++pos;
return pos < size ? pos : std::string::npos;
}
};
std::vector<sview> split(sview str, char del) {
std::vector<sview> res;
size_t cur = 0;
size_t last = 0;
while (cur != std::string::npos) {
if (str.ptr[last] == ' ') {
++last;
continue;
}
cur = str.find(del, last);
size_t len = cur == std::string::npos ? str.size - last : cur - last;
res.emplace_back(str.ptr + last, len);
last = cur + 1;
}
return res;
}
char strToPos(sview str) {
return operator ""_P(str.ptr, str.size);
}
constexpr std::array<const char*, 6> pieceNames = {
"pawn", "knight", "bishop", "rook", "queen", "king",
};
static constexpr std::array<char, 6> blackShort = {
'p', 'n', 'b', 'r', 'q', 'k',
};
static constexpr std::array<char, 6> whiteShort = {
'P', 'N', 'B', 'R', 'Q', 'K',
};
char strToType(sview str) {
auto it = std::find_if(pieceNames.begin(), pieceNames.end(), [str] (const char* name) { return strncmp(name, str.ptr, str.size) == 0; });
return it != pieceNames.end() ? it - pieceNames.begin() : pieceNames.size();
}
// directions
using Direction = std::array<char, 2>;
constexpr Direction N = {(char) 0, (char) 1};
constexpr Direction NNE = {(char) 1, (char) 2};
constexpr Direction NE = {(char) 1, (char) 1};
constexpr Direction ENE = {(char) 2, (char) 1};
constexpr Direction E = {(char) 1, (char) 0};
constexpr Direction ESE = {(char) 2, (char) -1};
constexpr Direction SE = {(char) 1, (char) -1};
constexpr Direction SSE = {(char) 1, (char) -2};
constexpr Direction S = {(char) 0, (char) -1};
constexpr Direction SSW = {(char) -1, (char) -2};
constexpr Direction SW = {(char) -1, (char) -1};
constexpr Direction WSW = {(char) -2, (char) -1};
constexpr Direction W = {(char) -1, (char) 0};
constexpr Direction WNW = {(char) -2, (char) 1};
constexpr Direction NW = {(char) -1, (char) 1};
constexpr Direction NNW = {(char) -1, (char) 2};
char makeStep(char pos, const Direction& d) {
char next[2] = { char(positions[pos][R] + d[R]) , char(positions[pos][F] + d[F]) };
return strToPos(sview{next, sizeof(next)});
}
template<class Modifier>
char traverse(char pos, const Direction& d, const Modifier& m, int count = 8) {
while (--count >= 0) {
pos = makeStep(pos, d);
if (pos == INVALID_POS || m(pos)) break;
}
return pos;
}
Direction normalize(const Direction& distance) {
//return {char((distance[R] > 0) - (distance[R] < 0)), char((distance[F] > 0) - (distance[F] < 0))};
const int drp = distance[R] > 0 ? 1 : 0;
const int drn = distance[R] < 0 ? 1 : 0;
const int dfp = distance[F] > 0 ? 1 : 0;
const int dfn = distance[F] < 0 ? 1 : 0;
return {char(drp - drn), char(dfp - dfn)};
}
struct Pin {
Direction d;
Piece* pinner;
Piece* pinned;
};
using Pins = std::list<Pin>;
using Board = std::array<Piece*, 64>;
std::vector<Direction> filter(const Direction& pin, std::initializer_list<Direction> directions) {
if (pin[R] == 0 && pin[F] == 0) return directions;
std::vector<Direction> result;
for (auto& d : directions) {
if ((d[R] == pin[R] || d[R] == -pin[R]) && (d[F] == pin[F] || d[F] == -pin[F])) result.push_back(d);
}
return result;
}
}
class Piece {
public:
enum Types : char {
Pawn,
Knight,
Bishop,
Rook,
Queen,
King,
//
NUM_PIECES
};
enum Colors : char {
White,
Black,
};
const char* name() const;
char initial() const;
Types type() const { return m_type; }
Colors color() const { return m_color; }
char pos() const { return m_pos; }
void setPos(char pos) {
m_pos = pos;
invalidate();
}
const char* coord() const;
const std::set<char>& allowed() const { return m_allowed; }
bool canReach(char pos) const;
virtual bool movePattern(char pos) const = 0;
void take();
virtual void reinit(const State& state) = 0;
void invalidate();
protected:
Piece(Types type, Colors color, char pos, std::set<char> allowed)
: m_type(type), m_color(color), m_pos(pos), m_allowed(std::move(allowed)) {}
Piece(const Piece&) = delete;
~Piece() = default;
const Types m_type;
const Colors m_color;
char m_pos;
std::set<char> m_allowed;
bool m_update = false;
};
struct Pawn : public Piece {
Pawn(Colors color, char pos, std::set<char> next) : Piece(Types::Pawn, color, pos, std::move(next)) {}
bool is_first_move() const {
return m_color ? coord()[F] == '7' : coord()[F] == '2';
}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
Direction distance = {char(next[R] - cur[R]), char(next[F] - cur[F])};
char forward = m_color ? -1 : 1;
return (forward == distance[F] && distance[R] * distance[R] <= 1)
|| (is_first_move() && 2 * forward == distance[F] && distance[R] == 0);
}
virtual void reinit(const State& state) override;
};
struct Knight : public Piece {
Knight(Colors color, char pos, std::set<char> next) : Piece(Types::Knight, color, pos, std::move(next)) {}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
Direction diff = {char(next[R] - cur[R]), char(next[F] - cur[F])};
return diff[R]*diff[R] + diff[F]*diff[F] == 5;
}
virtual void reinit(const State& state) override;
};
struct Bishop : public Piece {
Bishop(Colors color, char pos) : Piece(Types::Bishop, color, pos, {}) {}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
return cur[R] - cur[F] == next[R] - next[F] || cur[R] + cur[F] == next[R] + next[F];
}
virtual void reinit(const State& state) override;
};
struct Rook : public Piece {
Rook(Colors color, char pos) : Piece(Types::Rook, color, pos, {}) {}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
return cur[R] == next[R] || cur[F] == next[F];
}
virtual void reinit(const State& state) override;
};
struct Queen : public Piece {
Queen(Colors color, char pos) : Piece(Types::Queen, color, pos, {}) {}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
return cur[R] == next[R] || cur[F] == next[F] || cur[R] - cur[F] == next[R] - next[F] || cur[R] + cur[F] == next[R] + next[F];
}
virtual void reinit(const State& state) override;
};
struct King : public Piece {
King(Colors color, char pos) : Piece(Types::King, color, pos, {}) {}
virtual bool movePattern(char pos) const override {
if (m_pos == INVALID_POS) return false;
auto cur = coord();
auto next = positions[pos];
Direction diff = {char(next[R] - cur[R]), char(next[F] - cur[F])};
return diff[R]*diff[R] + diff[F]*diff[F] <= 2;
}
virtual void reinit(const State& state) override;
};
struct PieceSet {
Piece* begin() { return &p1; }
Piece* end() { return &r2 + 1; }
const Piece* begin() const { return &p1; }
const Piece* end() const { return &r2 + 1; }
Piece& operator[](int i) { return *(begin() + i); }
const Piece& operator[](int i) const { return *(begin() + i); }
Pawn p1;
Pawn p2;
Pawn p3;
Pawn p4;
Pawn p5;
Pawn p6;
Pawn p7;
Pawn p8;
Rook r1;
Knight n1;
Bishop b1;
Queen q;
King k;
Bishop b2;
Knight n2;
Rook r2;
};
struct State {
State();
PieceSet blacks;
PieceSet whites;
Board board;
Pins blackPins;
Pins whitePins;
};
Direction findPin(const Piece& piece, const State& state) {
auto& pins = piece.color() ? state.blackPins : state.whitePins;
auto it = std::find_if(pins.begin(), pins.end(), [&] (const Pin& pin) { return pin.pinned == &piece; });
if (it != pins.end()) return it->d;
return {0, 0};
}
struct Find {
Find(const Board& board) : m_board(board) {}
bool operator() (char pos) const { return m_board[pos]; }
const Board& m_board;
};
struct Add {
Add(const Board& board, std::set<char>& moves, Piece::Colors color) : m_board(board), m_moves(moves), m_color(color) {}
bool operator() (char pos) const {
if (!m_board[pos] || m_board[pos]->color() != m_color) m_moves.insert(pos);
return m_board[pos];
}
const Board& m_board;
std::set<char>& m_moves;
Piece::Colors m_color;
};
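// Sketch (not part of the original sources): Find and Add are the two
// traversal policies used by the reinit() methods below. Find stops at the
// first occupied square and reports where it stopped, while Add collects the
// squares a sliding piece can reach, e.g.
//
//   std::set<char> moves;
//   traverse(rook.pos(), N, Add(state.board, moves, rook.color()));
//
// walks north from a hypothetical rook, inserting every empty square plus the
// first enemy piece it meets, then stops at the first occupied square.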
void Pawn::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto pin = findPin(*this, state);
auto & left = m_color ? SW : NW;
auto & right = m_color ? SE : NE;
for (auto& direction : filter(pin, { left, right })) {
auto pos = makeStep(m_pos, direction);
if (pos != INVALID_POS && state.board[pos] && state.board[pos]->color() != m_color) m_allowed.insert(pos);
}
auto & forward = m_color ? S : N;
if (!filter(pin, {forward}).empty()) {
traverse(m_pos, forward, [&] (char pos) {
if (!state.board[pos]) m_allowed.insert(pos);
return state.board[pos] || !is_first_move();
}, 2);
}
}
void Knight::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto pin = findPin(*this, state);
if (pin[R] != 0 || pin[F] != 0) return;
for (auto& direction : { NNE, ENE, ESE, SSE, SSW, WSW, WNW, NNW }) {
auto pos = makeStep(m_pos, direction);
if (pos != INVALID_POS && (!state.board[pos] || state.board[pos]->color() != m_color)) m_allowed.insert(pos);
}
}
void Bishop::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto pin = findPin(*this, state);
for (auto& direction : filter(pin, { NE, SE, SW, NW })) {
traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
}
}
void Rook::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto pin = findPin(*this, state);
for (auto& direction : filter(pin, { N, E, S, W })) {
traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
}
}
void Queen::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto pin = findPin(*this, state);
for (auto& direction : filter(pin, { N, NE, E, SE, S, SW, W, NW })) {
traverse(m_pos, direction, Add(state.board, m_allowed, m_color));
}
}
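// The king is handled differently from the other pieces: every candidate
// square is additionally tested against all enemy pieces (including pawn
// capture geometry and sliding-piece line of sight) so that the king never
// steps onto an attacked square.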
void King::reinit(const State& state) {
if (m_pos == INVALID_POS) return;
if (!m_update) return;
m_update = false;
m_allowed.clear();
auto& enemyPieces = m_color ? state.whites : state.blacks;
auto& pawnAttackLeft = m_color ? SW : NW;
auto& pawnAttackRight = m_color ? SE : NE;
for (auto& direction : { N, NE, E, SE, S, SW, W, NW }) {
auto pos = makeStep(m_pos, direction);
bool accept = pos != INVALID_POS && !(state.board[pos] && state.board[pos]->color() == m_color);
if (accept) {
for (auto& p : enemyPieces) {
if (!p.movePattern(pos)) continue;
if (p.type() == Piece::Knight || p.type() == Piece::King) {
accept = false;
break;
}
else if (p.type() == Piece::Pawn) {
auto from = positions[pos];
auto to = p.coord();
Direction d {char(to[R] - from[R]), char(to[F] - from[F])};
if (d == pawnAttackLeft || d == pawnAttackRight) {
accept = false;
break;
}
}
else {
auto from = positions[pos];
auto to = p.coord();
Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
auto reached = traverse(pos, d, Find(state.board));
if (p.pos() == reached) {
accept = false;
break;
}
}
}
}
if (accept) m_allowed.insert(pos);
}
}
const char* Piece::name() const {
static_assert(pieceNames.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
return pieceNames[m_type];
}
char Piece::initial() const {
static_assert(blackShort.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
static_assert(whiteShort.size() == Piece::NUM_PIECES, "Mismatch between piece names and types");
return m_color ? blackShort[m_type] : whiteShort[m_type];
}
void Piece::invalidate() {
m_update = true;
}
const char* Piece::coord() const {
if (m_pos == INVALID_POS) return "";
return positions[m_pos];
}
bool Piece::canReach(char pos) const {
return movePattern(pos) && m_allowed.count(pos);
}
void Piece::take() {
m_pos = INVALID_POS;
m_allowed = {};
}
State::State()
: blacks {
{Piece::Black, "a7"_P, {"a5"_P, "a6"_P} },
{Piece::Black, "b7"_P, {"b5"_P, "b6"_P} },
{Piece::Black, "c7"_P, {"c5"_P, "c6"_P} },
{Piece::Black, "d7"_P, {"d5"_P, "d6"_P} },
{Piece::Black, "e7"_P, {"e5"_P, "e6"_P} },
{Piece::Black, "f7"_P, {"f5"_P, "f6"_P} },
{Piece::Black, "g7"_P, {"g5"_P, "g6"_P} },
{Piece::Black, "h7"_P, {"h5"_P, "h6"_P} },
{Piece::Black, "a8"_P},
{Piece::Black, "b8"_P, {"a6"_P, "c6"_P} },
{Piece::Black, "c8"_P},
{Piece::Black, "d8"_P},
{Piece::Black, "e8"_P},
{Piece::Black, "f8"_P},
{Piece::Black, "g8"_P, {"f6"_P, "h6"_P} },
{Piece::Black, "h8"_P},
}
, whites {
{Piece::White, "a2"_P, {"a3"_P, "a4"_P} },
{Piece::White, "b2"_P, {"b3"_P, "b4"_P} },
{Piece::White, "c2"_P, {"c3"_P, "c4"_P} },
{Piece::White, "d2"_P, {"d3"_P, "d4"_P} },
{Piece::White, "e2"_P, {"e3"_P, "e4"_P} },
{Piece::White, "f2"_P, {"f3"_P, "f4"_P} },
{Piece::White, "g2"_P, {"g3"_P, "g4"_P} },
{Piece::White, "h2"_P, {"h3"_P, "h4"_P} },
{Piece::White, "a1"_P},
{Piece::White, "b1"_P, {"a3"_P, "c3"_P} },
{Piece::White, "c1"_P},
{Piece::White, "d1"_P},
{Piece::White, "e1"_P},
{Piece::White, "f1"_P},
{Piece::White, "g1"_P, {"f3"_P, "h3"_P} },
{Piece::White, "h1"_P},
}
, board {{
&whites[ 8], &whites[ 9], &whites[10], &whites[11], &whites[12], &whites[13], &whites[14], &whites[15],
&whites[ 0], &whites[ 1], &whites[ 2], &whites[ 3], &whites[ 4], &whites[ 5], &whites[ 6], &whites[ 7],
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
&blacks[ 0], &blacks[ 1], &blacks[ 2], &blacks[ 3], &blacks[ 4], &blacks[ 5], &blacks[ 6], &blacks[ 7],
&blacks[ 8], &blacks[ 9], &blacks[10], &blacks[11], &blacks[12], &blacks[13], &blacks[14], &blacks[15],
}}
{}
Chessboard::Chessboard()
: m_state(new State())
{
setGrammar();
}
Chessboard::~Chessboard() = default;
void Chessboard::setPrompt(const std::string& prompt) {
m_prompt = prompt;
setGrammar();
}
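// setGrammar() rebuilds the GBNF-style grammar that constrains Whisper's
// sampler to moves that are currently playable. Illustrative shape only (the
// actual alternatives depend on the position and on whether a prompt is set):
//
//   move    ::= " " ((piece | frompos) " " "to "?)? topos
//   piece   ::= ( "pawn" | "knight" | ... )
//   frompos ::= ( "e2" | "g1" | ... )
//   topos   ::= ( "e4" | "f3" | ... )
//
// Only pieces with at least one legal destination contribute alternatives.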
void Chessboard::setGrammar() {
m_grammar.clear();
std::string result;
if (m_prompt.empty()) {
result += "move ::= \" \" ((piece | frompos) \" \" \"to \"?)? topos\n";
//result += "move ::= \" \" frompos \" \" \"to \"? topos\n";
}
else {
// result += "move ::= prompt \" \" ((piece | frompos) \" \" \"to \"?)? topos\n"
result += "move ::= prompt \" \" frompos \" \" \"to \"? topos\n"
"prompt ::= \" " + m_prompt + "\"\n";
}
std::set<Piece::Types> pieceTypes;
std::set<char> from_pos;
std::set<char> to_pos;
auto& pieces = m_moveCounter % 2 ? m_state->blacks : m_state->whites;
std::set<size_t> flags;
for (auto& p : pieces) {
if (p.allowed().empty()) continue;
bool addPiece = false;
if (!m_inCheck || p.type() == Piece::King) {
to_pos.insert(p.allowed().begin(), p.allowed().end());
addPiece = !p.allowed().empty();
}
else {
for (auto move : p.allowed()) {
if (m_allowedInCheck.count(move)) {
to_pos.insert(move);
addPiece = true;
}
}
}
if (addPiece) {
pieceTypes.insert(p.type());
from_pos.insert(p.pos());
}
}
if (pieceTypes.empty()) return;
result += "piece ::= (";
for (auto& p : pieceTypes) result += " \"" + std::string(pieceNames[p]) + "\" |";
result.pop_back();
result += ")\n\n";
result += "frompos ::= (";
for (auto& p : from_pos) result += " \"" + std::string(positions[p]) + "\" |";
result.pop_back();
result += ")\n";
result += "topos ::= (";
for (auto& p : to_pos) result += " \"" + std::string(positions[p]) + "\" |";
result.pop_back();
result += ")\n";
m_grammar = std::move(result);
}
std::string Chessboard::stringifyBoard() {
std::string result;
result.reserve(16 + 2 * 64 + 16);
for (char file = 'a'; file <= 'h'; ++file) {
result.push_back(file);
result.push_back(' ');
}
result.back() = '\n';
for (int i = 7; i >= 0; --i) {
for (int j = 0; j < 8; ++j) {
auto p = m_state->board[i * 8 + j];
if (p) result.push_back(p->initial());
else result.push_back((i + j) % 2 ? '.' : '*');
result.push_back(' ');
}
result.push_back('0' + i + 1);
result.push_back('\n');
}
return result;
}
std::string Chessboard::process(const std::string& command) {
const auto t_start = std::chrono::high_resolution_clock::now();
auto color = Piece::Colors(m_moveCounter % 2);
Piece* piece = nullptr;
auto pos_to = INVALID_POS;
if (!parseCommand(command, piece, pos_to)) return "";
auto pos_from = piece->pos();
if (!move(*piece, pos_to)) return "";
flagUpdates(pos_from, pos_to);
detectChecks();
auto& enemyPieces = color ? m_state->whites : m_state->blacks;
for (auto& p : enemyPieces) p.reinit(*m_state); // only enemy moves needed next
std::string result = {positions[pos_from][R], positions[pos_from][F], '-', positions[pos_to][R], positions[pos_to][F]};
++m_moveCounter;
setGrammar();
const auto t_end = std::chrono::high_resolution_clock::now();
auto t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
fprintf(stdout, "%s: Move '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", result.data(), "\033[0m", (int) t_ms);
if (m_grammar.empty()) result.push_back('#');
return result;
}
bool Chessboard::parseCommand(const std::string& command, Piece*& piece, char& pos_to) {
auto color = Piece::Colors(m_moveCounter % 2);
fprintf(stdout, "%s: Command to %s: '%s%.*s%s'\n", __func__, (color ? "Black" : "White"), "\033[1m", int(command.size()), command.data(), "\033[0m");
if (command.empty()) return false;
auto tokens = split(command, ' ');
auto pos_from = INVALID_POS;
auto type = Piece::Types::NUM_PIECES;
if (tokens.size() == 1) {
type = Piece::Types::Pawn;
pos_to = strToPos(tokens.front());
}
else {
pos_from = strToPos(tokens.front());
if (pos_from == INVALID_POS) type = Piece::Types(strToType(tokens.front()));
pos_to = strToPos(tokens.back());
}
if (pos_to == INVALID_POS) return false;
if (pos_from == INVALID_POS) {
if (type == Piece::Types::NUM_PIECES) return false;
auto& pieces = color ? m_state->blacks : m_state->whites;
for (auto& p : pieces) {
if (p.type() == type && p.canReach(pos_to)) {
pos_from = p.pos();
break;
}
}
}
if (pos_from == INVALID_POS) return false;
if (m_state->board[pos_from] == nullptr) return false;
piece = m_state->board[pos_from];
if (piece->color() != color) return false;
return true;
}
void Chessboard::flagUpdates(char pos_from, char pos_to) {
auto color = Piece::Colors(m_moveCounter % 2);
auto& enemyPieces = color ? m_state->whites : m_state->blacks;
auto& ownPieces = color ? m_state->blacks : m_state->whites;
for (auto& p : enemyPieces) {
if (p.movePattern(pos_to) || p.movePattern(pos_from)) {
updatePins(p);
p.invalidate();
}
}
for (auto& p : ownPieces) {
if (p.movePattern(pos_to) || p.movePattern(pos_from)) {
updatePins(p);
p.invalidate();
}
}
}
void Chessboard::updatePins(Piece& piece) {
if (piece.type() == Piece::Pawn || piece.type() == Piece::Knight || piece.type() == Piece::King) return;
auto& enemyPieces = piece.color() ? m_state->whites : m_state->blacks;
auto& enemyPins = piece.color() ? m_state->whitePins : m_state->blackPins;
auto& king = enemyPieces.k;
auto it = std::find_if(enemyPins.begin(), enemyPins.end(), [&] (const Pin& pin) { return pin.pinner == &piece; });
if (it != enemyPins.end()) {
it->pinned->invalidate();
enemyPins.erase(it);
}
if (piece.movePattern(king.pos())) {
auto to = positions[king.pos()];
auto from = piece.coord();
Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
auto reached = traverse(piece.pos(), d, Find(m_state->board));
auto foundPiece = m_state->board[reached];
if (&king == foundPiece) {
// check
king.invalidate();
}
else if (foundPiece && foundPiece->color() != piece.color()) {
reached = traverse(reached, d, Find(m_state->board));
if (&king == m_state->board[reached]) {
enemyPins.push_back({d, &piece, foundPiece});
foundPiece->invalidate();
}
}
}
}
void Chessboard::detectChecks() {
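// Record which squares would resolve a check: the checking piece's square and,
// for sliding checkers, the squares that block its line. A second checker
// (double check) clears m_allowedInCheck, leaving king moves as the only escape.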
auto color = Piece::Colors(m_moveCounter % 2);
auto& enemyPieces = color ? m_state->whites : m_state->blacks;
auto& ownPieces = color ? m_state->blacks : m_state->whites;
auto& king = enemyPieces.k;
auto& pawnAttackLeft = color ? SW : NW;
auto& pawnAttackRight = color ? SE : NE;
for (auto& p : ownPieces) {
if (!p.movePattern(king.pos())) continue;
auto to = positions[king.pos()];
auto from = p.coord();
if (p.type() == Piece::Knight) {
if (!m_inCheck) {
m_allowedInCheck = { p.pos() };
}
else {
m_allowedInCheck.clear();
}
m_inCheck = true;
}
else if (p.type() == Piece::Pawn) {
Direction d {char(to[R] - from[R]), char(to[F] - from[F])};
if (d == pawnAttackLeft || d == pawnAttackRight) {
if (!m_inCheck) {
m_allowedInCheck = { p.pos() };
}
else {
m_allowedInCheck.clear();
}
m_inCheck = true;
}
}
else {
Direction d = normalize({char(to[R] - from[R]), char(to[F] - from[F])});
std::set<char> tmp;
auto pos = traverse(p.pos(), d, Add(m_state->board, tmp, king.color()));
if (pos == king.pos()) {
tmp.insert(p.pos());
if (!m_inCheck) {
m_allowedInCheck = std::move(tmp);
}
else {
m_allowedInCheck.clear();
}
m_inCheck = true;
}
}
}
}
bool Chessboard::move(Piece& piece, char pos_to) {
auto& allowed = piece.allowed();
if (allowed.count(pos_to) == 0 || (m_inCheck && piece.type() != Piece::King && m_allowedInCheck.count(pos_to) == 0)) return false;
if (m_state->board[pos_to] && m_state->board[pos_to]->color() == piece.color()) return false;
if (m_state->board[pos_to]) m_state->board[pos_to]->take();
m_state->board[piece.pos()] = nullptr;
m_state->board[pos_to] = &piece;
piece.setPos(pos_to);
m_inCheck = false;
m_allowedInCheck.clear();
return true;
}

View File

@ -1,33 +0,0 @@
#pragma once
#include <string>
#include <set>
#include <memory>
// just basic validation
// fixme: missing en passant, castling, promotion, etc.
struct State;
class Piece;
class Chessboard {
public:
Chessboard();
~Chessboard();
std::string process(const std::string& command);
std::string stringifyBoard();
const std::string& grammar() { return m_grammar; }
const std::string& prompt() { return m_prompt; }
void setPrompt(const std::string& prompt);
private:
bool parseCommand(const std::string& command, Piece*& piece, char& pos_to);
bool move(Piece& piece, char pos);
void flagUpdates(char pos_from, char pos_to);
void updatePins(Piece& piece);
void detectChecks();
void setGrammar();
std::unique_ptr<State> m_state;
std::set<char> m_allowedInCheck;
bool m_inCheck = false;
int m_moveCounter = 0;
std::string m_grammar;
std::string m_prompt;
};
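A minimal usage sketch of this interface (illustrative only; the expected `e2-e4` result mirrors the `d2-d4`-style assertions in the test program further down this diff):

```cpp
#include "Chessboard.h"
#include <cstdio>

int main() {
    Chessboard board;
    // Commands are plain text; an empty result means the move was rejected.
    std::string res = board.process("pawn to e4");       // expected: "e2-e4"
    printf("result: %s\n", res.c_str());
    printf("%s\n", board.stringifyBoard().c_str());      // ASCII board
    printf("grammar:\n%s\n", board.grammar().c_str());   // rules for Black's reply
    return 0;
}
```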

View File

@ -1,193 +0,0 @@
#include "WChess.h"
#include "Chessboard.h"
#include "grammar-parser.h"
#include "common.h"
#include <thread>
WChess::WChess(whisper_context * ctx,
const whisper_full_params & wparams,
callbacks cb,
settings s)
: m_ctx(ctx)
, m_wparams(wparams)
, m_cb(cb)
, m_settings(s)
, m_board(new Chessboard())
{}
WChess::~WChess() = default;
void WChess::set_move(const std::string& moves, float prob) const {
if (m_cb.set_move) (*m_cb.set_move)(moves, prob);
}
void WChess::set_grammar(const std::string& grammar) const {
if (m_cb.set_grammar) (*m_cb.set_grammar)(grammar);
}
bool WChess::get_audio(std::vector<float>& pcmf32) const {
if (m_cb.get_audio) return (*m_cb.get_audio)(pcmf32);
return false;
}
std::string WChess::stringify_board() const {
return m_board->stringifyBoard();
}
std::string WChess::get_grammar() const {
return m_board->grammar();
}
void WChess::run() {
bool have_prompt = true;
bool ask_prompt = !have_prompt;
float logprob_min = 0.0f;
float logprob_sum = 0.0f;
int n_tokens = 0;
std::vector<float> pcmf32_cur;
std::vector<float> pcmf32_prompt;
const std::string k_prompt = have_prompt ? "" : "rook to d4, f3";
int64_t t_ms = 0;
if (ask_prompt) {
fprintf(stdout, "\n");
fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
fprintf(stdout, "\n");
ask_prompt = false;
}
while (get_audio(pcmf32_cur)) {
if (!pcmf32_cur.empty()) {
// fprintf(stdout, "%s: Processing ...\n", __func__);
if (!have_prompt) {
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
const float sim = similarity(txt, k_prompt);
if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
ask_prompt = true;
} else {
fprintf(stdout, "\n");
fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
fprintf(stdout, "\n");
// save the audio for the prompt
pcmf32_prompt = pcmf32_cur;
have_prompt = true;
m_board->setPrompt(k_prompt);
}
} else {
if (!pcmf32_prompt.empty()) pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
constexpr size_t MIN_SIZE = 1.2 * WHISPER_SAMPLE_RATE;
if (MIN_SIZE > pcmf32_cur.size()) pcmf32_cur.insert(pcmf32_cur.begin(), MIN_SIZE - pcmf32_cur.size(), 0.0f);
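// pad recordings shorter than ~1.2 s with leading silence so whisper_full() has enough audio to work with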
// fprintf(stdout, "%s: grammar rules:\n'%s'\n", __func__, m_board->grammar().c_str());
auto grammar_parsed = grammar_parser::parse(m_board->grammar().c_str());
auto grammar_rules = grammar_parsed.c_rules();
m_wparams.grammar_rules = grammar_rules.data();
m_wparams.n_grammar_rules = grammar_rules.size();
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("move");
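// "move" is the root rule emitted by Chessboard::setGrammar(), so decoding is constrained to start from it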
auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
const float p = 100.0f * std::exp(logprob_min);
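// e.g. a worst-token log probability of -0.105 gives p = 100 * exp(-0.105) ≈ 90%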
fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
// find the prompt in the text
float best_sim = 0.0f;
size_t best_len = 0;
for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
const auto prompt = txt.substr(0, n);
const float sim = similarity(prompt, k_prompt);
//fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
if (sim > best_sim) {
best_sim = sim;
best_len = n;
}
}
fprintf(stdout, "%s: DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
std::string command = ::trim(txt.substr(best_len));
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
fprintf(stdout, "\n");
if (!command.empty()) {
set_move(m_board->process(command), p);
set_grammar(m_board->grammar());
}
if (m_board->grammar().empty()) {
fprintf(stdout, "%s: No more moves possible\n", __func__);
break;
}
}
}
if (ask_prompt) {
fprintf(stdout, "\n");
fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
fprintf(stdout, "\n");
ask_prompt = false;
}
}
}
std::string WChess::transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
int & n_tokens,
int64_t & t_ms) {
const auto t_start = std::chrono::high_resolution_clock::now();
logprob_min = 0.0f;
logprob_sum = 0.0f;
n_tokens = 0;
t_ms = 0;
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
return {};
}
std::string result;
const int n_segments = whisper_full_n_segments(m_ctx);
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(m_ctx, i);
result += text;
const int n = whisper_full_n_tokens(m_ctx, i);
for (int j = 0; j < n; ++j) {
const auto token = whisper_full_get_token_data(m_ctx, i, j);
if(token.plog > 0.0f) return {};
logprob_min = std::min(logprob_min, token.plog);
logprob_sum += token.plog;
++n_tokens;
}
}
const auto t_end = std::chrono::high_resolution_clock::now();
t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
return result;
}

View File

@ -1,63 +0,0 @@
#pragma once
#include "whisper.h"
#include <string>
#include <vector>
#include <memory>
class Chessboard;
class WChess {
public:
using CheckRunningCb = bool (*)();
using GetAudioCb = bool (*)(std::vector<float> &);
using SetMovesCb = void (*)(const std::string &, float);
using SetGrammarCb = void (*)(const std::string &);
using ClearAudioCb = void (*)();
struct callbacks {
GetAudioCb get_audio = nullptr;
SetMovesCb set_move = nullptr;
SetGrammarCb set_grammar = nullptr;
};
struct settings {
int32_t vad_ms = 2000;
int32_t prompt_ms = 5000;
int32_t command_ms = 4000;
float vad_thold = 0.2f;
float freq_thold = 100.0f;
bool print_energy = false;
};
WChess(
whisper_context * ctx,
const whisper_full_params & wparams,
callbacks cb,
settings s
);
~WChess();
void run();
std::string stringify_board() const;
std::string get_grammar() const;
private:
bool get_audio(std::vector<float>& pcmf32) const;
void set_move(const std::string& moves, float prob) const;
void set_grammar(const std::string& grammar) const;
std::string transcribe(
const std::vector<float> & pcmf32,
float & logprob_min,
float & logprob_sum,
int & n_tokens,
int64_t & t_ms);
whisper_context * m_ctx;
whisper_full_params m_wparams;
const callbacks m_cb;
const settings m_settings;
std::unique_ptr<Chessboard> m_board;
};
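For orientation, a minimal, hypothetical wiring of these callbacks is sketched below; `get_audio_stub`, `on_move`, and `run_wchess` are illustrative names, and the real drivers later in this diff do the same thing with actual microphone capture.

```cpp
#include "WChess.h"
#include <cstdio>

// Hypothetical stub: a real driver fills pcmf32 from the microphone and
// returns false only when the user quits.
static bool get_audio_stub(std::vector<float> & pcmf32) {
    pcmf32.clear();
    return false; // returning false makes WChess::run() exit
}

static void on_move(const std::string & move, float prob) {
    printf("move: %s (%.1f%%)\n", move.c_str(), prob);
}

void run_wchess(whisper_context * ctx, const whisper_full_params & wparams) {
    WChess::callbacks cb;
    cb.get_audio = get_audio_stub;
    cb.set_move  = on_move;
    WChess(ctx, wparams, cb, {}).run();
}
```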

View File

@ -1,117 +0,0 @@
#include "Chessboard.h"
#define ASSERT(x) \
do { \
if (!(x)) { \
fprintf(stderr, "ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
fflush(stderr); \
exit(1); \
} \
} while (0)
int main() {
{
Chessboard chess;
ASSERT(chess.process("pawn to d4") == "d2-d4");
ASSERT(chess.process("e5") == "e7-e5");
ASSERT(chess.process("c1 h6") == "c1-h6");
ASSERT(chess.process("queen h4") == "d8-h4");
ASSERT(chess.process("bishop to g5") == "h6-g5");
ASSERT(chess.process("bishop to b4") == "f8-b4");
ASSERT(chess.process("c4") == "");
ASSERT(chess.process("knight c3") == "b1-c3");
ASSERT(chess.process("knight c6") == "b8-c6");
ASSERT(chess.process("f3") == "");
}
{
Chessboard chess;
ASSERT(chess.process("d4") == "d2-d4");
ASSERT(chess.process("e5") == "e7-e5");
ASSERT(chess.process("e4") == "e2-e4");
ASSERT(chess.process("queen h4") == "d8-h4");
ASSERT(chess.process("queen h5") == "d1-h5");
ASSERT(chess.process("f5") == "");
ASSERT(chess.process("g6") == "g7-g6");
ASSERT(chess.process("knight e2") == "g1-e2");
ASSERT(chess.process("f5") == "f7-f5");
ASSERT(chess.process("knight g3") == "e2-g3");
ASSERT(chess.process("g5") == "");
ASSERT(chess.process("king e7") == "e8-e7");
ASSERT(chess.process("f4") == "f2-f4");
ASSERT(chess.process("g5") == "g6-g5");
}
{
Chessboard chess;
ASSERT(chess.process("e4") == "e2-e4");
ASSERT(chess.process("c5") == "c7-c5");
ASSERT(chess.process("e5") == "e4-e5");
ASSERT(chess.process("c4") == "c5-c4");
ASSERT(chess.process("e6") == "e5-e6");
ASSERT(chess.process("c3") == "c4-c3");
ASSERT(chess.process("e7") == "");
ASSERT(chess.process("f7") == "e6-f7");
ASSERT(chess.process("d2") == "");
ASSERT(chess.process("king to f7") == "e8-f7");
ASSERT(chess.process("f4") == "f2-f4");
ASSERT(chess.process("d2") == "c3-d2");
ASSERT(chess.process("f5") == "");
ASSERT(chess.process("king to e2") == "e1-e2");
ASSERT(chess.process("king to g6") == "f7-g6");
ASSERT(chess.process("f5") == "f4-f5");
ASSERT(chess.process("e6") == "");
ASSERT(chess.process("king to h5") == "g6-h5");
ASSERT(chess.process("g4") == "g2-g4");
ASSERT(chess.process("king to g5") == "h5-g5");
ASSERT(chess.process("h4") == "h2-h4");
ASSERT(chess.process("king to h5") == "");
ASSERT(chess.process("king to g6") == "");
ASSERT(chess.process("king to h6") == "g5-h6");
ASSERT(chess.process("bishop to d2") == "c1-d2");
ASSERT(chess.process("king to g5") == "");
ASSERT(chess.process("g5") == "g7-g5");
}
{
Chessboard chess;
ASSERT(chess.process("f4") == "f2-f4");
ASSERT(chess.process("e5") == "e7-e5");
ASSERT(chess.process("g4") == "g2-g4");
ASSERT(chess.process("queen to h4") == "d8-h4#");
ASSERT(chess.process("knight f3") == "");
ASSERT(chess.grammar().empty());
}
{
Chessboard chess;
ASSERT(chess.process("f4") == "f2-f4");
ASSERT(chess.process("e5") == "e7-e5");
ASSERT(chess.process("g4") == "g2-g4");
ASSERT(chess.process("d5") == "d7-d5");
ASSERT(chess.process("g1 f3") == "g1-f3");
ASSERT(chess.process("queen to h4") == "d8-h4");
ASSERT(!chess.grammar().empty());
}
{
Chessboard chess;
ASSERT(chess.process("knight c3") == "b1-c3");
ASSERT(chess.process("knight c6") == "b8-c6");
ASSERT(chess.process("knight b5") == "c3-b5");
ASSERT(chess.process("knight f6") == "g8-f6");
ASSERT(chess.process("knight d6") == "b5-d6");
ASSERT(chess.process("knight d4") == "");
ASSERT(chess.process("d6") == "c7-d6");
ASSERT(chess.process("e4") == "e2-e4");
ASSERT(chess.process("knight d4") == "c6-d4");
ASSERT(chess.process("d3") == "d2-d3");
ASSERT(chess.process("knight e4") == "f6-e4");
ASSERT(chess.process("king to e2") == "");
ASSERT(chess.process("king to d2") == "");
}
}

View File

@ -1,8 +0,0 @@
if (WHISPER_SDL2)
set(TARGET wchess)
add_executable(${TARGET} wchess.cmd.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE wchess-core common-sdl ${CMAKE_THREAD_LIBS_INIT})
endif ()

View File

@ -1,247 +0,0 @@
// Command-line voice-assisted chess
//
// Speak chess move commands into the microphone.
// The moves will be translated to chessboard positions.
//
//
#include "WChess.h"
#include "common-sdl.h"
#include <iostream>
#include <memory>
#include <thread>
// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t prompt_ms = 5000;
int32_t command_ms = 8000;
int32_t capture_id = -1;
int32_t max_tokens = 32;
int32_t audio_ctx = 0;
float vad_thold = 0.6f;
float freq_thold = 100.0f;
float grammar_penalty = 100.0f;
bool speed_up = false;
bool translate = false;
bool print_special = false;
bool print_energy = false;
bool no_timestamps = true;
bool use_gpu = true;
std::string language = "en";
std::string model = "models/ggml-base.en.bin";
std::string fname_out;
std::string commands;
std::string prompt;
std::string context;
std::string grammar;
};
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -pms N, --prompt-ms N [%-7d] prompt duration in milliseconds\n", params.prompt_ms);
fprintf(stderr, " -cms N, --command-ms N [%-7d] command duration in milliseconds\n", params.command_ms);
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
fprintf(stderr, "\n");
}
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params);
exit(0);
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-pms" || arg == "--prompt-ms") { params.prompt_ms = std::stoi(argv[++i]); }
else if (arg == "-cms" || arg == "--command-ms") { params.command_ms = std::stoi(argv[++i]); }
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
else if (arg == "-cmd" || arg == "--commands") { params.commands = argv[++i]; }
else if (arg == "-p" || arg == "--prompt") { params.prompt = argv[++i]; }
else if (arg == "-ctx" || arg == "--context") { params.context = argv[++i]; }
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
}
}
return true;
}
std::unique_ptr<WChess> g_wchess;
int g_moveCount = 0;
void set_move(const std::string & move, float) {
if (!move.empty()) {
g_moveCount++;
fprintf(stdout, "Move: %s\n\n", move.c_str());
}
else fprintf(stdout, "Move rejected\n\n");
fprintf(stdout, "%s\n", g_wchess->stringify_board().c_str());
fprintf(stdout, "%s\n", g_moveCount ? "White's turn" : "Black's turn");
}
audio_async g_audio(30*1000);
bool g_listening = false;
std::vector<float> g_pcmf32;
bool read_input() {
std::string input;
while (true) {
fprintf(stdout, "[(l)isten/(p)ause/(q)uit]: ");
std::cin >> input;
fprintf(stdout, "\n");
if (input[0] == 'q') {
fprintf(stdout, "Quitting\n");
return false;
}
if (input[0] == 'l') {
if (!g_listening) {
fprintf(stdout, "Listening\n");
g_listening = true;
g_pcmf32.clear();
g_audio.resume();
g_audio.clear();
}
else fprintf(stdout, "Still listening\n");
return true;
}
else {
if (g_listening) {
g_listening = false;
g_audio.get(0, g_pcmf32);
g_audio.pause();
fprintf(stdout, "Processing\n");
}
else fprintf(stdout, "Not listening\n");
return true;
}
}
return true;
}
bool get_audio(std::vector<float> & pcmf32_cur) {
if (!read_input()) return false;
if (!g_pcmf32.empty()) pcmf32_cur = std::move(g_pcmf32);
else pcmf32_cur.clear();
return true;
}
int main(int argc, char ** argv) {
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {
return 1;
}
if (whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
}
// whisper init
struct whisper_context_params cparams = whisper_context_default_params();
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
if (!ctx) {
fprintf(stderr, "%s: whisper_init_from_file_with_params() failed!\n", __func__);
return 1;
}
// init audio
if (!g_audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
return 1;
}
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special = false;
wparams.no_timestamps = true;
wparams.max_tokens = 32;
wparams.audio_ctx = 768; // partial encoder context for better performance
wparams.temperature = 0.0f;
wparams.temperature_inc = 2.0f;
wparams.greedy.best_of = 1;
wparams.beam_search.beam_size = 1;
wparams.language = "en";
wparams.grammar_penalty = 100.0;
wparams.initial_prompt = params.context.data();
WChess::callbacks cb;
cb.get_audio = get_audio;
cb.set_move = set_move;
WChess::settings s;
s.vad_ms = 2000;
s.prompt_ms = params.prompt_ms;
s.command_ms = params.command_ms;
s.vad_thold = params.vad_thold;
s.freq_thold = params.freq_thold;
s.print_energy = params.print_energy;
g_wchess.reset(new WChess(ctx, wparams, cb, s));
set_move("start", 0);
g_wchess->run();
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}

View File

@ -1,51 +0,0 @@
set(TARGET wchess.wasm)
add_executable(${TARGET}
wchess.wasm.cpp
)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE
common
wchess-core
)
unset(EXTRA_FLAGS)
if (WHISPER_WASM_SINGLE_FILE)
set(EXTRA_FLAGS "-s SINGLE_FILE=1")
message(STATUS "Embedding WASM inside chess.js")
add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_BINARY_DIR}/bin/${TARGET}.js
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js
)
endif()
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
--bind \
-s USE_PTHREADS=1 \
-s PTHREAD_POOL_SIZE=8 \
-s INITIAL_MEMORY=1024MB \
-s TOTAL_MEMORY=1024MB \
-s FORCE_FILESYSTEM=1 \
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
${EXTRA_FLAGS} \
")
add_custom_command(
TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/
)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY)

View File

@ -1,54 +0,0 @@
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
.clearfix-7da63 {
clear: both;
}
.board-b72b1 {
border: 2px solid #404040;
box-sizing: content-box;
}
.square-55d63 {
float: left;
position: relative;
/* disable any native browser highlighting */
-webkit-touch-callout: none;
-webkit-user-select: none;
-khtml-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
.white-1e1d7 {
background-color: #f0d9b5;
color: #b58863;
}
.black-3c85d {
background-color: #b58863;
color: #f0d9b5;
}
.highlight1-32417, .highlight2-9c5d2 {
box-shadow: inset 0 0 3px 3px yellow;
}
.notation-322f9 {
cursor: default;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
position: absolute;
}
.alpha-d2270 {
bottom: 1px;
right: 3px;
}
.numeric-fc462 {
top: 2px;
left: 2px;
}

View File

@ -1,2 +0,0 @@
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
.clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}

12 binary image files not shown (748 B – 3.7 KiB each).

File diff suppressed because one or more lines are too long

View File

@ -1,32 +0,0 @@
# chessboard.js Change Log
All notable changes to this project will be documented in this file.
## [1.0.0] - 2019-06-11
- Orientation methods now return current orientation. [Issue #64]
- Drop support for IE8
- Do not check for `window.JSON` (Error #1004)
- Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however)
- id query selectors are now supported as the first argument to `Chessboard()`
- Remove Error #1002
- Format code according to [StandardJS]
- Bump minimum jQuery version to 1.8.3
- Throttle piece drag functions
## [0.3.0] - 2013-08-10
- Added `appearSpeed` animation config property
- Added `onSnapbackEnd` event
- Added `onMoveEnd` event
## [0.2.0] - 2013-08-05
- Added `onMouseoverSquare` and `onMouseoutSquare` events
- Added `onSnapEnd` event
- Added square code as CSS class on the squares
- Added [chess.js] integration examples
## [0.1.0] - 2013-05-21
- Initial release
[chess.js]:https://github.com/jhlywa/chess.js
[Issue #64]:https://github.com/oakmac/chessboardjs/issues/64
[StandardJS]:https://standardjs.com/

View File

@ -1,20 +0,0 @@
Copyright 2019 Chris Oakman
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,82 +0,0 @@
# chessboard.js
chessboard.js is a JavaScript chessboard component. It depends on [jQuery].
Please see [chessboardjs.com] for documentation and examples.
## What is chessboard.js?
chessboard.js is a standalone JavaScript chessboard component. It is designed
to be "just a board" and to expose a flexible, powerful API so that it can be
used in many different ways.
Here's a non-exhaustive list of things you can do with chessboard.js:
- Use chessboard.js to show game positions alongside your expert commentary.
- Use chessboard.js to have a tactics website where users have to guess the best
move.
- Integrate chessboard.js and [chess.js] with a PGN database and allow people to
search and playback games (see [Example 5000])
- Build a chess server and have users play their games out using the
chessboard.js board.
chessboard.js is flexible enough to handle any of these situations with relative
ease.
## What can chessboard.js **not** do?
The scope of chessboard.js is limited to "just a board." This is intentional and
makes chessboard.js flexible for handling a multitude of chess-related problems.
Specifically, chessboard.js does not understand anything about how the game of
chess is played: how a knight moves, whose turn it is, whether White is in
check, etc. That logic is distinct from the logic of the board. Fortunately,
the powerful [chess.js] library deals with exactly this problem domain and
plays nicely with chessboard.js's flexible API; see the combined chessboard.js
and chess.js examples: 5000, 5001, 5002.
Here is a list of things that chessboard.js is **not**:
- A chess engine
- A legal move validator
- A PGN parser
chessboard.js is designed to work well with any of those things, but the idea
behind chessboard.js is that the logic that controls the board should be
independent of those other problems.
## Docs and Examples
- Docs - <http://chessboardjs.com/docs>
- Examples - <http://chessboardjs.com/examples>
## Developer Tools
```sh
# create a build in the build/ directory
npm run build
# re-build the website
npm run website
```
## License
[MIT License](LICENSE.md)
[jQuery]:https://jquery.com/
[chessboardjs.com]:http://chessboardjs.com
[chess.js]:https://github.com/jhlywa/chess.js
[Example 5000]:http://chessboardjs.com/examples#5000

View File

@ -1,29 +0,0 @@
{
"author": "Chris Oakman <chris@oakmac.com> (http://chrisoakman.com/)",
"name": "@chrisoakman/chessboardjs",
"description": "JavaScript chessboard widget",
"homepage": "https://chessboardjs.com",
"license": "MIT",
"version": "1.0.0",
"repository": {
"type": "git",
"url": "git://github.com/oakmac/chessboardjs.git"
},
"files": ["dist/"],
"dependencies": {
"jquery": ">=3.4.1"
},
"devDependencies": {
"csso": "3.5.1",
"fs-plus": "3.1.1",
"kidif": "1.1.0",
"mustache": "2.3.0",
"standard": "10.0.2",
"uglify-js": "3.6.0"
},
"scripts": {
"build": "standard lib/chessboard.js && node scripts/build.js",
"standard": "standard --fix lib/*.js website/js/*.js",
"website": "node scripts/website.js"
}
}

View File

@ -1,499 +0,0 @@
<!doctype html>
<html lang="en-us">
<head>
<title>wchess : voice-controlled chess using Whisper + WebAssembly</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=0.7, maximum-scale=1, minimum-scale=0.7, user-scalable=no"/>
<meta name="apple-mobile-web-app-capable" content="yes" />
<style>
#output {
width: 100%;
height: 100%;
margin: 0 auto;
margin-top: 10px;
border-left: 0px;
border-right: 0px;
padding-left: 0px;
padding-right: 0px;
display: block;
background-color: black;
color: white;
font-size: 10px;
font-family: 'Lucida Console', Monaco, monospace;
outline: none;
white-space: pre;
overflow-wrap: normal;
overflow-x: scroll;
}
.button {
background-color: #000000;
color: #FFFFFF;
padding: 20px;
border-radius: 10px;
-moz-border-radius: 10px;
-webkit-border-radius: 10px;
margin:10px;
width: 100px;
height: 50px;
-webkit-touch-callout: none; /* Safari */
-webkit-user-select: none; /* Chrome */
-moz-user-select: none; /* Firefox */
-ms-user-select: none; /* Internet Explorer/Edge */
user-select: none;
}
button[disabled]{
background-color: #cccccc;
color: #666666;
padding: 20px;
border-radius: 10px;
-moz-border-radius: 10px;
-webkit-border-radius: 10px;
margin:10px;
width: 100px;
}
.center {
display: flex;
justify-content: center;
align-items: center;
width: 500px;
}
#description {
width: 500px;
}
</style>
<link rel="stylesheet" href="css/chessboard-1.0.0.min.css" integrity="sha384-q94+BZtLrkL1/ohfjR8c6L+A6qzNH9R2hBLwyoAfu3i/WCvQjzL2RQJ3uNHDISdU" crossorigin="anonymous">
</head>
<body>
<div id="main-container">
<div id="description">
<b>wchess : voice-controlled chess using Whisper + WebAssembly</b>
<br><br>
This is a demonstration of using Whisper to recognize voice commands in the browser.
<br><br>
Usage:<br>
<ul>
<li>Select a Whisper model</li>
<li>Accept the microphone permission request if prompted</li>
<li>Hold the button and say a chess move (e.g. "Knight to c3")</li>
<li>Release the button and wait for the move to be recognized</li>
<li>Repeat</li>
</ul>
Examples:<br>
<ul>
<li><b>"d4"</b></li>
<li><b>"e2 e4"</b></li>
<li><b>"Knight f3"</b></li>
<li><b>"Bishop to b5"</b></li>
</ul>
Features:<br>
<ul>
<li>Model quantization for reduced memory footprint (~42MB)</li>
<li><a href="https://github.com/ggerganov/whisper.cpp/pull/1229">Grammar-based sampling</a> for improved recognition accuracy</li>
</ul>
<b>
Note that not all chess moves are supported. For example, castling and pawn promotion
currently do not work, but can be easily implemented. There could also be some bugs in
the move handling logic in general. The main reason for that is to keep the implementation
simple. The assumption is that a real application would already have a proper move
validation logic in place.<br><br>
The main purpose of this example is to demonstrate the capabilities of whisper.cpp and
its application in the browser for voice recognition locally on your device.
</b>
<br><br>
You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/wchess">GitHub</a>.
<br><br>
<b>More examples:</b>
<a href="https://whisper.ggerganov.com/">main</a> |
<a href="https://whisper.ggerganov.com/bench">bench</a> |
<a href="https://whisper.ggerganov.com/stream">stream</a> |
<a href="https://whisper.ggerganov.com/command">command</a> |
<a href="https://whisper.ggerganov.com/talk">talk</a> |
<br><br>
</div>
<hr>
<div id="model-whisper">
Whisper model: <span id="model-whisper-status"></span>
<button id="fetch-whisper-tiny-en" onclick="loadWhisper()">tiny.en (Q8_0, 42 MB)</button>
<span id="fetch-whisper-progress"></span>
<br><br>
<button id="clear" onclick="clearCache()">Clear browser cache</button>
<!--
<input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
-->
</div>
<div id="game">
<br>
<div id="chessboard" style="width: 500px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('chessboard', 'start')
var move_count = 0;
</script>
<br>
<div id="state">
Status: <b><span id="state-status">select model</span></b>
<div id="input" class="center">
<button id="toggler" class="button" onselectstart="return false" style="display: none">Hold</button>
</div>
<pre id="state-grammar">[The grammar will be displayed here]</pre>
<pre id="state-moves">[The moves will be displayed here]</pre>
</div>
</div>
<hr>
Debug output:
<textarea id="output" rows="20"></textarea>
<br>
<b>Troubleshooting</b>
<br><br>
The page does some heavy computations, so make sure:
<ul>
<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
<li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
</ul>
<div class="cell-version">
<span>
|
Build time: <span class="nav-link">@GIT_DATE@</span> |
Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
<a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">Source Code</a> |
</span>
</div>
</div>
<script type="text/javascript" src="js/helpers.js"></script>
<script type='text/javascript'>
// web audio context
var context = null;
// the command instance
var instance = null;
// model name
var model_whisper = null;
var model_file = null;
var module_ready = null;
var Module = {
print: printTextarea,
printErr: printTextarea,
setStatus: function(text) {
printTextarea('js: ' + text);
},
monitorRunDependencies: function(left) {
},
preRun: function() {
printTextarea('js: Preparing ...');
},
postRun: function() {
printTextarea('js: Module initialized successfully!');
module_ready = true;
initInstance();
}
};
function initInstance() {
if (!module_ready || !model_file || instance) return
instance = Module.init(model_file);
if (instance) {
setStatus('Ready');
printTextarea("js: whisper initialized, instance: " + instance);
}
else {
printTextarea("js: failed to initialize whisper");
}
}
function setStatus(text) {
document.getElementById('state-status').innerHTML = text;
}
//
// fetch models
//
let dbVersion = 1
let dbName = 'whisper.ggerganov.com';
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
function storeFS(fname, buf) {
// write to WASM file using FS_createDataFile
// if the file exists, delete it
try {
Module.FS_unlink(fname);
} catch (e) {
// ignore
}
Module.FS_createDataFile("/", fname, buf, true, true);
printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
model_file = fname;
initInstance();
}
function loadWhisper() {
setStatus('Loading')
//let url = 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin';
let url = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q8_0.bin';
let dst = 'whisper.bin';
let size_mb = 42;
model_whisper = 'tiny.en-q8_0';
document.getElementById('model-whisper-status').innerHTML = 'loading "' + model_whisper + '" ... ';
document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
cbProgress = function(p) {
let el = document.getElementById('fetch-whisper-progress');
el.innerHTML = Math.round(100*p) + '%';
};
cbCancel = function() {
var el;
el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
};
loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
// init audio capture so that the user receives a permission request
{
let context = new AudioContext({
sampleRate: 16000,
channelCount: 1,
echoCancellation: false,
autoGainControl: true,
noiseSuppression: true,
});
navigator.mediaDevices.getUserMedia({audio: true, video: false})
.then(function(s) {
s.getTracks().forEach(function(track) {
track.stop();
});
})
.catch(function(err) {
printTextarea('js: error getting audio stream: ' + err);
});
context.close();
}
document.getElementById('toggler').style.display = 'block';
}
//
// microphone
//
const kSampleRate = 16000;
const kRestartRecording_s = 120;
const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
var mediaRecorder = null;
var doRecording = false;
var startTime = 0;
window.AudioContext = window.AudioContext || window.webkitAudioContext;
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
function stopRecording() {
if (mediaRecorder) {
mediaRecorder.stop();
}
}
function startRecording() {
if (!context) {
context = new AudioContext({
sampleRate: kSampleRate,
channelCount: 1,
echoCancellation: false,
autoGainControl: true,
noiseSuppression: true,
});
}
startTime = Date.now();
var chunks = [];
var stream = null;
navigator.mediaDevices.getUserMedia({audio: true, video: false})
.then(function(s) {
stream = s;
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.ondataavailable = function(e) {
chunks.push(e.data);
var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
var reader = new FileReader();
reader.onload = function(event) {
var buf = new Uint8Array(reader.result);
context.decodeAudioData(buf.buffer, function(audioBuffer) {
var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
var source = offlineContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(offlineContext.destination);
source.start(0);
offlineContext.startRendering().then(function(renderedBuffer) {
let audio = renderedBuffer.getChannelData(0);
printTextarea('js: number of samples: ' + audio.length);
Module.set_audio(instance, audio);
});
mediaRecorder = null;
context = null;
});
}
reader.readAsArrayBuffer(blob);
};
mediaRecorder.onstop = function(e) {
stream.getTracks().forEach(function(track) {
track.stop();
});
};
mediaRecorder.start();
})
.catch(function(err) {
printTextarea('js: error getting audio stream: ' + err);
});
}
//
// main
//
var nLines = 0;
var movesAll = '';
// document.body.addEventListener('keydown', function(event) {
// if (event.keyCode === 32) {
// document.getElementById('toggler').innerText = "";
// onStart();
// }
// }, true);
// document.body.addEventListener('keyup', function(event) {
// if (event.keyCode === 32) {
// document.getElementById('toggler').innerText = "Hold";
// onStop();
// }
// }, true);
document.getElementById('toggler').addEventListener("touchstart", function(event){
this.innerText = "";
onStart();
}, true);
document.getElementById('toggler').addEventListener("touchend", function(event){
this.innerText = "Hold";
onStop();
}, true)
document.getElementById('toggler').addEventListener('mousedown', function(event) {
this.innerText = "";
onStart();
}, true);
document.getElementById('toggler').addEventListener('mouseup', function(event) {
this.innerText = "Hold";
onStop();
}, true);
function onStart() {
if (!instance) return;
setStatus('Listening');
startRecording();
}
function onStop() {
setStatus('Processing');
printTextarea('js: stopping recording ...');
stopRecording();
}
function setMove(move, prob) {
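// a trailing '#' (appended by Chessboard::process when no further grammar can
// be generated) marks the end of the game and disables the Hold button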
if (move != null && move.length > 1) {
let gameOver = move[move.length - 1] === '#';
if (gameOver) {
move = move.substring(0, move.length - 1);
document.getElementById('toggler').disabled = true;
}
board.move(move);
movesAll += move + ', prob = ' + prob.toFixed(2) + '% <br>';
nLines++;
// if more than 10 lines, remove the first line
if (nLines > 10) {
var i = movesAll.indexOf('<br>');
if (i > 0) {
movesAll = movesAll.substring(i + 4);
nLines--;
}
}
++move_count;
setStatus(gameOver ? 'Done' : move_count % 2 ? 'Black\'s turn' : 'White\'s turn');
document.getElementById('state-moves').innerHTML = movesAll;
}
else {
setStatus('Failed. ' + (move_count % 2 ? 'Black\'s turn' : 'White\'s turn'));
}
}
function setGrammar(grammar) {
document.getElementById('state-grammar').innerHTML = grammar;
}
</script>
<script type="text/javascript" src="js/chess.js"></script>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,141 +0,0 @@
#include <WChess.h>
#include <emscripten.h>
#include <emscripten/bind.h>
#include <thread>
constexpr int N_THREAD = 8;
std::vector<struct whisper_context *> g_contexts(4, nullptr);
std::mutex g_mutex;
std::thread g_worker;
std::condition_variable g_cv;
bool g_running(false);
std::vector<float> g_pcmf32;
void set_move(const std::string & move, float prob) {
MAIN_THREAD_EM_ASM({
setMove(UTF8ToString($0), $1)
}, move.c_str(), prob);
}
void set_grammar(const std::string & grammar) {
MAIN_THREAD_EM_ASM({
setGrammar(UTF8ToString($0))
}, grammar.c_str());
}
bool get_audio(std::vector<float> & audio) {
std::unique_lock<std::mutex> lock(g_mutex);
g_cv.wait(lock, [] { return !g_running || !g_pcmf32.empty(); });
if (!g_running) return false;
audio = std::move(g_pcmf32);
return true;
}
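The get_audio() above and the set_audio binding further down are the two halves of a standard condition-variable handoff: the producer fills a shared vector under the mutex and notifies, while the consumer waits for either data or shutdown and then takes ownership of the buffer with std::move. For readers less familiar with the pattern, here is a minimal standalone sketch of just that handoff, using only the standard library (an illustration, not part of the change set):

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

static std::mutex              g_mtx;
static std::condition_variable g_cond;
static bool                    g_alive = true;
static std::vector<float>      g_pcm;

// consumer side: wait until data arrives or shutdown is requested,
// draining any pending chunk before giving up
static bool wait_for_audio(std::vector<float> & out) {
    std::unique_lock<std::mutex> lock(g_mtx);
    g_cond.wait(lock, [] { return !g_alive || !g_pcm.empty(); });
    if (!g_alive && g_pcm.empty()) return false;
    out = std::move(g_pcm); // take the data, leaving g_pcm empty
    return true;
}

int main() {
    std::thread consumer([] {
        std::vector<float> chunk;
        while (wait_for_audio(chunk)) {
            std::printf("consumer: got %zu samples\n", chunk.size());
        }
    });

    { // producer: publish one chunk under the lock, then notify
        std::lock_guard<std::mutex> lock(g_mtx);
        g_pcm.assign(16000, 0.0f); // pretend this is 1 s of 16 kHz audio
    }
    g_cond.notify_one();

    { // request shutdown
        std::lock_guard<std::mutex> lock(g_mtx);
        g_alive = false;
    }
    g_cond.notify_one();

    consumer.join();
    return 0;
}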
void wchess_main(size_t i) {
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
wparams.offset_ms = 0;
wparams.translate = false;
wparams.no_context = true;
wparams.single_segment = true;
wparams.print_realtime = false;
wparams.print_progress = false;
wparams.print_timestamps = true;
wparams.print_special = false;
wparams.no_timestamps = true;
wparams.max_tokens = 32;
wparams.audio_ctx = 1280; // partial encoder context for better performance
wparams.temperature = 0.0f;
wparams.temperature_inc = 2.0f;
wparams.greedy.best_of = 1;
wparams.beam_search.beam_size = 1;
wparams.language = "en";
wparams.grammar_penalty = 100.0;
wparams.initial_prompt = "bishop to c3, rook to d4, knight to e5, d4 d5, knight to c3, c3, queen to d4, king b1, pawn to a1, bishop to b2, knight to c3,";
printf("command: using %d threads\n", wparams.n_threads);
WChess::callbacks cb;
cb.get_audio = get_audio;
cb.set_move = set_move;
cb.set_grammar = set_grammar;
WChess(g_contexts[i], wparams, cb, {}).run();
if (i < g_contexts.size()) {
whisper_free(g_contexts[i]);
g_contexts[i] = nullptr;
}
}
EMSCRIPTEN_BINDINGS(command) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
if (g_contexts[i] != nullptr) {
g_running = true;
if (g_worker.joinable()) {
g_worker.join();
}
g_worker = std::thread([i]() {
wchess_main(i);
});
return i + 1;
} else {
return (size_t) 0;
}
}
}
return (size_t) 0;
}));
emscripten::function("free", emscripten::optional_override([](size_t /* index */) {
{
std::unique_lock<std::mutex> lock(g_mutex);
g_running = false;
}
g_cv.notify_one();
}));
emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
--index;
if (index >= g_contexts.size()) {
return -1;
}
if (g_contexts[index] == nullptr) {
return -2;
}
{
std::lock_guard<std::mutex> lock(g_mutex);
const int n = audio["length"].as<int>();
emscripten::val heap = emscripten::val::module_property("HEAPU8");
emscripten::val memory = heap["buffer"];
g_pcmf32.resize(n);
emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
memoryView.call<void>("set", audio);
}
g_cv.notify_one();
return 0;
}));
}
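Outside the browser, the same model can be driven directly through the C API that these bindings wrap. The following is a minimal native-side sketch, not part of the change set: the model path and the silent 16 kHz buffer are placeholders, and the parameter choices mirror wchess_main() above.

#include "whisper.h"

#include <cstdio>
#include <vector>

int main(int argc, char ** argv) {
    const char * model_path = argc > 1 ? argv[1] : "models/ggml-base.en.bin";

    // placeholder input: 1 second of silence at 16 kHz stands in for real audio
    std::vector<float> pcmf32(16000, 0.0f);

    struct whisper_context_params cparams = whisper_context_default_params();
    struct whisper_context * ctx = whisper_init_from_file_with_params(model_path, cparams);
    if (!ctx) {
        fprintf(stderr, "failed to load model '%s'\n", model_path);
        return 1;
    }

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.n_threads      = 4;
    wparams.single_segment = true;  // same choices as wchess_main() above
    wparams.no_timestamps  = true;
    wparams.language       = "en";

    if (whisper_full(ctx, wparams, pcmf32.data(), (int) pcmf32.size()) != 0) {
        fprintf(stderr, "failed to process audio\n");
        whisper_free(ctx);
        return 1;
    }

    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        printf("%s\n", whisper_full_get_segment_text(ctx, i));
    }

    whisper_free(ctx);
    return 0;
}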

View File

@ -206,7 +206,6 @@ void AudioInputCallback(void * inUserData,
params.offset_ms = 0;
params.no_context = true;
params.single_segment = self->stateInp.isRealtime;
params.no_timestamps = params.single_segment;
CFTimeInterval startTime = CACurrentMediaTime();

View File

@ -8,15 +8,15 @@ enum WhisperError: Error {
// Meet Whisper C++ constraint: Don't access from more than one thread at a time.
actor WhisperContext {
private var context: OpaquePointer
init(context: OpaquePointer) {
self.context = context
}
deinit {
whisper_free(context)
}
func fullTranscribe(samples: [Float]) {
// Leave 2 processors free (i.e. the high-efficiency cores).
let maxThreads = max(1, min(8, cpuCount() - 2))
@ -24,17 +24,17 @@ actor WhisperContext {
var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
"en".withCString { en in
// Adapted from whisper.objc
params.print_realtime = true
params.print_progress = false
params.print_realtime = true
params.print_progress = false
params.print_timestamps = true
params.print_special = false
params.translate = false
params.language = en
params.n_threads = Int32(maxThreads)
params.offset_ms = 0
params.no_context = true
params.single_segment = false
params.print_special = false
params.translate = false
params.language = en
params.n_threads = Int32(maxThreads)
params.offset_ms = 0
params.no_context = true
params.single_segment = false
whisper_reset_timings(context)
print("About to run whisper_full")
samples.withUnsafeBufferPointer { samples in
@ -46,7 +46,7 @@ actor WhisperContext {
}
}
}
func getTranscription() -> String {
var transcription = ""
for i in 0..<whisper_full_n_segments(context) {
@ -54,7 +54,7 @@ actor WhisperContext {
}
return transcription
}
static func createContext(path: String) throws -> WhisperContext {
var params = whisper_context_default_params()
#if targetEnvironment(simulator)

View File

@ -137,7 +137,7 @@ void ggml_tallocr_alloc(ggml_tallocr_t alloc, struct ggml_tensor * tensor) {
#ifdef GGML_ALLOCATOR_DEBUG
add_allocated_tensor(alloc, tensor);
size_t cur_max = (char*)addr - (char*)alloc->base + size;
size_t cur_max = (char*)addr - (char*)alloc->data + size;
if (cur_max > alloc->max_size) {
printf("max_size = %.2f MB: tensors: ", cur_max / 1024.0 / 1024.0);
for (int i = 0; i < 1024; i++) {
@ -168,6 +168,10 @@ static void ggml_tallocr_free_tensor(ggml_tallocr_t alloc, struct ggml_tensor *
size = aligned_offset(NULL, size, alloc->alignment);
AT_PRINTF("%s: freeing %s at %p (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, ptr, size, alloc->n_free_blocks);
if (!alloc->measure) {
ggml_backend_buffer_free_tensor(alloc->buffer, tensor);
}
#ifdef GGML_ALLOCATOR_DEBUG
remove_allocated_tensor(alloc, tensor);
#endif
@ -233,7 +237,7 @@ void ggml_tallocr_reset(ggml_tallocr_t alloc) {
}
ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment) {
struct ggml_backend_buffer * buffer = ggml_backend_cpu_buffer_from_ptr(data, size);
struct ggml_backend_buffer * buffer = ggml_backend_cpu_buffer_from_ptr(NULL, data, size);
ggml_tallocr_t alloc = (ggml_tallocr_t)malloc(sizeof(struct ggml_tallocr));
@ -442,19 +446,18 @@ static ggml_tallocr_t node_tallocr(ggml_gallocr_t galloc, struct ggml_tensor * n
return galloc->hash_allocs[ggml_hash_find_or_insert(galloc->hash_set, node)];
}
static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view, bool update_backend) {
static void init_view(ggml_gallocr_t galloc, struct ggml_tensor * view) {
ggml_tallocr_t alloc = node_tallocr(galloc, view);
//printf("init_view: %s from src %s\n", view->name, view->view_src->name);
GGML_ASSERT(view->view_src != NULL && view->view_src->data != NULL);
if (update_backend) {
view->backend = view->view_src->backend;
}
view->backend = view->view_src->backend;
view->buffer = view->view_src->buffer;
view->data = (char *)view->view_src->data + view->view_offs;
// FIXME: the view should be initialized by the owning buffer, but currently this breaks the CUDA backend
// due to the ggml_tensor_extra_gpu ring buffer overwriting the KV cache extras
assert(ggml_tallocr_is_measure(alloc) || !view->buffer || view->buffer->buft == alloc->buffer->buft);
assert(ggml_tallocr_is_measure(alloc) || !view->buffer || view->buffer->backend == alloc->buffer->backend);
if (!alloc->measure) {
ggml_backend_buffer_init_tensor(alloc->buffer, view);
@ -466,7 +469,7 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
if (node->data == NULL) {
if (ggml_is_view(node)) {
init_view(galloc, node, true);
init_view(galloc, node);
} else {
// see if we can reuse a parent's buffer (inplace)
if (ggml_op_can_inplace(node->op)) {
@ -496,14 +499,15 @@ static void allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node) {
AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
node->view_src = view_src;
view_src_hn->n_views += 1;
init_view(galloc, node, false);
init_view(galloc, node);
return;
}
} else {
}
else {
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
node->view_src = parent;
p_hn->n_views += 1;
init_view(galloc, node, false);
init_view(galloc, node);
return;
}
}
@ -533,7 +537,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
hash_get(galloc, view_src)->n_views += 1;
if (node->buffer == NULL && node->data != NULL) {
// view of a pre-allocated tensor, didn't call init_view() yet
init_view(galloc, node, true);
init_view(galloc, node);
}
}
@ -544,7 +548,7 @@ static void ggml_tallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
}
hash_get(galloc, parent)->n_children += 1;
if (ggml_is_view(parent) && parent->buffer == NULL && parent->data != NULL) {
init_view(galloc, parent, true);
init_view(galloc, parent);
}
}
}
@ -659,7 +663,7 @@ size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, st
return max_size;
}
void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_talloc) {
void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, struct ggml_hash_set hash_set, ggml_tallocr_t * hash_node_alloct) {
const size_t hash_size = hash_set.size;
GGML_ASSERT(hash_size >= (size_t)(graph->n_nodes + graph->n_leafs));
@ -682,7 +686,7 @@ void ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, struct ggml_cgraph * grap
// reset hash values
memset(galloc->hash_values, 0, sizeof(struct hash_node) * hash_size);
galloc->hash_allocs = hash_node_talloc;
galloc->hash_allocs = hash_node_alloct;
ggml_tallocr_alloc_graph_impl(galloc, graph);
@ -760,43 +764,3 @@ size_t ggml_allocr_max_size(ggml_allocr_t alloc) {
size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph) {
return ggml_gallocr_alloc_graph(alloc->galloc, alloc->talloc, graph);
}
// utils
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
size_t alignment = ggml_backend_buft_get_alignment(buft);
size_t nbytes = 0;
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (t->data == NULL && t->view_src == NULL) {
nbytes += GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
}
}
if (nbytes == 0) {
fprintf(stderr, "%s: no tensors to allocate\n", __func__);
return NULL;
}
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (t->data == NULL) {
if (t->view_src == NULL) {
ggml_tallocr_alloc(tallocr, t);
} else {
ggml_backend_view_init(buffer, t);
}
}
}
ggml_tallocr_free(tallocr);
return buffer;
}
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
}
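ggml_backend_alloc_ctx_tensors() above removes most of the manual allocator bookkeeping: create a ggml context with no_alloc = true, define the tensors, and let the helper place all of them in a single backend buffer. A minimal usage sketch under that newer API (CPU backend assumed, not part of the change set):

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include <cstdio>
#include <vector>

int main() {
    // metadata-only context: tensor data is allocated later, in a backend buffer
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    (void) b;

    ggml_backend_t        backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf     = ggml_backend_alloc_ctx_tensors(ctx, backend);
    if (!buf) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }

    // tensors now have data pointers inside 'buf' and can be filled
    std::vector<float> ones(1024, 1.0f);
    ggml_backend_tensor_set(a, ones.data(), 0, ggml_nbytes(a));

    printf("allocated %zu bytes in a single backend buffer\n", ggml_backend_buffer_get_size(buf));

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}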

View File

@ -8,7 +8,6 @@ extern "C" {
struct ggml_backend;
struct ggml_backend_buffer;
struct ggml_backend_buffer_type;
//
// Legacy API
@ -43,7 +42,7 @@ GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph
// ggml-backend v2 API
//
// Separate tensor and graph allocator objects
// Seperate tensor and graph allocator objects
// This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
// The original API is kept as a wrapper around the new API
@ -81,12 +80,6 @@ GGML_API void ggml_gallocr_alloc_graph_n(
struct ggml_hash_set hash_set,
ggml_tallocr_t * hash_node_talloc);
// Utils
// Create a buffer and allocate all the tensors in a ggml_context
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, struct ggml_backend_buffer_type * buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, struct ggml_backend * backend);
#ifdef __cplusplus
}
#endif

View File

@ -12,50 +12,31 @@ extern "C" {
// Backend buffer
//
// buffer type
typedef void * ggml_backend_buffer_type_context_t;
struct ggml_backend_buffer_type_i {
ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
size_t (*get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
size_t (*get_alloc_size) (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
bool (*supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
};
struct ggml_backend_buffer_type {
struct ggml_backend_buffer_type_i iface;
ggml_backend_buffer_type_context_t context;
};
// buffer
typedef void * ggml_backend_buffer_context_t;
struct ggml_backend_buffer_i {
void (*free_buffer)(ggml_backend_buffer_t buffer);
//void (*reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
void * (*get_base) (ggml_backend_buffer_t buffer);
void (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
// (optional) copy tensor between different buffer-type, allow for single-copy tranfers
void (*cpy_tensor_from)(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*free_buffer) (ggml_backend_buffer_t buffer);
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
};
struct ggml_backend_buffer {
struct ggml_backend_buffer_i iface;
ggml_backend_buffer_type_t buft;
struct ggml_backend_buffer_i iface;
ggml_backend_t backend;
ggml_backend_buffer_context_t context;
size_t size;
};
ggml_backend_buffer_t ggml_backend_buffer_init(
ggml_backend_buffer_type_t buft,
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
struct ggml_backend * backend,
struct ggml_backend_buffer_i iface,
ggml_backend_buffer_context_t context,
size_t size);
//
// Backend
//
@ -68,18 +49,21 @@ extern "C" {
void (*free)(ggml_backend_t backend);
// buffer allocation
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
// (optional) asynchroneous tensor data access
// get buffer alignment
size_t (*get_alignment)(ggml_backend_t backend);
// tensor data access
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
// (optional) asynchroneous tensor copy
void (*cpy_tensor_from_async)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to_async) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*synchronize) (ggml_backend_t backend);
// (optional) copy tensor between different backends, allow for single-copy tranfers
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
// compute graph with a plan
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
@ -98,15 +82,6 @@ extern "C" {
ggml_backend_context_t context;
};
//
// Backend registry
//
typedef ggml_backend_t (*ggml_backend_init_fn)(const char * params, void * user_data);
void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large

View File

@ -7,44 +7,41 @@
extern "C" {
#endif
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
//
// Backend buffer
//
// buffer type
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
struct ggml_backend_buffer;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
// buffer
// backend buffer functions
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
//
// Backend
//
struct ggml_backend;
typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t;
GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
GGML_API void ggml_backend_free(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
@ -60,7 +57,6 @@ extern "C" {
// tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
//
// CPU backend
@ -72,23 +68,8 @@ extern "C" {
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
// Create a backend buffer from an existing pointer
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
//
// Backend registry
//
// The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
GGML_API size_t ggml_backend_reg_get_count(void);
GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
GGML_API const char * ggml_backend_reg_get_name(size_t i);
GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
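The registry above turns backend selection into a runtime decision instead of a hard-coded init call. A small sketch of how it could be used (not part of the change set; it assumes the CPU backend is registered under the name "CPU", which is what the default registration uses):

#include "ggml-backend.h"

#include <cstdio>

int main() {
    // list every registered backend
    const size_t n = ggml_backend_reg_get_count();
    for (size_t i = 0; i < n; ++i) {
        printf("backend %zu: %s\n", i, ggml_backend_reg_get_name(i));
    }

    // initialize one by name; the string may also carry parameters ("name[:params]")
    ggml_backend_t backend = ggml_backend_reg_init_backend_from_str("CPU");
    if (backend) {
        printf("initialized: %s\n", ggml_backend_name(backend));
        ggml_backend_free(backend);
    }
    return 0;
}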
//
// Backend scheduler
@ -150,32 +131,6 @@ extern "C" {
ggml_backend_sched_t sched,
struct ggml_cgraph * graph);
//
// Utils
//
struct ggml_backend_graph_copy {
ggml_backend_buffer_t buffer;
struct ggml_context * ctx_allocated;
struct ggml_context * ctx_unallocated;
struct ggml_cgraph * graph;
};
// Copy a graph to a different backend
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
// Compare the output of two backends
GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large

View File

@ -49,15 +49,7 @@ GGML_API int ggml_cuda_get_device_count(void);
GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
// backend API
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
GGML_API int ggml_backend_cuda_get_device(ggml_backend_t backend);
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
// pinned host buffer for use with CPU backend for faster copies between CPU and GPU
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
GGML_API ggml_backend_t ggml_backend_cuda_init(void); // TODO: take a list of devices to use
#ifdef __cplusplus
}
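ggml_backend_cuda_host_buffer_type() exposes pinned (page-locked) host memory, which mainly pays off for the staging copies between CPU and GPU. A sketch of how such a staging buffer could be obtained through the buffer-type API from ggml-backend.h (illustrative only; assumes a CUDA build with GGML_USE_CUBLAS defined):

#include "ggml-backend.h"
#include "ggml-cuda.h"

#include <cstdio>

int main() {
#ifdef GGML_USE_CUBLAS
    // 16 MiB of pinned host memory: usable by the CPU backend, but page-locked
    // so host<->device copies can be performed faster
    const size_t nbytes = 16*1024*1024;
    ggml_backend_buffer_t host_buf =
        ggml_backend_buft_alloc_buffer(ggml_backend_cuda_host_buffer_type(), nbytes);
    if (!host_buf) {
        fprintf(stderr, "failed to allocate pinned host buffer\n");
        return 1;
    }
    void * staging = ggml_backend_buffer_get_base(host_buf);
    printf("pinned staging buffer at %p (%zu bytes)\n", staging, nbytes);
    ggml_backend_buffer_free(host_buf);
#endif
    return 0;
}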

View File

@ -232,7 +232,7 @@ bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
// returns GGML_HAHSHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
// return index, asserts if table is full

View File

@ -99,12 +99,6 @@ GGML_API ggml_backend_t ggml_backend_metal_init(void);
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
// helper to check if the device supports a specific family
// ideally, the user code should be doing these checks
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
#ifdef __cplusplus
}

File diff suppressed because it is too large


File diff suppressed because it is too large

View File

@ -1,18 +1,20 @@
#include "ggml.h"
#include "ggml-opencl.h"
#include <array>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <sstream>
#include <vector>
#include <limits>
#define CL_TARGET_OPENCL_VERSION 110
#include <clblast.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "ggml.h"
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

View File

@ -19,7 +19,7 @@
#ifdef __wasm_simd128__
#include <wasm_simd128.h>
#else
#if defined(__POWER9_VECTOR__) || defined(__powerpc64__)
#ifdef __POWER9_VECTOR__
#include <altivec.h>
#undef bool
#define bool _Bool
@ -1368,12 +1368,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
float max = x[0];
float sum_w = weights[0];
float sum_x = sum_w * x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
// use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
for (volatile int i = 1; i < n; ++i) {
#else
for (int i = 1; i < n; ++i) {
#endif
if (x[i] < min) min = x[i];
if (x[i] > max) max = x[i];
float w = weights[i];
@ -3114,7 +3109,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
size_t vl = __riscv_vsetvl_e8m1(qk/2);
// These temporary registers are for masking and shift operations
// These tempory registers are for masking and shift operations
vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
@ -4757,7 +4752,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
vl = 16;
// retrieve lane to multiply with scale
// retreive lane to multiply with scale
vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);

1280
ggml.c

File diff suppressed because it is too large

96
ggml.h
View File

@ -215,9 +215,9 @@
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
#define GGML_MAX_DIMS 4
#define GGML_MAX_PARAMS 2048
#define GGML_MAX_PARAMS 1024
#define GGML_MAX_CONTEXTS 64
#define GGML_MAX_SRC 10
#define GGML_MAX_SRC 6
#define GGML_MAX_NAME 64
#define GGML_MAX_OP_PARAMS 64
#define GGML_DEFAULT_N_THREADS 4
@ -244,10 +244,11 @@
#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
fflush(stdout); \
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
fflush(stderr); \
fflush(stdout); \
ggml_print_backtrace(); \
abort(); \
exit(1); \
} \
} while (0)
@ -283,20 +284,6 @@
const type prefix##3 = (pointer)->array[3]; \
GGML_UNUSED(prefix##3);
#define GGML_TENSOR_UNARY_OP_LOCALS \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#define GGML_TENSOR_BINARY_OP_LOCALS \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#ifdef __cplusplus
extern "C" {
#endif
@ -395,7 +382,6 @@ extern "C" {
GGML_OP_GROUP_NORM,
GGML_OP_MUL_MAT,
GGML_OP_MUL_MAT_ID,
GGML_OP_OUT_PROD,
GGML_OP_SCALE,
@ -422,10 +408,8 @@ extern "C" {
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
GGML_OP_UPSCALE, // nearest interpolate
GGML_OP_PAD,
GGML_OP_ARGSORT,
GGML_OP_LEAKY_RELU,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
@ -465,8 +449,7 @@ extern "C" {
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_SILU,
GGML_UNARY_OP_COUNT,
GGML_UNARY_OP_LEAKY
};
enum ggml_object_type {
@ -649,9 +632,6 @@ extern "C" {
GGML_API const char * ggml_op_name (enum ggml_op op);
GGML_API const char * ggml_op_symbol(enum ggml_op op);
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_quantized(enum ggml_type type);
@ -794,9 +774,6 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
// dst = a
// view(dst, nb1, nb2, nb3, offset) += b
// return dst
GGML_API struct ggml_tensor * ggml_acc(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -961,14 +938,15 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_leaky_relu(
GGML_API struct ggml_tensor * ggml_leaky(
struct ggml_context * ctx,
struct ggml_tensor * a, float negative_slope, bool inplace);
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_relu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// TODO: double-check this computation is correct
GGML_API struct ggml_tensor * ggml_gelu(
struct ggml_context * ctx,
struct ggml_tensor * a);
@ -1050,16 +1028,6 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
// indirect matrix multiplication
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
GGML_API struct ggml_tensor * ggml_mul_mat_id(
struct ggml_context * ctx,
struct ggml_tensor * const as[],
int n_as,
struct ggml_tensor * ids,
int id,
struct ggml_tensor * b);
// A: m columns, n rows,
// B: p columns, n rows,
// result is m columns, p rows
@ -1267,7 +1235,6 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
// supports 3D: a->ne[2] == b->ne[1]
GGML_API struct ggml_tensor * ggml_get_rows(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -1316,14 +1283,6 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
// fused soft_max(a*scale + mask)
// mask is optional
GGML_API struct ggml_tensor * ggml_soft_max_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * mask,
float scale);
GGML_API struct ggml_tensor * ggml_soft_max_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -1412,13 +1371,8 @@ extern "C" {
int n_dims,
int mode,
int n_ctx,
int n_orig_ctx,
float freq_base,
float freq_scale,
float ext_factor,
float attn_factor,
float beta_fast,
float beta_slow,
float xpos_base,
bool xpos_down);
@ -1554,32 +1508,6 @@ extern "C" {
struct ggml_tensor * a,
int scale_factor);
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
GGML_API struct ggml_tensor * ggml_pad(
struct ggml_context * ctx,
struct ggml_tensor * a,
int p0,
int p1,
int p2,
int p3);
// sort rows
enum ggml_sort_order {
GGML_SORT_ASC,
GGML_SORT_DESC,
};
GGML_API struct ggml_tensor * ggml_argsort(
struct ggml_context * ctx,
struct ggml_tensor * a,
enum ggml_sort_order order);
// top k elements per row
GGML_API struct ggml_tensor * ggml_top_k(
struct ggml_context * ctx,
struct ggml_tensor * a,
int k);
GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
struct ggml_tensor * q,
@ -1641,6 +1569,7 @@ extern "C" {
int kh);
// used in sam
GGML_API struct ggml_tensor * ggml_add_rel_pos(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -1815,7 +1744,7 @@ extern "C" {
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
@ -2111,7 +2040,6 @@ extern "C" {
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);

View File

@ -850,8 +850,9 @@ struct whisper_context {
int64_t t_load_us = 0;
int64_t t_start_us = 0;
ggml_type wtype = ggml_type::GGML_TYPE_F16; // weight type (FP32 / FP16 / QX)
ggml_type itype = ggml_type::GGML_TYPE_F16; // intermediate type (FP32 or FP16)
ggml_type wtype_e = ggml_type::GGML_TYPE_F16; // weight type (FP32 / FP16 / QX) Encoder
ggml_type wtype_d = ggml_type::GGML_TYPE_F16; // weight type (FP32 / FP16 / QX) Decoder
ggml_type itype = ggml_type::GGML_TYPE_F16; // intermediate type (FP32 or FP16)
whisper_context_params params;
@ -1063,7 +1064,7 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
#ifdef GGML_USE_CUBLAS
if (params.use_gpu && ggml_cublas_loaded()) {
WHISPER_LOG_INFO("%s: using CUDA backend\n", __func__);
backend_gpu = ggml_backend_cuda_init(0);
backend_gpu = ggml_backend_cuda_init();
if (!backend_gpu) {
WHISPER_LOG_ERROR("%s: ggml_backend_cuda_init() failed\n", __func__);
}
@ -1077,10 +1078,6 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
backend_gpu = ggml_backend_metal_init();
if (!backend_gpu) {
WHISPER_LOG_ERROR("%s: ggml_backend_metal_init() failed\n", __func__);
} else if (!ggml_backend_metal_supports_family(backend_gpu, 7)) {
WHISPER_LOG_ERROR("%s: Metal GPU does not support family 7 - falling back to CPU\n", __func__);
ggml_backend_free(backend_gpu);
backend_gpu = NULL;
}
}
#endif
@ -1172,8 +1169,8 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
// for the big tensors, we have the option to store the data in 16-bit floats or quantized
// in order to save memory and also to speed up the computation
wctx.wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype));
if (wctx.wtype == GGML_TYPE_COUNT) {
wctx.wtype_e = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype));
if (wctx.wtype_e == GGML_TYPE_COUNT) {
WHISPER_LOG_ERROR("%s: invalid model (bad ftype value %d)\n", __func__, model.hparams.ftype);
return false;
}
@ -1294,8 +1291,9 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
WHISPER_LOG_INFO("%s: n_langs = %d\n", __func__, vocab.num_languages());
}
const ggml_type wtype = wctx.wtype;
const ggml_type vtype = wctx.wtype == GGML_TYPE_F32 ? GGML_TYPE_F32 : GGML_TYPE_F16; // conv type
const ggml_type wtype_e = wctx.wtype_e;
const ggml_type wtype_d = wctx.wtype_d;
const ggml_type vtype = wctx.wtype_e == GGML_TYPE_F32 ? GGML_TYPE_F32 : GGML_TYPE_F16; // conv type
// create the ggml context
{
@ -1345,10 +1343,10 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
model.e_pe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_audio_state, n_audio_ctx);
model.e_conv_1_w = ggml_new_tensor_3d(ctx, vtype, 3, n_mels, n_audio_state);
model.e_conv_1_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, n_audio_state);
model.e_conv_1_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2*n_audio_ctx, n_audio_state);
model.e_conv_2_w = ggml_new_tensor_3d(ctx, vtype, 3, n_audio_state, n_audio_state);
model.e_conv_2_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 1, n_audio_state);
model.e_conv_2_b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_audio_ctx, n_audio_state);
model.e_ln_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
model.e_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
@ -1371,24 +1369,24 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
layer.mlp_ln_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.mlp_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.mlp_0_w = ggml_new_tensor_2d(ctx, wtype, n_audio_state, 4*n_audio_state);
layer.mlp_0_w = ggml_new_tensor_2d(ctx, wtype_e, n_audio_state, 4*n_audio_state);
layer.mlp_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_audio_state);
layer.mlp_1_w = ggml_new_tensor_2d(ctx, wtype, 4*n_audio_state, n_audio_state);
layer.mlp_1_w = ggml_new_tensor_2d(ctx, wtype_e, 4*n_audio_state, n_audio_state);
layer.mlp_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.attn_ln_0_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.attn_ln_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.attn_q_w = ggml_new_tensor_2d(ctx, wtype, n_audio_state, n_audio_state);
layer.attn_q_w = ggml_new_tensor_2d(ctx, wtype_e, n_audio_state, n_audio_state);
layer.attn_q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.attn_k_w = ggml_new_tensor_2d(ctx, wtype, n_audio_state, n_audio_state);
layer.attn_k_w = ggml_new_tensor_2d(ctx, wtype_e, n_audio_state, n_audio_state);
layer.attn_v_w = ggml_new_tensor_2d(ctx, wtype, n_audio_state, n_audio_state);
layer.attn_v_w = ggml_new_tensor_2d(ctx, wtype_e, n_audio_state, n_audio_state);
layer.attn_v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
layer.attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype, n_audio_state, n_audio_state);
layer.attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype_e, n_audio_state, n_audio_state);
layer.attn_ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_audio_state);
// map by name
@ -1421,7 +1419,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
{
model.d_pe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_text_state, n_text_ctx);
model.d_te = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_vocab);
model.d_te = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_vocab);
model.d_ln_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
model.d_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
@ -1440,38 +1438,38 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
layer.mlp_ln_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.mlp_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.mlp_0_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, 4*n_text_state);
layer.mlp_0_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, 4*n_text_state);
layer.mlp_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4*n_text_state);
layer.mlp_1_w = ggml_new_tensor_2d(ctx, wtype, 4*n_text_state, n_text_state);
layer.mlp_1_w = ggml_new_tensor_2d(ctx, wtype_d, 4*n_text_state, n_text_state);
layer.mlp_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.attn_ln_0_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.attn_ln_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.attn_q_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.attn_q_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.attn_q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.attn_k_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.attn_k_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.attn_v_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.attn_v_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.attn_v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.attn_ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.cross_attn_ln_0_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.cross_attn_ln_0_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.cross_attn_q_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.cross_attn_q_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.cross_attn_q_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.cross_attn_k_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.cross_attn_k_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.cross_attn_v_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.cross_attn_v_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.cross_attn_v_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
layer.cross_attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype, n_text_state, n_text_state);
layer.cross_attn_ln_1_w = ggml_new_tensor_2d(ctx, wtype_d, n_text_state, n_text_state);
layer.cross_attn_ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_text_state);
// map by name
@ -1578,25 +1576,29 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
auto tensor = model.tensors[name.data()];
if (ggml_nelements(tensor) != nelements) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
WHISPER_LOG_ERROR("%s: shape: [%d, %d, %d], expected: [%d, %d, %d]\n",
__func__, ne[0], ne[1], ne[2], (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2]);
return false;
}
const bool is_conv_bias = (name == "encoder.conv1.bias" || name == "encoder.conv2.bias");
if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1] || tensor->ne[2] != ne[2]) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong shape in model file: got [%d, %d, %d], expected [%d, %d, %d]\n",
__func__, name.data(), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], ne[0], ne[1], ne[2]);
return false;
}
if (!is_conv_bias) {
if (ggml_nelements(tensor) != nelements) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
WHISPER_LOG_ERROR("%s: shape: [%d, %d, %d], expected: [%d, %d, %d]\n",
__func__, ne[0], ne[1], ne[2], (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2]);
return false;
}
const size_t bpe = ggml_type_size(ggml_type(ttype));
if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1] || tensor->ne[2] != ne[2]) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong shape in model file: got [%d, %d, %d], expected [%d, %d, %d]\n",
__func__, name.data(), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], ne[0], ne[1], ne[2]);
return false;
}
if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
return false;
const size_t bpe = ggml_type_size(ggml_type(ttype));
if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
WHISPER_LOG_ERROR("%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
__func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
return false;
}
}
ggml_backend_t backend = wctx.backend;
@ -1607,7 +1609,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
#ifdef GGML_USE_METAL
|| ggml_backend_is_metal(backend)
#endif
)) {
) && !is_conv_bias) {
// for the CPU and Metal backend, we can read directly into the tensor
loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
BYTESWAP_TENSOR(tensor);
@ -1615,7 +1617,24 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
// read into a temporary buffer first, then copy to device memory
read_buf.resize(ggml_nbytes(tensor));
loader->read(loader->context, read_buf.data(), read_buf.size());
// we repeat the 2 bias tensors along dim 0:
// [1, 512] -> [3000, 512] (conv1.bias)
// [1, 512] -> [1500, 512] (conv2.bias)
if (is_conv_bias) {
loader->read(loader->context, read_buf.data(), read_buf.size() / tensor->ne[0]);
float * data_f32 = (float *) read_buf.data();
for (int64_t y = 0; y < tensor->ne[1]; ++y) {
const int64_t yy = tensor->ne[1] - y - 1;
const float val = data_f32[yy];
for (int64_t x = 0; x < tensor->ne[0]; ++x) {
data_f32[yy*tensor->ne[0] + x] = val;
}
}
} else {
loader->read(loader->context, read_buf.data(), read_buf.size());
}
ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
}
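The backward loop above expands each conv-bias value into a full row in place: because the per-channel source values all sit at the front of the buffer, filling rows from the last channel down never overwrites a value that has not been read yet. Stripped of the model-loading context, the same trick looks like this (standalone sketch with illustrative sizes):

#include <cstdio>
#include <vector>

int main() {
    const int n_ctx = 6; // stands in for 2*n_audio_ctx (3000 for conv1 in the real model)
    const int n_ch  = 4; // stands in for n_audio_state (512 in the real model)

    // the first n_ch floats hold the per-channel bias values; the rest of the
    // [n_ch x n_ctx] buffer is the space they are expanded into
    std::vector<float> buf(n_ch * n_ctx, 0.0f);
    for (int c = 0; c < n_ch; ++c) buf[c] = 0.1f * (float)(c + 1);

    // expand back-to-front: row c is filled with bias[c]; starting from the last
    // channel guarantees buf[c] still holds the original bias when row c is written
    for (int y = 0; y < n_ch; ++y) {
        const int   c   = n_ch - y - 1;
        const float val = buf[c];
        for (int x = 0; x < n_ctx; ++x) {
            buf[c*n_ctx + x] = val;
        }
    }

    for (int c = 0; c < n_ch; ++c) {
        printf("channel %d: %.1f ... %.1f\n", c, buf[c*n_ctx], buf[c*n_ctx + n_ctx - 1]);
    }
    return 0;
}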
@ -1716,11 +1735,21 @@ static struct ggml_cgraph * whisper_build_graph_conv(
{
cur = ggml_conv_1d_ph(ctx0, model.e_conv_1_w, mel, 1, 1);
cur = ggml_add(ctx0, cur, model.e_conv_1_b);
//cur = ggml_add(ctx0,
// ggml_repeat(ctx0,
// model.e_conv_1_b,
// cur),
// cur);
cur = ggml_gelu(ctx0, cur);
cur = ggml_conv_1d_ph(ctx0, model.e_conv_2_w, cur, 2, 1);
cur = ggml_add(ctx0, cur, model.e_conv_2_b);
//cur = ggml_add(ctx0,
// ggml_repeat(ctx0,
// model.e_conv_2_b,
// cur),
// cur);
cur = ggml_gelu(ctx0, cur);
}
@ -3500,7 +3529,7 @@ int whisper_encode(struct whisper_context * ctx, int offset, int n_threads) {
int whisper_decode_with_state(struct whisper_context * ctx, struct whisper_state * state, const whisper_token * tokens, int n_tokens, int n_past, int n_threads) {
whisper_batch_prep_legacy(state->batch, tokens, n_tokens, n_past, 0);
whisper_kv_cache_seq_rm(state->kv_self, 0, n_past, -1);
whisper_kv_cache_seq_rm(ctx->state->kv_self, 0, n_past, -1);
if (!whisper_decode_internal(*ctx, *state, state->batch, n_threads, nullptr, nullptr)) {
WHISPER_LOG_ERROR("%s: failed to eval\n", __func__);
@ -3513,10 +3542,19 @@ int whisper_decode_with_state(struct whisper_context * ctx, struct whisper_state
int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, int n_tokens, int n_past, int n_threads) {
if (ctx->state == nullptr) {
WHISPER_LOG_ERROR("%s: ERROR state was not loaded.\n", __func__);
return -1;
return false;
}
return whisper_decode_with_state(ctx, ctx->state, tokens, n_tokens, n_past, n_threads);
whisper_kv_cache_seq_rm(ctx->state->kv_self, 0, n_past, -1);
whisper_batch_prep_legacy(ctx->state->batch, tokens, n_tokens, n_past, 0);
if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->batch, n_threads, nullptr, nullptr)) {
WHISPER_LOG_ERROR("%s: failed to eval\n", __func__);
return 1;
}
return 0;
}
int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, int n_max_tokens) {
@ -3568,17 +3606,6 @@ const char * whisper_lang_str(int id) {
return nullptr;
}
const char * whisper_lang_str_full(int id) {
for (const auto & kv : g_lang) {
if (kv.second.first == id) {
return kv.second.second.c_str();
}
}
WHISPER_LOG_ERROR("%s: unknown language id %d\n", __func__, id);
return nullptr;
}
int whisper_lang_auto_detect_with_state(
struct whisper_context * ctx,
struct whisper_state * state,
@ -5028,7 +5055,6 @@ int whisper_full_with_state(
// basically don't process anything that is less than 1.0s
// see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
if (seek_end < seek_start + (params.speed_up ? 50 : 100)) {
WHISPER_PRINT_DEBUG("%s: input is too short - %d ms < 1000 ms\n", __func__, (seek_end - seek_start)*10);
return 0;
}
@ -5164,10 +5190,10 @@ int whisper_full_with_state(
const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
params.progress_callback(
ctx, state, progress_cur, params.progress_callback_user_data);
ctx, ctx->state, progress_cur, params.progress_callback_user_data);
}
// if only 1 second left, then stop
// of only 1 second left, then stop
if (seek + 100 >= seek_end) {
break;
}
@ -5456,7 +5482,6 @@ int whisper_full_with_state(
// do not allow to go back in time
if (has_ts && seek_delta > seek_delta_new && result_len < i) {
WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to seek_delta (%d > %d)\n", __func__, j, seek_delta, seek_delta_new);
failed = true; // TODO: maybe this is not a failure ?
continue;
}
@ -5485,7 +5510,6 @@ int whisper_full_with_state(
if (seek + seek_delta + 100 >= seek_end) {
result_len = i + 1;
} else {
WHISPER_PRINT_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);
failed = true;
continue;
}
@ -5496,7 +5520,6 @@ int whisper_full_with_state(
seek_delta = 100*WHISPER_CHUNK_SIZE;
}
WHISPER_PRINT_DEBUG("%s: decoder %d completed\n", __func__, j);
completed = true;
continue;
}
@ -5512,7 +5535,6 @@ int whisper_full_with_state(
// sometimes, the decoding can get stuck in a repetition loop
// this is an attempt to mitigate such cases - we flag the decoding as failed and use a fallback strategy
if (i == n_max - 1 && (result_len == 0 || seek_delta < 100*WHISPER_CHUNK_SIZE/2)) {
WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to repetition loop\n", __func__, j);
failed = true;
continue;
}
@ -5656,27 +5678,28 @@ int whisper_full_with_state(
WHISPER_PRINT_DEBUG("%s: best decoder = %d\n", __func__, best_decoder_id);
}
bool success = true;
// was the decoding successful for the current temperature?
// do fallback only if:
// - we are not at the last temperature
if (it != (int) temperatures.size() - 1) {
// - we are not at the end of the audio (3 sec)
if (it != (int) temperatures.size() - 1 &&
seek_end - seek > 10*WHISPER_CHUNK_SIZE) {
bool success = true;
const auto & decoder = state->decoders[best_decoder_id];
if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
WHISPER_PRINT_DEBUG("%s: failed due to avg_logprobs %8.5f < %8.5f\n", __func__, decoder.sequence.avg_logprobs, params.logprob_thold);
success = false;
state->n_fail_p++;
}
}
if (success) {
//for (auto & token : ctx->decoders[best_decoder_id].sequence.tokens) {
// WHISPER_PRINT_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
//}
if (success) {
//for (auto & token : ctx->decoders[best_decoder_id].sequence.tokens) {
// WHISPER_PRINT_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
//}
break;
break;
}
}
WHISPER_PRINT_DEBUG("\n%s: failed to decode with temperature = %.2f\n", __func__, t_cur);
@ -6054,43 +6077,6 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
// 1GB array
const size_t size = arr*1e6;
double sum = 0.0;
// heat-up
{
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);
for (size_t i = 0; i < size; i++) src[i] = i;
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
for (size_t i = 0; i < n; i++) {
const int64_t t0 = ggml_time_us();
memcpy(dst, src, size);
const int64_t t1 = ggml_time_us();
tsum += (t1 - t0)*1e-6;
src[rand() % size] = rand() % 256;
}
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (heat-up)\n", (double) (n*size)/(tsum*1e9));
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
}
free(src);
free(dst);
}
// single-thread
{
char * src = (char *) malloc(size);
@ -6101,6 +6087,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
double sum = 0.0;
for (size_t i = 0; i < n; i++) {
const int64_t t0 = ggml_time_us();
@ -6114,73 +6101,21 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
src[rand() % size] = rand() % 256;
}
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s ( 1 thread)\n", (double) (n*size)/(tsum*1e9));
snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
snprintf(strbuf, sizeof(strbuf), "sum: %f\n", sum);
s += strbuf;
}
free(src);
free(dst);
}
// multi-thread
for (uint32_t k = 1; k <= n_threads; k++) {
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);
for (size_t i = 0; i < size; i++) src[i] = i;
memcpy(dst, src, size); // heat-up
double tsum = 0.0;
auto helper = [&](int th) {
const int64_t i0 = (th + 0)*size/k;
const int64_t i1 = (th + 1)*size/k;
for (size_t i = 0; i < n; i++) {
memcpy(dst + i0, src + i0, i1 - i0);
src[i0 + rand() % (i1 - i0)] = rand() % 256;
};
};
const int64_t t0 = ggml_time_us();
std::vector<std::thread> threads(k - 1);
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th] = std::thread(helper, th);
}
helper(k - 1);
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th].join();
}
const int64_t t1 = ggml_time_us();
tsum += (t1 - t0)*1e-6;
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (%2d thread)\n", (double) (n*size)/(tsum*1e9), k);
s += strbuf;
// needed to prevent the compiler from optimizing the memcpy away
{
for (size_t i = 0; i < size; i++) sum += dst[i];
}
free(src);
free(dst);
}
snprintf(strbuf, sizeof(strbuf), "sum: %f\n", sum);
s += strbuf;
return s.c_str();
}

View File

@ -50,9 +50,7 @@ extern "C" {
//
// ...
//
// whisper_context_params cparams = whisper_context_default_params();
//
// struct whisper_context * ctx = whisper_init_from_file_with_params("/path/to/ggml-base.en.bin", cparams);
// struct whisper_context * ctx = whisper_init_from_file("/path/to/ggml-base.en.bin");
//
// if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
// fprintf(stderr, "failed to process audio\n");
@ -315,9 +313,6 @@ extern "C" {
// Return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found
WHISPER_API const char * whisper_lang_str(int id);
// Return the short string of the specified language name (e.g. 2 -> "german"), returns nullptr if not found
WHISPER_API const char * whisper_lang_str_full(int id);
// Use mel data at offset_ms to try and auto-detect the spoken language
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
// Returns the top language id or negative on failure
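Together with whisper_lang_auto_detect(), the new whisper_lang_str_full() is enough for a small language-probing helper. The sketch below is illustrative rather than part of the change set; it assumes a loaded context, a 16 kHz mono float buffer, and the standard whisper.h signatures for whisper_pcm_to_mel(), whisper_lang_max_id() and whisper_lang_auto_detect():

#include "whisper.h"

#include <cstdio>
#include <vector>

// probe the spoken language of a 16 kHz mono buffer and print the result
void print_detected_language(struct whisper_context * ctx, const std::vector<float> & pcmf32) {
    // auto-detection runs on the mel spectrogram, so compute it first
    if (whisper_pcm_to_mel(ctx, pcmf32.data(), (int) pcmf32.size(), 4) != 0) {
        fprintf(stderr, "failed to compute mel spectrogram\n");
        return;
    }

    std::vector<float> probs(whisper_lang_max_id() + 1, 0.0f);
    const int lang_id = whisper_lang_auto_detect(ctx, 0 /* offset_ms */, 4 /* n_threads */, probs.data());
    if (lang_id < 0) {
        fprintf(stderr, "language detection failed\n");
        return;
    }

    printf("detected language: %s (%s), p = %.3f\n",
           whisper_lang_str(lang_id), whisper_lang_str_full(lang_id), probs[lang_id]);
}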