tmp : demonstrate how to measure time of ggml ops

2025-06-24 17:15:19 +00:00 · 2023-03-09 09:28:06 +02:00
820 changed files with 21747 additions and 254929 deletions
--- a/.devops/cublas.Dockerfile
+++ b/.devops/cublas.Dockerfile
@ -1,28 +0,0 @@
-ARG UBUNTU_VERSION=22.04
-
-# This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
-
-# Target the CUDA build image
-ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-
-FROM ${BASE_CUDA_DEV_CONTAINER} as build
-
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
-
-RUN apt-get update && \
-    apt-get install -y build-essential git cmake libsdl2-dev wget
-
-WORKDIR /app
-
-COPY . .
-
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
-
-RUN make base.en
-
-ENTRYPOINT ["/app/main"]
--- a/.devops/main-cuda.Dockerfile
+++ b/.devops/main-cuda.Dockerfile
@ -1,40 +0,0 @@
-ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
-ARG CUDA_VERSION=12.3.1
-# Target the CUDA build image
-ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-# Target the CUDA runtime image
-ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
-
-FROM ${BASE_CUDA_DEV_CONTAINER} AS build
-WORKDIR /app
-
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
-
-RUN apt-get update && \
-    apt-get install -y build-essential libsdl2-dev wget cmake \
-    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
-# Ref: https://stackoverflow.com/a/53464012
-ENV CUDA_MAIN_VERSION=12.3
-ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
-
-COPY .. .
-RUN make base.en
-
-FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
-ENV CUDA_MAIN_VERSION=12.3
-ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
-WORKDIR /app
-
-RUN apt-get update && \
-  apt-get install -y curl ffmpeg wget cmake \
-  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
-COPY --from=build /app /app
-ENTRYPOINT [ "bash", "-c" ]
--- a/.devops/main.Dockerfile
+++ b/.devops/main.Dockerfile
@ -1,19 +0,0 @@
-FROM ubuntu:22.04 AS build
-WORKDIR /app
-
-RUN apt-get update && \
-  apt-get install -y build-essential wget cmake \
-  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
-COPY .. .
-RUN make base.en
-
-FROM ubuntu:22.04 AS runtime
-WORKDIR /app
-
-RUN apt-get update && \
-  apt-get install -y curl ffmpeg libsdl2-dev wget cmake \
-  && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
-COPY --from=build /app /app
-ENTRYPOINT [ "bash", "-c" ]
--- a/.github/workflows/bindings-go.yml
+++ b/.github/workflows/bindings-go.yml
@ -13,10 +13,10 @@ jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/setup-go@v5
+      - uses: actions/setup-go@v3
        with:
-          go-version: '^1.23'
-      - uses: actions/checkout@v4
+          go-version: '^1.19'
+      - uses: actions/checkout@v1
      - run: |
          cd bindings/go
          make test
--- a/.github/workflows/bindings-ruby.yml
+++ b/.github/workflows/bindings-ruby.yml
@ -3,53 +3,20 @@ on:
  push:
    paths:
      - bindings/ruby/**
-      - src/**/*.c
-      - src/**/*.cpp
-      - src/**/*.h
-      - src/**/*.m
-      - src/**/*.metal
-      - include/**/*.c
-      - include/**/*.cpp
-      - include/**/*.h
-      - include/**/*.m
-      - include/**/*.metal
-      - ggml/**/*.c
-      - ggml/**/*.cpp
-      - ggml/**/*.h
-      - ggml/**/*.m
-      - ggml/**/*.metal
-      - scripts/get-flags.mk
-      - examples/dr_wav.h
+      - whisper.h
  pull_request:
    paths:
      - bindings/ruby/**
-      - src/**/*.c
-      - src/**/*.cpp
-      - src/**/*.h
-      - src/**/*.m
-      - src/**/*.metal
-      - include/**/*.c
-      - include/**/*.cpp
-      - include/**/*.h
-      - include/**/*.m
-      - include/**/*.metal
-      - ggml/**/*.c
-      - ggml/**/*.cpp
-      - ggml/**/*.h
-      - ggml/**/*.m
-      - ggml/**/*.metal
-      - scripts/get-flags.mk
-      - examples/dr_wav.h
+      - whisper.h

 jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: bindings/ruby
    steps:
      - uses: ruby/setup-ruby@v1
        with:
-          ruby-version: '3.1'
-      - uses: actions/checkout@v4
-      - run: rake test
+          ruby-version: '3.0'
+      - uses: actions/checkout@v1
+      - run: |
+          cd bindings/ruby/ext
+          ruby extconf.rb && make
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -1,301 +1,118 @@
 name: CI
 on: [push, pull_request]

-env:
-  ubuntu_image: "ubuntu:22.04"
-  VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
-
 jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest

-    strategy:
-      fail-fast: false
-      matrix:
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-
    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
+      - name: Dependencies
        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            apt update
-            apt install -y build-essential libsdl2-dev cmake
-            cmake -B build
-            cmake --build build --config Release -j $(nproc)'
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install libsdl2-dev
+
+      - name: Build
+        run: |
+          make
+          make stream

  macOS-latest:
    runs-on: macOS-latest

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

      - name: Dependencies
        run: |
          brew update
-          brew install sdl2 cmake
+          brew install sdl2

      - name: Build
        run: |
-          cmake -B build
-          cmake --build build --config Release
-
-#  freeBSD-latest:
-#    runs-on: macos-12
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v4
-#
-#      - name: Build
-#        uses: cross-platform-actions/action@v0.24.0
-#        with:
-#          operating_system: freebsd
-#          version: '13.3'
-#          run: |
-#            sudo pkg update
-#            sudo pkg install -y gmake sdl2 cmake
-#            cmake -B build
-#            cmake --build build --config Release
+          make
+          make stream

  ubuntu-latest-gcc:
    runs-on: ubuntu-latest

    strategy:
-      fail-fast: false
      matrix:
        build: [Debug, Release]
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
+      - name: Dependencies
        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            apt update
-            apt install -y build-essential cmake libsdl2-dev
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-            make
-            ctest -L gh --output-on-failure'
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake
+          sudo apt-get install libsdl2-dev
+
+      - name: Configure
+        run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+
+      - name: Build
+        run: |
+          make
+          ctest -L gh --output-on-failure

  ubuntu-latest-clang:
    runs-on: ubuntu-latest

    strategy:
-      fail-fast: false
      matrix:
        build: [Debug, Release]
-        #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-        # TODO: arm/v7 disabled due to clang bug
-        #       https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
-        arch: [linux/amd64, linux/arm64, linux/ppc64le]

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
+      - name: Dependencies
        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            apt update
-            apt install -y clang build-essential cmake libsdl2-dev
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
-            make
-            ctest -L gh --output-on-failure'
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake
+          sudo apt-get install libsdl2-dev
+
+      - name: Configure
+        run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+
+      - name: Build
+        run: |
+          make
+          ctest -L gh --output-on-failure

  ubuntu-latest-gcc-sanitized:
    runs-on: ubuntu-latest

    strategy:
-      fail-fast: false
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        arch: [linux/amd64]

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
+      - name: Dependencies
        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            apt update
-            apt install -y build-essential cmake
-            cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
-            make
-            ctest -L gh --output-on-failure'
+          sudo apt-get update
+          sudo apt-get install build-essential
+          sudo apt-get install cmake

-  ubuntu-22-cmake-sycl:
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        dwhisper_sycl: [ON]
-        dcmake_c_compiler: [icx]
-        dcmake_cxx_compiler: [icpx]
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-
-    continue-on-error: true
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: add oneAPI to apt
-        shell: bash
-        run: |
-          cd /tmp
-          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
-      - name: install oneAPI dpcpp compiler
-        shell: bash
-        run: |
-          sudo apt update
-          sudo apt install intel-oneapi-compiler-dpcpp-cpp
-
-      - name: install oneAPI MKL library
-        shell: bash
-        run: |
-          sudo apt install intel-oneapi-mkl-devel
-
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
+      - name: Configure
+        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON

      - name: Build
-        id: cmake_build
        run: |
-          source /opt/intel/oneapi/setvars.sh
-          mkdir build
-          cd build
-          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
-          cmake --build . --config Release -j $(nproc)
-
-  ubuntu-22-cmake-sycl-fp16:
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        dwhisper_sycl: [ON]
-        dcmake_c_compiler: [icx]
-        dcmake_cxx_compiler: [icpx]
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-
-    continue-on-error: true
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: add oneAPI to apt
-        shell: bash
-        run: |
-          cd /tmp
-          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
-      - name: install oneAPI dpcpp compiler
-        shell: bash
-        run: |
-          sudo apt update
-          sudo apt install intel-oneapi-compiler-dpcpp-cpp
-
-      - name: install oneAPI MKL library
-        shell: bash
-        run: |
-          sudo apt install intel-oneapi-mkl-devel
-
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Build
-        id: cmake_build
-        run: |
-          source /opt/intel/oneapi/setvars.sh
-          mkdir build
-          cd build
-          cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
-          cmake --build . --config Release -j $(nproc)
-
-  windows-msys2:
-    runs-on: windows-latest
-
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
-          - { sys: CLANG64, env: clang-x86_64, build: Release }
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: Setup ${{ matrix.sys }}
-        uses: msys2/setup-msys2@v2
-        with:
-          update: true
-          msystem: ${{matrix.sys}}
-          install: >-
-            base-devel
-            mingw-w64-${{matrix.env}}-toolchain
-            mingw-w64-${{matrix.env}}-cmake
-            mingw-w64-${{matrix.env}}-SDL2
-            mingw-w64-${{matrix.env}}-openblas
-
-      - name: Build using CMake
-        shell: msys2 {0}
-        run: |
-            cmake -B build -DWHISPER_SDL2=ON
-            cmake --build build --config ${{ matrix.build }} -j $(nproc)
-
-      - name: Clean after building using CMake
-        shell: msys2 {0}
-        run: |
-            rm -rf build
-
-      - name: Build using CMake w/ OpenBLAS
-        shell: msys2 {0}
-        run: |
-            cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-            cmake --build build --config ${{ matrix.build }} -j $(nproc)
+          make
+          ctest -L gh --output-on-failure

  windows:
    runs-on: windows-latest
@ -308,19 +125,17 @@ jobs:
        include:
          - arch: Win32
            s2arc: x86
-            jnaPath: win32-x86
          - arch: x64
            s2arc: x64
-            jnaPath: win32-x86-64
          - sdl2: ON
-            s2ver: 2.28.5
+            s2ver: 2.26.0

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

      - name: Add msbuild to PATH
-        uses: microsoft/setup-msbuild@v2
+        uses: microsoft/setup-msbuild@v1

      - name: Fetch SDL2 and set SDL2_DIR
        if: matrix.sdl2 == 'ON'
@ -333,7 +148,7 @@ jobs:
        run: >
          cmake -S . -B ./build -A ${{ matrix.arch }}
          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DWHISPER_SDL2=${{ matrix.sdl2 }}
+          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}

      - name: Build
        run: |
@ -344,15 +159,9 @@ jobs:
        if: matrix.sdl2 == 'ON'
        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}

-      - name: Upload dll
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.jnaPath }}_whisper.dll
-          path: build/bin/${{ matrix.build }}/whisper.dll
-
      - name: Upload binaries
        if: matrix.sdl2 == 'ON'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v1
        with:
          name: whisper-bin-${{ matrix.arch }}
          path: build/bin/${{ matrix.build }}
@ -368,31 +177,29 @@ jobs:
        sdl2: [ON]
        include:
          - arch: Win32
+            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
            s2arc: x86
          - arch: x64
+            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
            s2arc: x64
          - sdl2: ON
-            s2ver: 2.28.5
+            s2ver: 2.26.0

    steps:
      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: Export GitHub Actions cache environment variables
-        uses: actions/github-script@v7
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+        uses: actions/checkout@v1

      - name: Add msbuild to PATH
-        uses: microsoft/setup-msbuild@v2
+        uses: microsoft/setup-msbuild@v1

-      - name: Install OpenBLAS and pkgconfiglite
+      - name: Fetch OpenBLAS
        if: matrix.blas == 'ON'
        run: |
-          vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas
-          choco install pkgconfiglite
+          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
+          7z x blas.zip -oblas -y
+          copy blas/include/cblas.h .
+          copy blas/include/openblas_config.h .
+          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV

      - name: Fetch SDL2 and set SDL2_DIR
        if: matrix.sdl2 == 'ON'
@ -404,20 +211,19 @@ jobs:
      - name: Configure
        run: >
          cmake -S . -B ./build -A ${{ matrix.arch }}
-          -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DGGML_BLAS=${{ matrix.blas }}
-          -DGGML_BLAS_VENDOR=OpenBLAS
-          -DWHISPER_SDL2=${{ matrix.sdl2 }}
+          -DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
+          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+          -DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}

      - name: Build
        run: |
          cd ./build
          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}

-      - name: Copy openblas.dll
+      - name: Copy libopenblas.dll
        if: matrix.blas == 'ON'
-        run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }}
+        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}

      - name: Copy SDL2.dll
        if: matrix.sdl2 == 'ON'
@ -425,78 +231,11 @@ jobs:

      - name: Upload binaries
        if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v1
        with:
          name: whisper-blas-bin-${{ matrix.arch }}
          path: build/bin/${{ matrix.build }}

-# TODO: fix and re-enable
-#  windows-cublas:
-#    runs-on: windows-2019
-#
-#    strategy:
-#      matrix:
-#        build: [Release]
-#        arch: [x64]
-#        cublas: [ON]
-#        sdl2: [ON]
-#        cuda-toolkit: [12.2.0, 11.8.0]
-#        include:
-#          - arch: x64
-#            s2arc: x64
-#          - sdl2: ON
-#            s2ver: 2.28.5
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v4
-#
-#      - name: Add msbuild to PATH
-#        uses: microsoft/setup-msbuild@v2
-#
-#      - name: Install CUDA Toolkit
-#        id: cuda-toolkit
-#        uses: Jimver/cuda-toolkit@v0.2.15
-#        with:
-#          cuda: '${{ matrix.cuda-toolkit }}'
-#
-#      - name: Fetch SDL2 and set SDL2_DIR
-#        if: matrix.sdl2 == 'ON'
-#        run: |
-#          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
-#          7z x sdl2.zip
-#          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-#
-#      - name: Configure
-#        run: >
-#          cmake -S . -B ./build -A ${{ matrix.arch }}
-#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-#          -DGGML_CUDA=${{ matrix.cublas }}
-#          -DWHISPER_SDL2=${{ matrix.sdl2 }}
-#
-#      - name: Build ${{ matrix.cuda-toolkit }}
-#        run: |
-#          cd ./build
-#          cmake --build . --config ${{ matrix.build }}
-#
-#      - name: Copy CUDA DLLs
-#        run: >
-#          Copy-Item -PassThru
-#          -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
-#          -Include cudart64_*,cublas64_*,cublasLt64_*
-#          -Destination build/bin/${{ matrix.build }}
-#
-#      - name: Copy SDL2.dll
-#        if: matrix.sdl2 == 'ON'
-#        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-#
-#      - name: Upload binaries
-#        if: matrix.sdl2 == 'ON'
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
-#          path: build/bin/${{ matrix.build }}
-
  emscripten:
    runs-on: ubuntu-latest

@ -506,173 +245,23 @@ jobs:

    steps:
      - name: Clone
-        uses: actions/checkout@v4
+        uses: actions/checkout@v1

-      - name: Setup emsdk
-        uses: mymindstorm/setup-emsdk@v14
-
-      - name: Verify
-        run: emcc -v
-
-      - name: Build
+      - name: Dependencies
        run: |
-          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          make
-
-  ios-xcode-build:
-    runs-on: macos-latest
-
-    strategy:
-      matrix:
-        build: [Release]
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
+          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+          tar -xvf master.tar.gz
+          emsdk-master/emsdk update
+          emsdk-master/emsdk install latest
+          emsdk-master/emsdk activate latest

      - name: Configure
-        run: |
-          cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
-          mkdir models/ggml-base.en-encoder.mlmodelc
-
-      - name: Build
-        id: cmake_build
-        run: |
-          sysctl -a
-          mkdir build
-          cd build
-          cmake -G Xcode .. \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DWHISPER_BUILD_EXAMPLES=OFF \
-            -DWHISPER_BUILD_TESTS=OFF \
-            -DWHISPER_BUILD_SERVER=OFF \
-            -DCMAKE_SYSTEM_NAME=iOS \
-            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
-          sudo cmake --install . --config Release
-
-      - name: xcodebuild for swift package
-        id: xcodebuild
-        run: |
-          xcodebuild -scheme whisper-Package -destination 'generic/platform=iOS'
-
-#- name: Build objc example
-#  run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos build
-
-      - name: Build swiftui example
-        run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
-
-  android:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          path: whisper
-
-      - name: Install Java
-        uses: actions/setup-java@v4
-        with:
-          distribution: zulu
-          java-version: 21
-
-      - name: Setup Android SDK
-        uses: android-actions/setup-android@v3
+        run: echo "tmp"

      - name: Build
        run: |
-          cd whisper/examples/whisper.android
-          ./gradlew assembleRelease --no-daemon
-
-      - name: Build with external ggml
-        run: |
-          export PATH_TO_GGML=$PWD/ggml
-          cd whisper/examples/whisper.android
-          ./gradlew assembleRelease --no-daemon
-
-# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
-#  android_java:
-#    runs-on: ubuntu-latest
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v4
-#
-#      - name: set up JDK 11
-#        uses: actions/setup-java@v4
-#        with:
-#          java-version: '11'
-#          distribution: 'temurin'
-#          cache: gradle
-#
-#      - name: Setup Android SDK
-#        uses: android-actions/setup-android@v3
-#        with:
-#          cmdline-tools-version: 9.0
-#
-#      - name: Build
-#        run: |
-#          cd examples/whisper.android.java
-#          chmod +x ./gradlew
-#          ./gradlew assembleRelease
-
-# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
-#  java:
-#    needs: [ 'windows' ]
-#    runs-on: windows-latest
-#    steps:
-#      - uses: actions/checkout@v4
-#
-#      - name: Install Java
-#        uses: actions/setup-java@v4
-#        with:
-#          distribution: zulu
-#          java-version: 20
-#
-#      - name: Download Windows lib
-#        uses: actions/download-artifact@v4
-#        with:
-#          name: win32-x86-64_whisper.dll
-#          path: bindings/java/build/generated/resources/main/win32-x86-64
-#
-#      - name: Build
-#        run: |
-#          models\download-ggml-model.cmd tiny.en
-#          cd bindings/java
-#          chmod +x ./gradlew
-#          ./gradlew build
-#
-#      - name: Upload jar
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: whispercpp.jar
-#          path: bindings/java/build/libs/whispercpp-*.jar
-#
-#      - name: Publish package
-#        if: ${{ github.ref == 'refs/heads/master' }}
-#        uses: gradle/gradle-build-action@v2.4.2
-#        with:
-#          arguments: publish
-#          build-root-directory: bindings/java
-#        env:
-#          MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
-#          MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
-#          PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
-#          PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
-
-  quantize:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-
-      - name: Test quantize
-        run: |
-          ./models/download-ggml-model.sh tiny.en
-          cmake -B build
-          cmake --build build --config Release
-          ./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
+          pushd emsdk-master
+          source ./emsdk_env.sh
+          popd
+          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+          make
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -1,59 +0,0 @@
-name: Publish Docker image
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-
-jobs:
-  push_to_registry:
-    name: Push Docker image to Docker Hub
-    if: github.event.pull_request.draft == false
-
-    runs-on: ubuntu-latest
-    env:
-      COMMIT_SHA: ${{ github.sha }}
-    strategy:
-      matrix:
-        config:
-          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
-          #TODO: the cuda image keeps failing - disable for now
-          #      https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
-          #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v3
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push Docker image (versioned)
-        if: github.event_name == 'push'
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          platforms: ${{ matrix.config.platform }}
-          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
-          file: ${{ matrix.config.dockerfile }}
-
-      - name: Build and push Docker image (tagged)
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          push: ${{ github.event_name == 'push' }}
-          platforms: ${{ matrix.config.platform }}
-          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
-          file: ${{ matrix.config.dockerfile }}
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@ -37,7 +37,7 @@ jobs:
        run: npm install

      - name: Compile addon.node
-        run: npx cmake-js compile -T addon.node -B Release
+        run: npx cmake-js compile -T whisper-addon -B Release

      - name: Download test model
        run: |
--- a/.gitignore
+++ b/.gitignore
@ -1,37 +1,24 @@
 *.o
 *.a
-*.d
 .cache/
-.coreml/
-.test/
-.venv/
 .vs/
 .vscode/
 .DS_Store
-.vimspector.json
-/CMakeSettings.json
-/talk-llama.dSYM/

 build/
-build-*/
-
-# SPM
-.build/
-.swiftpm
-*.metallib
-
-ggml-metal-embed.metal
-ggml-metal-embed.metal.tmp
+build-em/
+build-debug/
+build-release/
+build-static/
+build-no-accel/
+build-sanitize-addr/
+build-sanitize-thread/

 /main
 /stream
 /command
 /talk
-/talk-llama
 /bench
-/quantize
-/server
-/lsp

 arm_neon.h
 sync.sh
@ -45,16 +32,3 @@ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
 examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata

 extra/bench-gg.txt
-
-models/*.mlmodel
-models/*.mlmodelc
-models/*.mlpackage
-bindings/java/.gradle/
-bindings/java/.idea/
-.idea/
-
-benchmark_results.csv
-cmake-build-debug/
-.cxx/
-.gradle/
-local.properties
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
+[submodule "bindings/ios"]
+	path = bindings/ios
+	url = https://github.com/ggerganov/whisper.spm
--- a/301
+++ b/301
@ -1,301 +0,0 @@
-# date: Tue Apr  9 20:27:03 EEST 2024
-# this file is auto-generated by scripts/gen-authors.sh
-
-0/0 <zero@imaskeleton.me>
-0cc4m <picard12@live.de>
-0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
-AT <manyoso@users.noreply.github.com>
-Aarni Koskela <akx@iki.fi>
-Aaron Pham <29749331+aarnphm@users.noreply.github.com>
-Aaron Taylor <aaron@exphat.com>
-Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
-Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
-AfryMask <AfryMask@163.com>
-Ahmad Bilal <ahmad.bilal@empglabs.com>
-AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
-Akash Mahajan <akash7190@gmail.com>
-Akash Mahajan <akashmjn@stanford.edu>
-Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
-Alan <unknown>
-Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
-Alex Azarov <alex@azarov.by>
-Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
-Alex Evgrashin <aevgrashin@yandex.ru>
-Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
-Alexandru Mariuti <alex@mariuti.com>
-Alexey Kharlamov <alexey@kharlamov.biz>
-Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
-Ali Alameh <ali.alameh@isae.edu.lb>
-Ananta Bastola <anantarajbastola@gmail.com>
-Andreu Huguet <andreuhuguet@gmail.com>
-Andrew Huynh <a5thuynh@gmail.com>
-Andrew S <andrews54757@gmail.com>
-Andy Maloney <asmaloney@gmail.com>
-Anton Kostin <masguit42@users.noreply.github.com>
-Artyom Mezin <psycho.fading@gmail.com>
-Asad Memon <asad.lionpk@gmail.com>
-Ashraful Islam <ashraful.meche@gmail.com>
-AsukaMinato <asukaminato@nyan.eu.org>
-AustinMroz <austinmroz@utexas.edu>
-Avik Sengupta <avik@sengupta.net>
-Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
-Baffin Lee <baffinlee@gmail.com>
-Ben Nortier <bjnortier@gmail.com>
-Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
-Bo-Yi Wu <appleboy.tw@gmail.com>
-Boris Bliznioukov <blib@mail.com>
-Borislav Stanimirov <b.stanimirov@abv.bg>
-Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
-Brian Murray <brian@bmurray.ca>
-CRD716 <crd716@gmail.com>
-Canis Lupus <Canis-UK@users.noreply.github.com>
-Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
-ChangSeok Oh <shivamidow@users.noreply.github.com>
-Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
-Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
-Chidi Williams <williamschidi1@gmail.com>
-Christian <12550267+iceychris@users.noreply.github.com>
-Clifford Heath <clifford.heath@gmail.com>
-Colin <github@whoisc.cc>
-DGdev91 <DGdev91@users.noreply.github.com>
-Damian Czaja <trojan295@protonmail.com>
-Daniel Bevenius <daniel.bevenius@gmail.com>
-David <dnhkng@gmail.com>
-David Thorpe <djt@mutablelogic.com>
-Davidson Francis <davidsondfgl@gmail.com>
-Dener Stassun <denerstassun@gmail.com>
-Didzis Gosko <didzis@users.noreply.github.com>
-Digipom <admin@digipom.com>
-Dimo <dimo@ieee.org>
-Dody Suria Wijaya <dodysw@gmail.com>
-Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
-Duncan McConnell <ddmcconnell4@gmail.com>
-Egor Egorov <me@egorfine.com>
-Elkana Bardugo <ttv200@gmail.com>
-Emmanuel Schmidbauer <eschmidbauer@gmail.com>
-Engininja2 <139037756+Engininja2@users.noreply.github.com>
-Eric Swanson <eswanson@alloscomp.com>
-Eric Tendian <erictendian@gmail.com>
-Erik Scholz <Green-Sky@users.noreply.github.com>
-Evan Jones <evan.q.jones@gmail.com>
-Evan Martin <evan.martin@gmail.com>
-Eve <139727413+netrunnereve@users.noreply.github.com>
-Evgeny Kuznetsov <evgeny@kuznetsov.md>
-F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
-Fangjun Kuang <csukuangfj@gmail.com>
-Felix <stenbackfelix@gmail.com>
-Finn Voorhees <finnvoorhees@gmail.com>
-FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
-Gang Chen <goncha@gmail.com>
-Gavin Cai <gavin1818@hotmail.com>
-George Hindle <george@georgehindle.com>
-Georgi Gerganov <ggerganov@gmail.com>
-GitAritron <103900385+GitAritron@users.noreply.github.com>
-GiviMAD <GiviMAD@users.noreply.github.com>
-Gleicon Moraes <gleicon@gmail.com>
-Gregor Jasny <gjasny@googlemail.com>
-Guillaume Wenzek <gwenzek@users.noreply.github.com>
-HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
-Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
-Hang <bebound@gmail.com>
-Herman Semenov <GermanAizek@yandex.ru>
-Hrishikesh Barman <geekodour@users.noreply.github.com>
-Ian Bicking <ian@ianbicking.org>
-Ian Bull <irbull@eclipsesource.com>
-Ikko Ashimine <eltociear@gmail.com>
-InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
-Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
-Ivan Gorin <ivangorin21@gmail.com>
-JJ <103335846+computerscienceiscool@users.noreply.github.com>
-Jack Mousseau <jmousseau@users.noreply.github.com>
-JacobLinCool <jacoblincool@gmail.com>
-Jakub Ráček <blizzcz@gmail.com>
-Jared Van Bortel <jared@nomic.ai>
-Jay Binks <jaybinks@gmail.com>
-Jhen-Jie Hong <developer@jhen.me>
-Jhen-Jie Hong <iainst0409@gmail.com>
-JidongZhang-THU <1119708529@qq.com>
-Jo Liss <joliss42@gmail.com>
-Johan <jr.raffin@gmail.com>
-Johannes Gäßler <johannesg@5d6.de>
-John Balis <phobossystems@gmail.com>
-Jonathan Soo <jcsoo@agora.com>
-Jonno <1160532+razodactyl@users.noreply.github.com>
-Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
-Jose <34888496+Jerry-Master@users.noreply.github.com>
-Josh Bleecher Snyder <josharian@gmail.com>
-Judd <foldl@users.noreply.github.com>
-Jumper775 <78500318+jumpers775@users.noreply.github.com>
-Justine Tunney <jtunney@gmail.com>
-KP Kaiser <kirk@zothcorp.com>
-Kamilake <exjang0@gmail.com>
-Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
-Kasumi <90275229+kasumi-1@users.noreply.github.com>
-Kawrakow <48489457+ikawrakow@users.noreply.github.com>
-Kevin Brothaler <admin@digipom.com>
-Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
-Kreijstal <rainb@tfwno.gf>
-Kylin <56434533+KyL0N@users.noreply.github.com>
-LBlue <153975653+lbluep@users.noreply.github.com>
-Larry Battle <larry.battle.tech@gmail.com>
-Laytan Laats <laytanlaats@hotmail.com>
-Leo Moll <leo.moll@yeasoft.com>
-Lexevolution <31176843+Lexevolution@users.noreply.github.com>
-LittleLoli <26589867+WhichWho@users.noreply.github.com>
-Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
-Luis Herrera <herrera-luis@users.noreply.github.com>
-Lukas Rist <glaslos@gmail.com>
-M. A. Ali <73258591+MightyStud@users.noreply.github.com>
-M. Eren Akbiyik <erenakbiyik@gmail.com>
-Maciek <maciek.mab122@gmail.com>
-Marcin Mielniczuk <marmistrz.dev@zoho.eu>
-Martin Warnaar <martinwarnaar@gmail.com>
-Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
-Mathijs de Bruin <mathijs@mathijsfietst.nl>
-Matija Pevec <mightymatth@users.noreply.github.com>
-Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
-Meng, Hengyu <hengyu.meng@intel.com>
-Michael Podvitskiy <podvitskiymichael@gmail.com>
-Michael Rienstra <mrienstra@gmail.com>
-Mikhail Grigorev <sleuthhound@gmail.com>
-Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
-Mohit Agarwal <mohit@sdf.org>
-Murilo Santana <mvrilo@gmail.com>
-Neil Chudleigh <nchudleigh@users.noreply.github.com>
-Neo Zhang Jianyu <jianyu.zhang@intel.com>
-Neuman Vong <neuman.vong@gmail.com>
-Nicholas Albion <nalbion@yahoo.com>
-Niels Mayer <Niels.Mayer@gmail.com>
-Okabintaro <103938900+Okabintaro@users.noreply.github.com>
-Oleg Sidorov <me@whitebox.io>
-Oleg Sidorov <oleg@sidorov.nl>
-Ondrej Kokes <ondrej.kokes@gmail.com>
-Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
-Paul Tsochantaris <ptsochantaris@icloud.com>
-Philipp Zabel <philipp.zabel@gmail.com>
-Philippe Normand <phil@base-art.net>
-Przemysław Pawełczyk <przemoc@gmail.com>
-Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
-Radosław Gryta <radek.gryta@gmail.com>
-Reinforce-II <fate@eastal.com>
-Reinis Muiznieks <muiznieks.reinis@gmail.com>
-RelatedTitle <r3latedtitle@gmail.com>
-RhinoDevel <RhinoDevel@users.noreply.github.com>
-Rich Jones <miserlou@gmail.com>
-Robin <robin.xw@hotmail.com>
-Roddur Dasgupta <roddurd@gmail.com>
-Roland Rabien <figbug@gmail.com>
-Rotem Dan <rotemdan@gmail.com>
-Ryan Hitchman <hitchmanr@gmail.com>
-Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
-RyanChang <ftes90015@gmail.com>
-Sam <49637763+Onlyartist9@users.noreply.github.com>
-Sam Pullara <spullara@gmail.com>
-Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
-Sergio López <slp@sinrega.org>
-Siddharth Ramakrishnan <srr2141@columbia.edu>
-Simon Moisselin <simon.moisstoll@gmail.com>
-Sindre Sorhus <sindresorhus@gmail.com>
-Slava Primenko <primenko.s@gmail.com>
-Syahmi Azhar <prsyahmi@gmail.com>
-Syed Jafri <syedjafri97@gmail.com>
-Sơn Phan Trung <phantrungson17@gmail.com>
-Taisei Mima <bhbstar.me@gmail.com>
-Takeshi Inoue <inoue.takeshi@gmail.com>
-Tamotsu Takahashi <ttakah+github@gmail.com>
-Taras Glek <taras@thegp.com>
-Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
-Thijs Raymakers <thijs@raymakers.nl>
-Thomas Fitzsimmons <fitzsim@fitzsim.org>
-Tiago Fassoni <tiagofassoni@users.noreply.github.com>
-Tienshiao Ma <tienshiao@tienshiao.org>
-Timothy Cronin <40186632+4imothy@users.noreply.github.com>
-Tobrun <tobrun.van.nuland@gmail.com>
-Todd <taf2@users.noreply.github.com>
-Tong Li <31761981+litongjava@users.noreply.github.com>
-Topping1 <78745143+Topping1@users.noreply.github.com>
-Travis Cline <travis.cline@gmail.com>
-UEXTM.com <84163508+uextm@users.noreply.github.com>
-Vadim Peretokin <vperetokin@hey.com>
-Valentin Gosu <1454649+valenting@users.noreply.github.com>
-Vulcan <93451215+trholding@users.noreply.github.com>
-WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
-Xiang (Kevin) Li <kevinli020508@gmail.com>
-Xiao-Yong Jin <jinxiaoyong@gmail.com>
-XiaotaoChen <chenxiaotao1234@gmail.com>
-Yajing Tang <phillis@google.com>
-Yang Shen <aplshenyang@gmail.com>
-Yunès <jean.baptiste.yunes@free.fr>
-ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
-Zigfrid Zvezdin <ziggerZZ@gmail.com>
-Zollner <24618122+Zolliner@users.noreply.github.com>
-ai-at-home <149282006+ai-at-home@users.noreply.github.com>
-alonfaraj <alonfaraj@gmail.com>
-andypayne <apayne@gmail.com>
-ardfork <134447697+ardfork@users.noreply.github.com>
-automaticcat <daogiatuank54@gmail.com>
-be-next <jerome.ramette@gmail.com>
-bert hubert <bert@hubertnet.nl>
-bmwl <brian.marshall@tolko.com>
-bobqianic <129547291+bobqianic@users.noreply.github.com>
-bocytko <bocytko+github@gmail.com>
-boolemancer <48014766+boolemancer@users.noreply.github.com>
-boolemancer <boolemancer@gmail.com>
-bradmit <151883577+bradmit@users.noreply.github.com>
-brunofaustino <b.fa.amorim@gmail.com>
-bssrdf <merlintiger@hotmail.com>
-byte-6174 <88070277+byte-6174@users.noreply.github.com>
-cdosoftei <ciprian.dosoftei@gmail.com>
-clach04 <Chris.Clark@actian.com>
-compilade <113953597+compilade@users.noreply.github.com>
-conradg <conradjgodfrey@gmail.com>
-ddpasa <112642920+ddpasa@users.noreply.github.com>
-denersc <denerstassun@gmail.com>
-dscripka <dscripka@users.noreply.github.com>
-duthils <duthils@duthils.net>
-ecneladis <ecneladis@users.noreply.github.com>
-faker <nspyia2002@gmail.com>
-fitzsim <fitzsim@fitzsim.org>
-fraxy-v <65565042+fraxy-v@users.noreply.github.com>
-genevera (she/her) <genevera@users.noreply.github.com>
-geniusnut <geniusnut@gmail.com>
-greeshmay <greeshmay@gmail.com>
-hydai <z54981220@gmail.com>
-iamthad <thadeus.j.fleming@gmail.com>
-james wolf <contractorwolf@hotmail.com>
-joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
-jorismertz <35079666+jorismertz@users.noreply.github.com>
-junkfood <69683722+JunkFood02@users.noreply.github.com>
-jwijffels <jwijffels@bnosac.be>
-kamranjon <kamranjon@gmail.com>
-katsu560 <katsu560oo-@docomo.ne.jp>
-kennethge <57784063+kenneth-ge@users.noreply.github.com>
-keyehzy <msamuel@aluno.puc-rio.br>
-leejet <leejet714@gmail.com>
-litong <31761981+litongjava@users.noreply.github.com>
-lnyan <lkwq007@gmail.com>
-m.bell <m.bell@techsmith.com>
-mkiol <mkiol@users.noreply.github.com>
-novag <7754358+novag@users.noreply.github.com>
-pajowu <pajowu@pajowu.de>
-polarmoon <90010972+polarmoon@users.noreply.github.com>
-rlapray <lapray.romain@gmail.com>
-sandrohanea <40202887+sandrohanea@users.noreply.github.com>
-semiformal-net <84111142+semiformal-net@users.noreply.github.com>
-shibukazu <61775791+shibukazu@users.noreply.github.com>
-shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
-slaren <slarengh@gmail.com>
-slashlib <slashlib@users.noreply.github.com>
-snadampal <87143774+snadampal@users.noreply.github.com>
-st-gr <38470677+st-gr@users.noreply.github.com>
-texmex76 <40733439+texmex76@users.noreply.github.com>
-thefinaldegree <thefinaldegree@gmail.com>
-trixirt <trix@redhat.com>
-ulatekh <ulatekh@yahoo.com>
-undef <undefdev@gmail.com>
-venkr <venkateshrameshkumar+1@gmail.com>
-vicalloy <zbirder@gmail.com>
-xdrudis <xavierdrudis@yahoo.es>
-zhouwg <6889919+zhouwg@users.noreply.github.com>
-布客飞龙 <562826179@qq.com>
-Артём Земляк <azemlyak@smart-consulting.ru>
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,31 +1,21 @@
-cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
-project("whisper.cpp" C CXX)
-project("whisper.cpp" VERSION 1.7.3)
-include(CheckIncludeFileCXX)
+cmake_minimum_required (VERSION 3.0)

-set(SOVERSION 1)
-
-#set(CMAKE_WARN_DEPRECATED YES)
-set(CMAKE_WARN_UNUSED_CLI YES)
-
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
-    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
-endif()
+project(whisper.cpp VERSION 1.2.1)

 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")

 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(WHISPER_STANDALONE ON)
-
-    include(git-vars)
+    include(GitVars)
+    include(BuildTypes)

    # configure project version
+    if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
+        configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
+    endif()
    configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
 else()
    set(WHISPER_STANDALONE OFF)
@ -35,11 +25,6 @@ if (EMSCRIPTEN)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)

    option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
-
-    # TODO: without these, we get the following error:
-    #       wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
-    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
 else()
    if (MINGW)
        set(BUILD_SHARED_LIBS_DEFAULT OFF)
@ -48,136 +33,221 @@ else()
    endif()
 endif()

-option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+# options

-#
-# option list
-#
+option(BUILD_SHARED_LIBS               "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})

-# general
-option(WHISPER_CCACHE "whisper: use ccache if available" ON)
+option(WHISPER_ALL_WARNINGS            "whisper: enable all compiler warnings"                   ON)
+option(WHISPER_ALL_WARNINGS_3RD_PARTY  "whisper: enable all compiler warnings in 3rd party libs" OFF)

-# debug
-option(WHISPER_ALL_WARNINGS           "whisper: enable all compiler warnings"                   ON)
-option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
+option(WHISPER_SANITIZE_THREAD         "whisper: enable thread sanitizer"    OFF)
+option(WHISPER_SANITIZE_ADDRESS        "whisper: enable address sanitizer"   OFF)
+option(WHISPER_SANITIZE_UNDEFINED      "whisper: enable undefined sanitizer" OFF)

-# build
-option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
+option(WHISPER_BUILD_TESTS             "whisper: build tests"    ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_EXAMPLES          "whisper: build examples" ${WHISPER_STANDALONE})
+
+option(WHISPER_SUPPORT_SDL2            "whisper: support for libSDL2" OFF)
+
+if (APPLE)
+    option(WHISPER_NO_ACCELERATE       "whisper: disable Accelerate framework" OFF)
+    option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
+    option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
+    option(WHISPER_NO_FMA              "whisper: disable FMA" OFF)
+else()
+    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
+endif()
+
+option(WHISPER_PERF                    "whisper: enable perf timings" OFF)

 # sanitizers
-option(WHISPER_SANITIZE_THREAD    "whisper: enable thread sanitizer"    OFF)
-option(WHISPER_SANITIZE_ADDRESS   "whisper: enable address sanitizer"   OFF)
-option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)

-# extra artifacts
-option(WHISPER_BUILD_TESTS    "whisper: build tests"          ${WHISPER_STANDALONE})
-option(WHISPER_BUILD_EXAMPLES "whisper: build examples"       ${WHISPER_STANDALONE})
-option(WHISPER_BUILD_SERVER   "whisper: build server example" ${WHISPER_STANDALONE})
-
-# 3rd party libs
-option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
-option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
-
-if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-    option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
-endif()
-
-option(WHISPER_COREML                "whisper: enable Core ML framework"  OFF)
-option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
-option(WHISPER_OPENVINO              "whisper: support for OpenVINO"      OFF)
-
-# Required for relocatable CMake package
-include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
-
-# override ggml options
-set(GGML_CCACHE             ${WHISPER_CCACHE})
-set(GGML_SANITIZE_THREAD    ${WHISPER_SANITIZE_THREAD})
-set(GGML_SANITIZE_ADDRESS   ${WHISPER_SANITIZE_ADDRESS})
-set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
-set(GGML_ALL_WARNINGS       ${WHISPER_ALL_WARNINGS})
-set(GGML_FATAL_WARNINGS     ${WHISPER_FATAL_WARNINGS})
-
-# transition helpers
-function (whisper_option_depr TYPE OLD NEW)
-    if (${OLD})
-        message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
-        set(${NEW} ON)
+if (NOT MSVC)
+    if (WHISPER_SANITIZE_THREAD)
+        set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
    endif()
-endfunction()

-whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS              GGML_CUDA)
-whisper_option_depr(WARNING     WHISPER_CUDA                GGML_CUDA)
-whisper_option_depr(WARNING     WHISPER_KOMPUTE             GGML_KOMPUTE)
-whisper_option_depr(WARNING     WHISPER_METAL               GGML_METAL)
-whisper_option_depr(WARNING     WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
-whisper_option_depr(WARNING     WHISPER_NATIVE              GGML_NATIVE)
-whisper_option_depr(WARNING     WHISPER_OPENMP              GGML_OPENMP)
-whisper_option_depr(WARNING     WHISPER_RPC                 GGML_RPC)
-whisper_option_depr(WARNING     WHISPER_SYCL                GGML_SYCL)
-whisper_option_depr(WARNING     WHISPER_SYCL_F16            GGML_SYCL_F16)
+    if (WHISPER_SANITIZE_ADDRESS)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=address -fno-omit-frame-pointer")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    endif()

-#
-# build the library
-#
-
-if (NOT TARGET ggml)
-    add_subdirectory(ggml)
-    # ... otherwise assume ggml is added by a parent CMakeLists.txt
+    if (WHISPER_SANITIZE_UNDEFINED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=undefined")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+    endif()
+endif()
+
+#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
+#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
+
+# dependencies
+
+find_package(Threads REQUIRED)
+
+# Apple platforms: use the Accelerate framework unless WHISPER_NO_ACCELERATE is set
+if (APPLE AND NOT WHISPER_NO_ACCELERATE)
+    find_library(ACCELERATE_FRAMEWORK Accelerate)
+    if (NOT ACCELERATE_FRAMEWORK)
+        message(WARNING "Accelerate framework not found")
+    else()
+        message(STATUS "Accelerate framework found")
+
+        list(APPEND WHISPER_EXTRA_LIBS  ${ACCELERATE_FRAMEWORK})
+        list(APPEND WHISPER_EXTRA_FLAGS -DGGML_USE_ACCELERATE)
+    endif()
+endif()
+
+# optional OpenBLAS backend, requested via WHISPER_SUPPORT_OPENBLAS
+if (WHISPER_SUPPORT_OPENBLAS)
+    find_library(OPENBLAS_LIB NAMES openblas libopenblas)
+    if (NOT OPENBLAS_LIB)
+        message(WARNING "OpenBLAS not found")
+    else()
+        message(STATUS "OpenBLAS found")
+
+        # queue the library and the GGML_USE_OPENBLAS define for the whisper target
+        list(APPEND WHISPER_EXTRA_LIBS  ${OPENBLAS_LIB})
+        list(APPEND WHISPER_EXTRA_FLAGS -DGGML_USE_OPENBLAS)
+    endif()
+endif()
+
+# compiler flags
+
+if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
+endif ()
+
+if (WHISPER_ALL_WARNINGS)
+    if (NOT MSVC)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
+            -Wall                           \
+            -Wextra                         \
+            -Wpedantic                      \
+            -Wshadow                        \
+            -Wcast-qual                     \
+            -Wstrict-prototypes             \
+            -Wpointer-arith                 \
+            -Wno-unused-function            \
+        ")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+            -Wall                           \
+            -Wextra                         \
+            -Wpedantic                      \
+            -Wcast-qual                     \
+        ")
+    else()
+        # todo : msvc
+    endif()
+endif()
+
+if (NOT MSVC)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
+endif()
+
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+
+# Select SIMD-related compiler flags from the target processor.
+# The variable is referenced by name instead of being expanded with ${...}:
+# an empty CMAKE_SYSTEM_PROCESSOR would otherwise leave MATCHES without a
+# left-hand operand and abort the configure step.
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+    message(STATUS "ARM detected")
+else()
+    # anything that does not look like ARM is handled as x86
+    message(STATUS "x86 detected")
+    if (MSVC)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
+        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
+    else()
+        if (EMSCRIPTEN)
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+        else()
+            # each instruction set is on by default and can be dropped with
+            # the matching WHISPER_NO_* variable; -mf16c is always added
+            if(NOT WHISPER_NO_AVX)
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
+            endif()
+            if(NOT WHISPER_NO_AVX2)
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
+            endif()
+            if(NOT WHISPER_NO_FMA)
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
+            endif()
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
+        endif()
+    endif()
+endif()
+
+if (WHISPER_PERF)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
 endif()
-add_subdirectory(src)

 #
-# install
+# whisper - this is the main library of the project
 #

-include(GNUInstallDirs)
-include(CMakePackageConfigHelpers)
+set(TARGET whisper)

-set(WHISPER_BUILD_NUMBER        ${BUILD_NUMBER})
-set(WHISPER_BUILD_COMMIT        ${BUILD_COMMIT})
-set(WHISPER_INSTALL_VERSION     ${CMAKE_PROJECT_VERSION})
+add_library(${TARGET}
+    ggml.h
+    ggml.c
+    whisper.h
+    whisper.cpp
+    )

-set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header  files")
-set(WHISPER_LIB_INSTALL_DIR     ${CMAKE_INSTALL_LIBDIR}     CACHE PATH "Location of library files")
-set(WHISPER_BIN_INSTALL_DIR     ${CMAKE_INSTALL_BINDIR}     CACHE PATH "Location of binary  files")
+include(DefaultTargetOptions)

-get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
+target_include_directories(${TARGET} PUBLIC
+    .
+    )

-set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
-install(TARGETS whisper LIBRARY PUBLIC_HEADER)
+if (MSVC)
+    target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})

-configure_package_config_file(
-        ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
-        ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
-    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
-    PATH_VARS
-    WHISPER_INCLUDE_INSTALL_DIR
-    WHISPER_LIB_INSTALL_DIR
-    WHISPER_BIN_INSTALL_DIR )
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
+else()
+    target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
+endif()

-write_basic_package_version_file(
-    ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
-    VERSION ${WHISPER_INSTALL_VERSION}
-    COMPATIBILITY SameMajorVersion)
+if (BUILD_SHARED_LIBS)
+    target_link_libraries(${TARGET} PUBLIC
+        ${CMAKE_DL_LIBS}
+        )

-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
-              ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
+    target_compile_definitions(${TARGET} PUBLIC
+        WHISPER_SHARED
+        )
+endif()

-configure_file(cmake/whisper.pc.in
-        "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
-        @ONLY)
+if (EMSCRIPTEN)
+    set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
+endif()

-install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
-        DESTINATION lib/pkgconfig)
+target_compile_definitions(${TARGET} PUBLIC
+    ${WHISPER_EXTRA_FLAGS}
+    )
+
+set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
+
+install(TARGETS ${TARGET}
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib/static
+    RUNTIME DESTINATION bin
+    PUBLIC_HEADER DESTINATION include
+    )
+
+#
+# bindings
+#
+
+add_subdirectory(bindings)

 #
 # programs, examples and tests
 #

 if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
-    #include(CTest)
-    #add_subdirectory(tests)
+    enable_testing()
+    add_subdirectory(tests)
 endif ()

 if (WHISPER_BUILD_EXAMPLES)
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023-2024 The ggml authors
+Copyright (c) 2022 Georgi Gerganov

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/250
+++ b/250
@ -1,12 +1,227 @@
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		# Probe for Apple Silicon. If the sysctl key is missing, its error message is
+		# discarded and SYSCTL_M simply stays empty, so the check below is skipped.
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+		ifeq ($(SYSCTL_M),1)
+			# UNAME_P := arm
+			# UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
+#
+# Compile flags
+#
+
+CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+LDFLAGS  =
+
+# OS specific
+# TODO: support Windows
+# Linux, Darwin, FreeBSD and Haiku all get -pthread for both C and C++
+ifneq ($(filter $(UNAME_S),Linux Darwin FreeBSD Haiku),)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+#
+# On x86 each SIMD flag is enabled only when the host reports the feature;
+# the probe command depends on the OS. Unknown OSes get all flags.
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS += -mf16c
+		AVX1_M := $(shell sysctl machdep.cpu.features)
+		ifneq (,$(findstring FMA,$(AVX1_M)))
+			CFLAGS += -mfma
+		endif
+		ifneq (,$(findstring AVX1.0,$(AVX1_M)))
+			CFLAGS += -mavx
+		endif
+		AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
+		ifneq (,$(findstring AVX2,$(AVX2_M)))
+			CFLAGS += -mavx2
+		endif
+	else ifeq ($(UNAME_S),Linux)
+		AVX1_M := $(shell grep "avx " /proc/cpuinfo)
+		ifneq (,$(findstring avx,$(AVX1_M)))
+			CFLAGS += -mavx
+		endif
+		AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
+		ifneq (,$(findstring avx2,$(AVX2_M)))
+			CFLAGS += -mavx2
+		endif
+		FMA_M := $(shell grep "fma " /proc/cpuinfo)
+		ifneq (,$(findstring fma,$(FMA_M)))
+			CFLAGS += -mfma
+		endif
+		F16C_M := $(shell grep "f16c " /proc/cpuinfo)
+		ifneq (,$(findstring f16c,$(F16C_M)))
+			CFLAGS += -mf16c
+		endif
+		SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
+		ifneq (,$(findstring sse3,$(SSE3_M)))
+			CFLAGS += -msse3
+		endif
+	else ifeq ($(UNAME_S),Haiku)
+		# findstring is case-sensitive: test for the same upper-case spelling
+		# that the grep patterns select, otherwise no flag is ever enabled here
+		AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
+		ifneq (,$(findstring AVX,$(AVX1_M)))
+			CFLAGS += -mavx
+		endif
+		AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
+		ifneq (,$(findstring AVX2,$(AVX2_M)))
+			CFLAGS += -mavx2
+		endif
+		FMA_M := $(shell sysinfo -cpu | grep "FMA ")
+		ifneq (,$(findstring FMA,$(FMA_M)))
+			CFLAGS += -mfma
+		endif
+		F16C_M := $(shell sysinfo -cpu | grep "F16C ")
+		ifneq (,$(findstring F16C,$(F16C_M)))
+			CFLAGS += -mf16c
+		endif
+	else
+		CFLAGS += -mfma -mf16c -mavx -mavx2
+	endif
+endif
+ifeq ($(UNAME_M),amd64)
+	CFLAGS += -mavx -mavx2 -mfma -mf16c
+endif
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mpower9-vector
+	endif
+	# Require c++23's std::byteswap for big-endian support.
+	ifeq ($(UNAME_M),ppc64)
+		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+	endif
+endif
+ifndef WHISPER_NO_ACCELERATE
+	# Mac M1 - include Accelerate framework
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS  += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
+	endif
+endif
+ifdef WHISPER_OPENBLAS
+	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
+	LDFLAGS += -lopenblas
+endif
+ifdef WHISPER_GPROF
+	CFLAGS   += -pg
+	CXXFLAGS += -pg
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	CFLAGS += -mcpu=native
+	CXXFLAGS += -mcpu=native
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, 2, 3
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 4
+	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+#
+# Print build information
+#
+
+$(info I whisper.cpp build info: )
+$(info I UNAME_S:  $(UNAME_S))
+$(info I UNAME_P:  $(UNAME_P))
+$(info I UNAME_M:  $(UNAME_M))
+$(info I CFLAGS:   $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS:  $(LDFLAGS))
+$(info I CC:       $(CCV))
+$(info I CXX:      $(CXXV))
+$(info )
+
+# `default` only names a goal and produces no file of that name
+.PHONY: default
+default: main
+
+#
+# Build library
+#
+
+# C core
+ggml.o: ggml.c ggml.h
+	$(CC)  $(CFLAGS)   -c ggml.c -o ggml.o
+
+# C++ API layer
+whisper.o: whisper.cpp whisper.h
+	$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
+
+# static library bundling both objects
+libwhisper.a: ggml.o whisper.o
+	$(AR) rcs libwhisper.a ggml.o whisper.o
+
+# shared library bundling both objects
+libwhisper.so: ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
+
+# declared phony so that a stray file called `clean` cannot disable the rule
+.PHONY: clean
+clean:
+	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
+
+#
+# Examples
+#
+
+CC_SDL=`sdl2-config --cflags --libs`
+
+SRC_COMMON = examples/common.cpp
+SRC_COMMON_SDL = examples/common-sdl.cpp
+
+main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
+	./main -h
+
+stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+
+command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
+
+talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
+
+bench: examples/bench/bench.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+
 #
 # Audio samples
 #

-.PHONY: build
-build:
-	cmake -B build
-	cmake --build build --config Release
-
 # download a few audio samples into folder "./samples":
 .PHONY: samples
 samples:
@ -16,19 +231,12 @@ samples:
 	@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
 	@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
 	@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
-	@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
-	@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
 	@echo "Converting to 16-bit WAV ..."
 	@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
 	@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
 	@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
-	@rm samples/*.ogg
 	@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
 	@rm samples/mm1.wav
-	@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
-	@rm samples/a13.mp3
-	@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
-	@rm samples/diffusion2023-07-03.flac

 #
 # Models
@ -46,14 +254,10 @@ samples:
 .PHONY: medium.en
 .PHONY: medium
 .PHONY: large-v1
-.PHONY: large-v2
-.PHONY: large-v3
-.PHONY: large-v3-turbo
+.PHONY: large

-tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
-	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
 	@echo "Running $@ on all samples in ./samples ..."
@ -64,6 +268,14 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
 		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
 	    echo "----------------------------------------------" ; \
 		echo "" ; \
-		./build/bin/main -m models/ggml-$@.bin -f $$f ; \
+		./main -m models/ggml-$@.bin -f $$f ; \
 		echo "" ; \
 	done
+
+#
+# Tests
+#
+
+# Run the test script with bash. Declared phony so the recipe always runs,
+# even though a `tests` directory exists next to the Makefile.
+# The rule has no prerequisites, so nothing is built before the script starts.
+.PHONY: tests
+tests:
+	bash ./tests/run-tests.sh
--- a/Package.swift
+++ b/Package.swift
@ -1,19 +0,0 @@
-// swift-tools-version:5.5
-
-import PackageDescription
-
-let package = Package(
-    name: "whisper",
-    platforms: [
-        .macOS(.v12),
-        .iOS(.v14),
-        .watchOS(.v4),
-        .tvOS(.v14)
-    ],
-    products: [
-        .library(name: "whisper", targets: ["whisper"]),
-    ],
-    targets: [
-        .systemLibrary(name: "whisper", pkgConfig: "whisper"),
-    ]
-)
--- a/README.md
+++ b/README.md
@ -1,44 +1,37 @@
 # whisper.cpp

-![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
-
 [![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)

-Stable: [v1.7.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.3) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

 - Plain C/C++ implementation without dependencies
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
+- Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
 - AVX intrinsics support for x86 architectures
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
- [Integer quantization support](#quantization)
+- Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
- [Vulkan support](#vulkan-gpu-support)
- Support for CPU-only inference
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
- [OpenVINO Support](#openvino-support)
- [Ascend NPU Support](#ascend-npu-support)
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
+- Runs on the CPU
+- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)

 Supported platforms:

 - [x] Mac OS (Intel and Arm)
 - [x] [iOS](examples/whisper.objc)
 - [x] [Android](examples/whisper.android)
- [x] [Java](bindings/java/README.md)
 - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
 - [x] [WebAssembly](examples/whisper.wasm)
 - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
 - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)

-The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
-The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
+The entire implementation of the model is contained in 2 source files:
+
+- Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
+- Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)

 Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
 As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
@ -49,53 +42,149 @@ You can also easily make your own offline voice assistant application: [command]

 https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4

-On Apple Silicon, the inference runs fully on the GPU via Metal:
+Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)

-https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
+## Implementation details
+
+- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
+- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
+- Sample usage is demonstrated in [main.cpp](examples/main)
+- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
+- Various other examples are available in the [examples](examples) folder
+
+The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
+intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
+the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.

 ## Quick start

-First clone the repository:
+First, download one of the Whisper models converted in [ggml format](models). For example:

 ```bash
-git clone https://github.com/ggerganov/whisper.cpp.git
+bash ./models/download-ggml-model.sh base.en
 ```

-Navigate into the directory:
-
-```
-cd whisper.cpp
-```
-
-Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
+Now build the [main](examples/main) example and transcribe an audio file like this:

 ```bash
-sh ./models/download-ggml-model.sh base.en
-```
-
-Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
-
-```bash
-# build the project
-cmake -B build
-cmake --build build --config Release
+# build the main example
+make

 # transcribe an audio file
-./build/bin/whisper-cli -f samples/jfk.wav
+./main -f samples/jfk.wav
 ```

 ---

-For a quick demo, simply run `make base.en`.
+For a quick demo, simply run `make base.en`:
+
+```java
+$ make base.en
+
+cc  -I.              -O3 -std=c11   -pthread -DGGML_USE_ACCELERATE   -c ggml.c -o ggml.o
+c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
+c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main  -framework Accelerate
+./main -h
+
+usage: ./main [options] file0.wav file1.wav ...
+
+options:
+  -h,        --help              [default] show this help message and exit
+  -t N,      --threads N         [4      ] number of threads to use during computation
+  -p N,      --processors N      [1      ] number of processors to use during computation
+  -ot N,     --offset-t N        [0      ] time offset in milliseconds
+  -on N,     --offset-n N        [0      ] segment index offset
+  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
+  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
+  -ml N,     --max-len N         [0      ] maximum segment length in characters
+  -bo N,     --best-of N         [5      ] number of best candidates to keep
+  -bs N,     --beam-size N       [-1     ] beam size for beam search
+  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
+  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
+  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
+  -su,       --speed-up          [false  ] speed up audio by x2 (reduced accuracy)
+  -tr,       --translate         [false  ] translate from source language to english
+  -di,       --diarize           [false  ] stereo audio diarization
+  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
+  -otxt,     --output-txt        [false  ] output result in a text file
+  -ovtt,     --output-vtt        [false  ] output result in a vtt file
+  -osrt,     --output-srt        [false  ] output result in a srt file
+  -owts,     --output-words      [false  ] output script for generating karaoke video
+  -ocsv,     --output-csv        [false  ] output result in a CSV file
+  -of FNAME, --output-file FNAME [       ] output file path (without file extension)
+  -ps,       --print-special     [false  ] print special tokens
+  -pc,       --print-colors      [false  ] print colors
+  -pp,       --print-progress    [false  ] print progress
+  -nt,       --no-timestamps     [true   ] do not print timestamps
+  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
+             --prompt PROMPT     [       ] initial prompt
+  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
+  -f FNAME,  --file FNAME        [       ] input WAV file path
+
+
+bash ./models/download-ggml-model.sh base.en
+Downloading ggml model base.en ...
+ggml-base.en.bin               100%[========================>] 141.11M  6.34MB/s    in 24s
+Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
+You can now use it like this:
+
+  $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
+
+
+===============================================
+Running base.en on all samples in ./samples ...
+===============================================
+
+----------------------------------------------
+[+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
+----------------------------------------------
+
+whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
+whisper_model_load: loading model
+whisper_model_load: n_vocab       = 51864
+whisper_model_load: n_audio_ctx   = 1500
+whisper_model_load: n_audio_state = 512
+whisper_model_load: n_audio_head  = 8
+whisper_model_load: n_audio_layer = 6
+whisper_model_load: n_text_ctx    = 448
+whisper_model_load: n_text_state  = 512
+whisper_model_load: n_text_head   = 8
+whisper_model_load: n_text_layer  = 6
+whisper_model_load: n_mels        = 80
+whisper_model_load: f16           = 1
+whisper_model_load: type          = 2
+whisper_model_load: mem required  =  215.00 MB (+    6.00 MB per decoder)
+whisper_model_load: kv self size  =    5.25 MB
+whisper_model_load: kv cross size =   17.58 MB
+whisper_model_load: adding 1607 extra tokens
+whisper_model_load: model ctx     =  140.60 MB
+whisper_model_load: model size    =  140.54 MB
+
+system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
+
+main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
+
+
+[00:00:00.000 --> 00:00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
+
+
+whisper_print_timings:     fallbacks =   0 p /   0 h
+whisper_print_timings:     load time =   113.81 ms
+whisper_print_timings:      mel time =    15.40 ms
+whisper_print_timings:   sample time =    11.58 ms /    27 runs (    0.43 ms per run)
+whisper_print_timings:   encode time =   266.60 ms /     1 runs (  266.60 ms per run)
+whisper_print_timings:   decode time =    66.11 ms /    27 runs (    2.45 ms per run)
+whisper_print_timings:    total time =   476.31 ms
+```

 The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.

-For detailed usage instructions, run: `./build/bin/whisper-cli -h`
+For detailed usage instructions, run: `./main -h`

-Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
+Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
 For example, you can use `ffmpeg` like this:

-```bash
+```java
 ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
 ```

@ -104,7 +193,7 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
 If you want some extra audio samples to play with, simply run:

 ```
-make -j samples
+make samples
 ```

 This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
@ -112,278 +201,124 @@ This will download a few more audio files from Wikipedia and convert them to 16-
 You can download and run the other models as follows:

 ```
-make -j tiny.en
-make -j tiny
-make -j base.en
-make -j base
-make -j small.en
-make -j small
-make -j medium.en
-make -j medium
-make -j large-v1
-make -j large-v2
-make -j large-v3
-make -j large-v3-turbo
+make tiny.en
+make tiny
+make base.en
+make base
+make small.en
+make small
+make medium.en
+make medium
+make large-v1
+make large
 ```

 ## Memory usage

-| Model  | Disk    | Mem     |
-| ------ | ------- | ------- |
-| tiny   | 75 MiB  | ~273 MB |
-| base   | 142 MiB | ~388 MB |
-| small  | 466 MiB | ~852 MB |
-| medium | 1.5 GiB | ~2.1 GB |
-| large  | 2.9 GiB | ~3.9 GB |
-
-## Quantization
-
-`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
-Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
-
-Here are the steps for creating and using a quantized model:
-
-```bash
-# quantize a model with Q5_0 method
-cmake -B build
-cmake --build build --config Release
-./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
-
-# run the examples as usual, specifying the quantized model file
-./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
-```
-
-## Core ML support
-
-On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
-speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
-
- Install Python dependencies needed for the creation of the Core ML model:
-
-  ```bash
-  pip install ane_transformers
-  pip install openai-whisper
-  pip install coremltools
-  ```
-
-  - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
-  - Python 3.10 is recommended.
-  - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
-  - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
-    - To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
-    - To activate the environment, use: `conda activate py310-whisper`
-
- Generate a Core ML model. For example, to generate a `base.en` model, use:
-
-  ```bash
-  ./models/generate-coreml-model.sh base.en
-  ```
-
-  This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
-
- Build `whisper.cpp` with Core ML support:
-
-  ```bash
-  # using CMake
-  cmake -B build -DWHISPER_COREML=1
-  cmake --build build -j --config Release
-  ```
-
- Run the examples as usual. For example:
-
-  ```text
-  $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
-  ...
-
-  whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
-  whisper_init_state: first run on a device may take a while ...
-  whisper_init_state: Core ML model loaded
-
-  system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
-
-  ...
-  ```
-
-  The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
-  Next runs are faster.
-
-For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
-
-## OpenVINO support
-
-On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
-on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
-
-This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
-
- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
-
-  Windows:
-
-  ```powershell
-  cd models
-  python -m venv openvino_conv_env
-  openvino_conv_env\Scripts\activate
-  python -m pip install --upgrade pip
-  pip install -r requirements-openvino.txt
-  ```
-
-  Linux and macOS:
-
-  ```bash
-  cd models
-  python3 -m venv openvino_conv_env
-  source openvino_conv_env/bin/activate
-  python -m pip install --upgrade pip
-  pip install -r requirements-openvino.txt
-  ```
-
- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
-
-  ```
-  python convert-whisper-to-openvino.py --model base.en
-  ```
-
-  This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
-  is the default location that the OpenVINO extension will search at runtime.
-
- Build `whisper.cpp` with OpenVINO support:
-
-  Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
-
-  After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
-
-  Linux:
-
-  ```bash
-  source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
-  ```
-
-  Windows (cmd):
-
-  ```powershell
-  C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
-  ```
-
-  And then build the project using cmake:
-
-  ```bash
-  cmake -B build -DWHISPER_OPENVINO=1
-  cmake --build build -j --config Release
-  ```
-
- Run the examples as usual. For example:
-
-  ```text
-  $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
-  ...
-
-  whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
-  whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
-  whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
-  whisper_ctx_init_openvino_encoder: OpenVINO model loaded
-
-  system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
-
-  ...
-  ```
-
-  The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
-  cached for the next run.
-
-For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
-
-## NVIDIA GPU support
-
-With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
-First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
-
-Now build `whisper.cpp` with CUDA support:
-
-```
-cmake -B build -DGGML_CUDA=1
-cmake --build build -j --config Release
-```
-
-## Vulkan GPU support
-Cross-vendor solution which allows you to accelerate workload on your GPU.
-First, make sure your graphics card driver provides support for Vulkan API.
-
-Now build `whisper.cpp` with Vulkan support:
-```
-cmake -B build -DGGML_VULKAN=1
-cmake --build build -j --config Release
-```
-
-## BLAS CPU support via OpenBLAS
-
-Encoder processing can be accelerated on the CPU via OpenBLAS.
-First, make sure you have installed `openblas`: https://www.openblas.net/
-
-Now build `whisper.cpp` with OpenBLAS support:
-
-```
-cmake -B build -DGGML_BLAS=1
-cmake --build build -j --config Release
-```
-
-## Ascend NPU support
-
-Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
-
-First, check if your Ascend NPU device is supported:
-
-**Verified devices**
-| Ascend NPU                    | Status  |
-|:-----------------------------:|:-------:|
-| Atlas 300T A2                 | Support |
-
-Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended.
-
-Now build `whisper.cpp` with CANN support:
-
-```
-cmake -B build -DGGML_CANN=1
-cmake --build build -j --config Release
-```
-
-Run the inference examples as usual, for example:
-
-```
-./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
-```
-
-*Notes:*
-
- If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
-
-## Installing with Conan
-
-You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
-
-```
-conan install --requires="whisper-cpp/[*]" --build=missing
-```
-
-For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
+| Model  | Disk   | Mem     | SHA                                        |
+| ---    | ---    | ---     | ---                                        |
+| tiny   |  75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| base   | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| small  | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| large  | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |

 ## Limitations

 - Inference only
+- No GPU support (yet)
+
+## Another example
+
+Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
+in about half a minute on a MacBook M1 Pro, using `medium.en` model:
+
+<details>
+  <summary>Expand to see the result</summary>
+
+```java
+$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
+
+whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
+whisper_model_load: loading model
+whisper_model_load: n_vocab       = 51864
+whisper_model_load: n_audio_ctx   = 1500
+whisper_model_load: n_audio_state = 1024
+whisper_model_load: n_audio_head  = 16
+whisper_model_load: n_audio_layer = 24
+whisper_model_load: n_text_ctx    = 448
+whisper_model_load: n_text_state  = 1024
+whisper_model_load: n_text_head   = 16
+whisper_model_load: n_text_layer  = 24
+whisper_model_load: n_mels        = 80
+whisper_model_load: f16           = 1
+whisper_model_load: type          = 4
+whisper_model_load: mem required  = 1720.00 MB (+   43.00 MB per decoder)
+whisper_model_load: kv self size  =   42.00 MB
+whisper_model_load: kv cross size =  140.62 MB
+whisper_model_load: adding 1607 extra tokens
+whisper_model_load: model ctx     = 1462.35 MB
+whisper_model_load: model size    = 1462.12 MB
+
+system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
+
+main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
+
+
+[00:00:00.000 --> 00:00:08.000]   My fellow Americans, this day has brought terrible news and great sadness to our country.
+[00:00:08.000 --> 00:00:17.000]   At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
+[00:00:17.000 --> 00:00:23.000]   A short time later, debris was seen falling from the skies above Texas.
+[00:00:23.000 --> 00:00:29.000]   The Columbia's lost. There are no survivors.
+[00:00:29.000 --> 00:00:32.000]   On board was a crew of seven.
+[00:00:32.000 --> 00:00:39.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
+[00:00:39.000 --> 00:00:48.000]   Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
+[00:00:48.000 --> 00:00:52.000]   a colonel in the Israeli Air Force.
+[00:00:52.000 --> 00:00:58.000]   These men and women assumed great risk in the service to all humanity.
+[00:00:58.000 --> 00:01:03.000]   In an age when space flight has come to seem almost routine,
+[00:01:03.000 --> 00:01:07.000]   it is easy to overlook the dangers of travel by rocket
+[00:01:07.000 --> 00:01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
+[00:01:12.000 --> 00:01:18.000]   These astronauts knew the dangers, and they faced them willingly,
+[00:01:18.000 --> 00:01:23.000]   knowing they had a high and noble purpose in life.
+[00:01:23.000 --> 00:01:31.000]   Because of their courage and daring and idealism, we will miss them all the more.
+[00:01:31.000 --> 00:01:36.000]   All Americans today are thinking as well of the families of these men and women
+[00:01:36.000 --> 00:01:40.000]   who have been given this sudden shock and grief.
+[00:01:40.000 --> 00:01:45.000]   You're not alone. Our entire nation grieves with you,
+[00:01:45.000 --> 00:01:52.000]   and those you love will always have the respect and gratitude of this country.
+[00:01:52.000 --> 00:01:56.000]   The cause in which they died will continue.
+[00:01:56.000 --> 00:02:04.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery
+[00:02:04.000 --> 00:02:11.000]   and the longing to understand. Our journey into space will go on.
+[00:02:11.000 --> 00:02:16.000]   In the skies today, we saw destruction and tragedy.
+[00:02:16.000 --> 00:02:22.000]   Yet farther than we can see, there is comfort and hope.
+[00:02:22.000 --> 00:02:29.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
+[00:02:29.000 --> 00:02:35.000]   who created all these. He who brings out the starry hosts one by one
+[00:02:35.000 --> 00:02:39.000]   and calls them each by name."
+[00:02:39.000 --> 00:02:46.000]   Because of His great power and mighty strength, not one of them is missing.
+[00:02:46.000 --> 00:02:55.000]   The same Creator who names the stars also knows the names of the seven souls we mourn today.
+[00:02:55.000 --> 00:03:01.000]   The crew of the shuttle Columbia did not return safely to earth,
+[00:03:01.000 --> 00:03:05.000]   yet we can pray that all are safely home.
+[00:03:05.000 --> 00:03:13.000]   May God bless the grieving families, and may God continue to bless America.
+[00:03:13.000 --> 00:03:19.000]   [Silence]
+
+
+whisper_print_timings:     fallbacks =   1 p /   0 h
+whisper_print_timings:     load time =   569.03 ms
+whisper_print_timings:      mel time =   146.85 ms
+whisper_print_timings:   sample time =   238.66 ms /   553 runs (    0.43 ms per run)
+whisper_print_timings:   encode time = 18665.10 ms /     9 runs ( 2073.90 ms per run)
+whisper_print_timings:   decode time = 13090.93 ms /   549 runs (   23.85 ms per run)
+whisper_print_timings:    total time = 32733.52 ms
+```
+</details>

 ## Real-time audio input example

 This is a naive example of performing real-time inference on audio from your microphone.
-The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
+The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continously.
 More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).

-```bash
-cmake -B build
-cmake --build build --config Release
-./build/bin/stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+```java
+make stream
+./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
 ```

 https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
@ -393,18 +328,14 @@ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a
 Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
 to highlight words with high or low confidence:

-```bash
-./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
-```
-
 <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">

 ## Controlling the length of the generated text segments (experimental)

 For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:

-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
+```java
+./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16

 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
@ -423,12 +354,12 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
 [00:00:10.020 --> 00:00:11.000]   country.
 ```

-## Word-level timestamp (experimental)
+## Word-level timestamp

 The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:

-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
+```java
+./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1

 whisper_model_load: loading model from './models/ggml-base.en.bin'
 ...
@ -436,7 +367,7 @@ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1

 main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

-[00:00:00.000 --> 00:00:00.320]
+[00:00:00.000 --> 00:00:00.320]  
 [00:00:00.320 --> 00:00:00.370]   And
 [00:00:00.370 --> 00:00:00.690]   so
 [00:00:00.690 --> 00:00:00.850]   my
@ -464,42 +395,16 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
 [00:00:10.510 --> 00:00:11.000]  .
 ```

-## Speaker segmentation via tinydiarize (experimental)
-
-More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
-
-Sample usage:
-
-```py
-# download a tinydiarize compatible model
-./models/download-ggml-model.sh small.en-tdrz
-
-# run as usual, adding the "-tdrz" command-line argument
-./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
-...
-main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
-...
-[00:00:00.000 --> 00:00:03.800]   Okay Houston, we've had a problem here. [SPEAKER_TURN]
-[00:00:03.800 --> 00:00:06.200]   This is Houston. Say again please. [SPEAKER_TURN]
-[00:00:06.200 --> 00:00:08.260]   Uh Houston we've had a problem.
-[00:00:08.260 --> 00:00:11.320]   We've had a main beam up on a volt. [SPEAKER_TURN]
-[00:00:11.320 --> 00:00:13.820]   Roger main beam interval. [SPEAKER_TURN]
-[00:00:13.820 --> 00:00:15.100]   Uh uh [SPEAKER_TURN]
-[00:00:15.100 --> 00:00:18.020]   So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
-[00:00:18.020 --> 00:00:25.740]   Okay uh right now uh Houston the uh voltage is uh is looking good um.
-[00:00:27.620 --> 00:00:29.940]   And we had a a pretty large bank or so.
-```
-
 ## Karaoke-style movie generation (experimental)

-The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
+The [main](examples/main) example provides support for output of karaoke-style movies, where the
 currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
 This requires to have `ffmpeg` installed.

-Here are a few _"typical"_ examples:
+Here are a few *"typical"* examples:

-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
+```java
+./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
 source ./samples/jfk.wav.wts
 ffplay ./samples/jfk.wav.mp4
 ```
@ -508,8 +413,8 @@ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b

 ---

-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
+```java
+./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
 source ./samples/mm0.wav.wts
 ffplay ./samples/mm0.wav.mp4
 ```
@ -518,8 +423,8 @@ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-9

 ---

-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
+```java
+./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
 source ./samples/gb0.wav.wts
 ffplay ./samples/gb0.wav.mp4
 ```
@ -530,10 +435,10 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a

 ## Video comparison of different models

-Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
+Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:

-```bash
-./scripts/bench-wts.sh samples/jfk.wav
+```java
+./extra/bench-wts.sh samples/jfk.wav
 ffplay ./samples/jfk.wav.all.mp4
 ```

@ -544,24 +449,12 @@ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8
 ## Benchmarks

 In order to have an objective comparison of the performance of the inference across different system configurations,
-use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
+use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
 took to execute it. The results are summarized in the following Github issue:

 [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)

-Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
-
-You can run it with the following command, by default it will run against any standard model in the models folder.
-
-```bash
-python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
-```
-
-It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
-
-It outputs a csv file with the results of the benchmarking.
-
-## `ggml` format
+## ggml format

 The original models are converted to a custom binary format. This allows to pack everything needed into a single file:

@ -573,54 +466,45 @@ The original models are converted to a custom binary format. This allows to pack
 You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
 or manually from here:

- https://huggingface.co/ggerganov/whisper.cpp
+- https://huggingface.co/datasets/ggerganov/whisper.cpp
 - https://ggml.ggerganov.com

-For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
+For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
+in [models](models).

 ## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)

- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
-  - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
- [x] Java:
-  - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
-  - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
+- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
+- [X] Javascript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
+- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
+- [X] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
+- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
+- [X] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
  - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
  - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
+- [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
  - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
-  - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
  - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
-  - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)

 ## Examples

 There are various examples of using the library for different projects in the [examples](examples) folder.
 Some of the examples are even ported to run in the browser using WebAssembly. Check them out!

-| Example                                             | Web                                   | Description                                                                                                                     |
-| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| [whisper-cli](examples/cli)                         | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper                                                                       |
-| [whisper-bench](examples/bench)                     | [bench.wasm](examples/bench.wasm)     | Benchmark the performance of Whisper on your machine                                                                            |
-| [whisper-stream](examples/stream)                   | [stream.wasm](examples/stream.wasm)   | Real-time transcription of raw microphone capture                                                                               |
-| [whisper-command](examples/command)                 | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic                                                         |
-| [whisper-server](examples/server)                   |                                       | HTTP transcription server with OAI-like API                                                                                     |
-| [whisper-talk-llama](examples/talk-llama)           |                                       | Talk with a LLaMA bot                                                                                                           |
-| [whisper.objc](examples/whisper.objc)               |                                       | iOS mobile application using whisper.cpp                                                                                        |
-| [whisper.swiftui](examples/whisper.swiftui)         |                                       | SwiftUI iOS / macOS application using whisper.cpp                                                                               |
-| [whisper.android](examples/whisper.android)         |                                       | Android mobile application using whisper.cpp                                                                                    |
-| [whisper.nvim](examples/whisper.nvim)               |                                       | Speech-to-text plugin for Neovim                                                                                                |
-| [generate-karaoke.sh](examples/generate-karaoke.sh) |                                       | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture                           |
-| [livestream.sh](examples/livestream.sh)             |                                       | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185)                                           |
-| [yt-wsp.sh](examples/yt-wsp.sh)                     |                                       | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
-| [wchess](examples/wchess)                           | [wchess.wasm](examples/wchess)        | Voice-controlled chess                                                                                                          |
+| Example | Web | Description |
+| ---     | --- | ---         |
+| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
+| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
+| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
+| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
+| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
+| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
+| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
+| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
+| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
+| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
+| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
+| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |

 ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)

--- a/README_sycl.md
+++ b/README_sycl.md
@ -1,249 +0,0 @@
-# whisper.cpp for SYCL
-
-[Background](#background)
-
-[OS](#os)
-
-[Intel GPU](#intel-gpu)
-
-[Linux](#linux)
-
-[Environment Variable](#environment-variable)
-
-[Known Issue](#known-issue)
-
-[Todo](#todo)
-
-## Background
-
-SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
-
-oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
-
-Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs.
-
-To avoid  re-inventing the wheel, this code refers other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use a open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) migrate to SYCL.
-
-The whisper.cpp for SYCL is used to support Intel GPUs.
-
-For Intel CPU, recommend to use whisper.cpp for X86 (Intel MKL build).
-
-## OS
-
-|OS|Status|Verified|
-|-|-|-|
-|Linux|Support|Ubuntu 22.04|
-|Windows|Ongoing| |
-
-
-## Intel GPU
-
-|Intel GPU| Status | Verified Model|
-|-|-|-|
-|Intel Data Center Max Series| Support| Max 1550|
-|Intel Data Center Flex Series| Support| Flex 170|
-|Intel Arc Series| Support| Arc 770|
-|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
-|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
-
-
-## Linux
-
-### Setup Environment
-
-1. Install Intel GPU driver.
-
-a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
-
-Note: for iGPU, please install the client GPU driver.
-
-b. Add user to group: video, render.
-
-```
-sudo usermod -aG render username
-sudo usermod -aG video username
-```
-
-Note: re-login to enable it.
-
-c. Check
-
-```
-sudo apt install clinfo
-sudo clinfo -l
-```
-
-Output (example):
-
-```
-Platform #0: Intel(R) OpenCL Graphics
- `-- Device #0: Intel(R) Arc(TM) A770 Graphics
-
-
-Platform #0: Intel(R) OpenCL HD Graphics
- `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
-```
-
-2. Install Intel® oneAPI Base toolkit.
-
-
-a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
-
-Recommend to install to default folder: **/opt/intel/oneapi**.
-
-Following guide use the default folder as example. If you use other folder, please modify the following guide info with your folder.
-
-b. Check
-
-```
-source /opt/intel/oneapi/setvars.sh
-
-sycl-ls
-```
-
-There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
-
-Output (example):
-```
-[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000]
-[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
-[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [23.30.26918.50]
-[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
-
-```
-
-2. Build locally:
-
-```
-mkdir -p build
-cd build
-source /opt/intel/oneapi/setvars.sh
-
-#for FP16
-#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON 
-
-#for FP32
-cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-#build example/main only
-#cmake --build . --config Release --target main
-
-#build all binary
-cmake --build . --config Release -v
-
-```
-
-or
-
-```
-./examples/sycl/build.sh
-```
-
-Note:
-
- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only.
-
-### Run
-
-1. Put model file to folder **models**
-
-2. Enable oneAPI running environment
-
-```
-source /opt/intel/oneapi/setvars.sh
-```
-
-3. List device ID
-
-Run without parameter:
-
-```
-./build/bin/ls-sycl-device
-
-or
-
-./build/bin/main
-```
-
-Check the ID in startup log, like:
-
-```
-found 4 SYCL devices:
-  Device 0: Intel(R) Arc(TM) A770 Graphics,	compute capability 1.3,
-    max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136
-  Device 1: Intel(R) FPGA Emulation Device,	compute capability 1.2,
-    max compute_units 24,	max work group size 67108864,	max sub group size 64,	global mem size 67065057280
-  Device 2: 13th Gen Intel(R) Core(TM) i7-13700K,	compute capability 3.0,
-    max compute_units 24,	max work group size 8192,	max sub group size 64,	global mem size 67065057280
-  Device 3: Intel(R) Arc(TM) A770 Graphics,	compute capability 3.0,
-    max compute_units 512,	max work group size 1024,	max sub group size 32,	global mem size 16225243136
-
-```
-
-|Attribute|Note|
-|-|-|
-|compute capability 1.3|Level-zero running time, recommended |
-|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
-
-4. Set device ID and execute whisper.cpp
-
-Set device ID = 0 by **GGML_SYCL_DEVICE=0**
-
-```
-GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
-```
-or run by script:
-
-```
-./examples/sycl/run_whisper.sh
-```
-
-
-
-5. Check the device ID in output
-
-Like:
-```
-Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
-```
-
-
-## Environment Variable
-
-#### Build
-
-|Name|Value|Function|
-|-|-|-|
-|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
-|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|
-|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
-|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
-
-#### Running
-
-
-|Name|Value|Function|
-|-|-|-|
-|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
-|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
-
-## Known Issue
-
- Error:  `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
-
-  Miss to enable oneAPI running environment.
-
-  Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
-
-
- Hang during startup
-
-  llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
-
-  Solution: add **--no-mmap**.
-
-## Todo
-
- Support to build in Windows.
-
- Support multiple cards.
--- a/Sources/whisper/module.modulemap
+++ b/Sources/whisper/module.modulemap
@ -1,5 +0,0 @@
-module whisper [system] {
-    header "whisper.h"
-    link "whisper"
-    export *
-}
--- a/Sources/whisper/whisper.h
+++ b/Sources/whisper/whisper.h
@ -1,4 +0,0 @@
-#pragma once
-
-#include <whisper.h>
-
--- a/bindings/go/Makefile
+++ b/bindings/go/Makefile
@ -1,31 +1,9 @@
-ifndef UNAME_S
-UNAME_S := $(shell uname -s)
-endif
-
-ifndef UNAME_P
-UNAME_P := $(shell uname -p)
-endif
-
-ifndef UNAME_M
-UNAME_M := $(shell uname -m)
-endif
-
-GGML_METAL_PATH_RESOURCES := $(abspath ../..)
 BUILD_DIR := build
 MODELS_DIR := models
 EXAMPLES_DIR := $(wildcard examples/*)
-INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
+INCLUDE_PATH := $(abspath ../..)
 LIBRARY_PATH := $(abspath ../..)

-ifeq ($(GGML_CUDA),1)
-	LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
-	BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
-endif
-
-ifeq ($(UNAME_S),Darwin)
-	EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
-endif
-
 all: clean whisper examples

 whisper: mkdir
@ -33,13 +11,8 @@ whisper: mkdir
 	@${MAKE} -C ../.. libwhisper.a

 test: model-small whisper modtidy
-ifeq ($(UNAME_S),Darwin)
-	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
-	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
-else
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
-endif

 examples: $(EXAMPLES_DIR)

@ -48,11 +21,7 @@ model-small: mkdir examples/go-model-download

 $(EXAMPLES_DIR): mkdir whisper modtidy
 	@echo Build example $(notdir $@)
-ifeq ($(UNAME_S),Darwin)
-	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
-else
 	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
-endif

 mkdir:
 	@echo Mkdir ${BUILD_DIR}
@ -63,7 +32,7 @@ mkdir:
 modtidy:
 	@go mod tidy

-clean:
+clean: 
 	@echo Clean
 	@rm -fr $(BUILD_DIR)
 	@go clean
--- a/bindings/go/README.md
+++ b/bindings/go/README.md
@ -31,7 +31,7 @@ func main() {
 	if err != nil {
 		panic(err)
 	}
-	if err := context.Process(samples, nil, nil); err != nil {
+	if err := context.Process(samples, nil); err != nil {
 		return err
 	}

@ -62,12 +62,6 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
 make examples
 ```

-To build using cuda support add `GGML_CUDA=1`:
-
-```bash
-GGML_CUDA=1 make examples
-```
-
 The examples are placed in the `build` directory. Once built, you can download all the models with the following command:

 ```bash
@ -77,7 +71,7 @@ The examples are placed in the `build` directory. Once built, you can download a
 And you can then test a model against samples with the following command:

 ```bash
-./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
+./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav 
 ```

 ## Using the bindings
--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@ -17,14 +17,14 @@ import (
 // CONSTANTS

 const (
-	srcUrl  = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
-	srcExt  = ".bin"                                                      // Filename extension
-	bufSize = 1024 * 64                                                   // Size of the buffer used for downloading the model
+	srcUrl  = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main" // The location of the models
+	srcExt  = ".bin"                                                               // Filename extension
+	bufSize = 1024 * 64                                                            // Size of the buffer used for downloading the model
 )

 var (
 	// The models which will be downloaded, if no model is specified as an argument
-	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
 )

 var (
--- a/bindings/go/examples/go-whisper/flags.go
+++ b/bindings/go/examples/go-whisper/flags.go
@ -68,6 +68,10 @@ func (flags *Flags) GetOut() string {
 	return strings.ToLower(flags.Lookup("out").Value.String())
 }

+func (flags *Flags) IsSpeedup() bool {
+	return flags.Lookup("speedup").Value.String() == "true"
+}
+
 func (flags *Flags) IsTokens() bool {
 	return flags.Lookup("tokens").Value.String() == "true"
 }
@ -107,6 +111,10 @@ func (flags *Flags) SetParams(context whisper.Context) error {
 		fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
 		context.SetDuration(duration)
 	}
+	if flags.IsSpeedup() {
+		fmt.Fprintf(flags.Output(), "Setting speedup to true\n")
+		context.SetSpeedup(true)
+	}
 	if threads := flags.GetThreads(); threads != 0 {
 		fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
 		context.SetThreads(threads)
@ -138,6 +146,7 @@ func registerFlags(flag *Flags) {
 	flag.Duration("offset", 0, "Time offset")
 	flag.Duration("duration", 0, "Duration of audio to process")
 	flag.Uint("threads", 0, "Number of threads to use")
+	flag.Bool("speedup", false, "Enable speedup")
 	flag.Uint("max-len", 0, "Maximum segment length in characters")
 	flag.Uint("max-tokens", 0, "Maximum tokens per segment")
 	flag.Float64("word-thold", 0, "Maximum segment score")
--- a/bindings/go/examples/go-whisper/process.go
+++ b/bindings/go/examples/go-whisper/process.go
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
 	// Process the data
 	fmt.Fprintf(flags.Output(), "  ...processing %q\n", path)
 	context.ResetTimings()
-	if err := context.Process(data, cb, nil); err != nil {
+	if err := context.Process(data, cb); err != nil {
 		return err
 	}

--- a/bindings/go/go.mod
+++ b/bindings/go/go.mod
@ -1,10 +1,10 @@
 module github.com/ggerganov/whisper.cpp/bindings/go

-go 1.23
+go 1.19

 require (
 	github.com/go-audio/wav v1.1.0
-	github.com/stretchr/testify v1.9.0
+	github.com/stretchr/testify v1.8.1
 )

 require (
--- a/bindings/go/go.sum
+++ b/bindings/go/go.sum
@ -1,3 +1,4 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@ -8,9 +9,15 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
 github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
-github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/bindings/go/params.go
+++ b/bindings/go/params.go
@ -19,10 +19,6 @@ func (p *Params) SetTranslate(v bool) {
 	p.translate = toBool(v)
 }

-func (p *Params) SetSplitOnWord(v bool) {
-	p.split_on_word = toBool(v)
-}
-
 func (p *Params) SetNoContext(v bool) {
 	p.no_context = toBool(v)
 }
@ -47,6 +43,10 @@ func (p *Params) SetPrintTimestamps(v bool) {
 	p.print_timestamps = toBool(v)
 }

+func (p *Params) SetSpeedup(v bool) {
+	p.speed_up = toBool(v)
+}
+
 // Set language id
 func (p *Params) SetLanguage(lang int) error {
 	if lang == -1 {
@ -105,47 +105,11 @@ func (p *Params) SetMaxSegmentLength(n int) {
 	p.max_len = C.int(n)
 }

-func (p *Params) SetTokenTimestamps(b bool) {
-	p.token_timestamps = toBool(b)
-}
-
 // Set max tokens per segment (0 = no limit)
 func (p *Params) SetMaxTokensPerSegment(n int) {
 	p.max_tokens = C.int(n)
 }

-// Set audio encoder context
-func (p *Params) SetAudioCtx(n int) {
-	p.audio_ctx = C.int(n)
-}
-
-func (p *Params) SetMaxContext(n int) {
-	p.n_max_text_ctx = C.int(n)
-}
-
-func (p *Params) SetBeamSize(n int) {
-	p.beam_search.beam_size = C.int(n)
-}
-
-func (p *Params) SetEntropyThold(t float32) {
-	p.entropy_thold = C.float(t)
-}
-
-func (p *Params) SetTemperature(t float32) {
-	p.temperature = C.float(t)
-}
-
-// Sets the fallback temperature incrementation
-// Pass -1.0 to disable this feature
-func (p *Params) SetTemperatureFallback(t float32) {
-	p.temperature_inc = C.float(t)
-}
-
-// Set initial prompt
-func (p *Params) SetInitialPrompt(prompt string) {
-	p.initial_prompt = C.CString(prompt)
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 // PRIVATE METHODS

@ -169,12 +133,6 @@ func (p *Params) String() string {
 	str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
 	str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
 	str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
-	str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
-	str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
-	str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
-	str += fmt.Sprintf(" temperature=%f", p.temperature)
-	str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
-	str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
 	if p.translate {
 		str += " translate"
 	}
@ -199,6 +157,9 @@ func (p *Params) String() string {
 	if p.token_timestamps {
 		str += " token_timestamps"
 	}
+	if p.speed_up {
+		str += " speed_up"
+	}

 	return str + ">"
 }
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@ -76,8 +76,9 @@ func (context *context) SetTranslate(v bool) {
 	context.params.SetTranslate(v)
 }

-func (context *context) SetSplitOnWord(v bool) {
-	context.params.SetSplitOnWord(v)
+// Set speedup flag
+func (context *context) SetSpeedup(v bool) {
+	context.params.SetSpeedup(v)
 }

 // Set number of threads to use
@ -92,7 +93,7 @@ func (context *context) SetOffset(v time.Duration) {

 // Set duration of audio to process
 func (context *context) SetDuration(v time.Duration) {
-	context.params.SetDuration(int(v.Milliseconds()))
+	context.params.SetDuration(int(v.Milliseconds()))
 }

 // Set timestamp token probability threshold (~0.01)
@ -110,52 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
 	context.params.SetMaxSegmentLength(int(n))
 }

-// Set token timestamps flag
-func (context *context) SetTokenTimestamps(b bool) {
-	context.params.SetTokenTimestamps(b)
-}
-
 // Set max tokens per segment (0 = no limit)
 func (context *context) SetMaxTokensPerSegment(n uint) {
 	context.params.SetMaxTokensPerSegment(int(n))
 }

-// Set audio encoder context
-func (context *context) SetAudioCtx(n uint) {
-	context.params.SetAudioCtx(int(n))
-}
-
-// Set maximum number of text context tokens to store
-func (context *context) SetMaxContext(n int) {
-	context.params.SetMaxContext(n)
-}
-
-// Set Beam Size
-func (context *context) SetBeamSize(n int) {
-	context.params.SetBeamSize(n)
-}
-
-// Set Entropy threshold
-func (context *context) SetEntropyThold(t float32) {
-	context.params.SetEntropyThold(t)
-}
-
-// Set Temperature
-func (context *context) SetTemperature(t float32) {
-	context.params.SetTemperature(t)
-}
-
-// Set the fallback temperature incrementation
-// Pass -1.0 to disable this feature
-func (context *context) SetTemperatureFallback(t float32) {
-	context.params.SetTemperatureFallback(t)
-}
-
-// Set initial prompt
-func (context *context) SetInitialPrompt(prompt string) {
-	context.params.SetInitialPrompt(prompt)
-}
-
 // ResetTimings resets the model timings. Should be called before processing
 func (context *context) ResetTimings() {
 	context.model.ctx.Whisper_reset_timings()
@ -187,16 +147,12 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
 }

 // Process new sample data and return any errors
-func (context *context) Process(
-	data []float32,
-	callNewSegment SegmentCallback,
-	callProgress ProgressCallback,
-) error {
+func (context *context) Process(data []float32, cb SegmentCallback) error {
 	if context.model.ctx == nil {
 		return ErrInternalAppError
 	}
 	// If the callback is defined then we force on single_segment mode
-	if callNewSegment != nil {
+	if cb != nil {
 		context.params.SetSingleSegment(true)
 	}

@ -204,28 +160,24 @@ func (context *context) Process(
 	processors := 0
 	if processors > 1 {
 		if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
-			if callNewSegment != nil {
+			if cb != nil {
 				num_segments := context.model.ctx.Whisper_full_n_segments()
 				s0 := num_segments - new
 				for i := s0; i < num_segments; i++ {
-					callNewSegment(toSegment(context.model.ctx, i))
+					cb(toSegment(context.model.ctx, i))
 				}
 			}
 		}); err != nil {
 			return err
 		}
 	} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
-		if callNewSegment != nil {
+		if cb != nil {
 			num_segments := context.model.ctx.Whisper_full_n_segments()
 			s0 := num_segments - new
 			for i := s0; i < num_segments; i++ {
-				callNewSegment(toSegment(context.model.ctx, i))
+				cb(toSegment(context.model.ctx, i))
 			}
 		}
-	}, func(progress int) {
-		if callProgress != nil {
-			callProgress(progress)
-		}
 	}); err != nil {
 		return err
 	}
@ -328,14 +280,10 @@ func toSegment(ctx *whisper.Context, n int) Segment {
 func toTokens(ctx *whisper.Context, n int) []Token {
 	result := make([]Token, ctx.Whisper_full_n_tokens(n))
 	for i := 0; i < len(result); i++ {
-		data := ctx.Whisper_full_get_token_data(n, i)
-
 		result[i] = Token{
-			Id:    int(ctx.Whisper_full_get_token_id(n, i)),
-			Text:  ctx.Whisper_full_get_token_text(n, i),
-			P:     ctx.Whisper_full_get_token_p(n, i),
-			Start: time.Duration(data.T0()) * time.Millisecond * 10,
-			End:   time.Duration(data.T1()) * time.Millisecond * 10,
+			Id:   int(ctx.Whisper_full_get_token_id(n, i)),
+			Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
+			P:    ctx.Whisper_full_get_token_p(n, i),
 		}
 	}
 	return result
--- a/bindings/go/pkg/whisper/context_test.go
+++ b/bindings/go/pkg/whisper/context_test.go
@ -4,90 +4,52 @@ import (
 	"os"
 	"testing"

-	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	"github.com/go-audio/wav"
+	// Packages
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	assert "github.com/stretchr/testify/assert"
 )

-func TestSetLanguage(t *testing.T) {
-	assert := assert.New(t)
+const (
+	ModelPath  = "../../models/ggml-tiny.bin"
+	SamplePath = "../../samples/jfk.wav"
+)

+func Test_Whisper_000(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Load model
+	model, err := whisper.New(ModelPath)
+	assert.NoError(err)
+	assert.NotNil(model)
+	// List languages while the model is still open, then close it
+	t.Log("languages=", model.Languages())
+	assert.NoError(model.Close())
+}
+
+func Test_Whisper_001(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Load model
 	model, err := whisper.New(ModelPath)
 	assert.NoError(err)
 	assert.NotNil(model)
 	defer model.Close()

-	context, err := model.NewContext()
+	// Get context for decoding
+	ctx, err := model.NewContext()
 	assert.NoError(err)
+	assert.NotNil(ctx)

-	// This returns an error since
-	// the model 'models/ggml-small.en.bin'
-	// that is loaded is not multilingual
-	err = context.SetLanguage("en")
-	assert.Error(err)
-}
-
-func TestContextModelIsMultilingual(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	context, err := model.NewContext()
-	assert.NoError(err)
-
-	isMultilingual := context.IsMultilingual()
-
-	// This returns false since
-	// the model 'models/ggml-small.en.bin'
-	// that is loaded is not multilingual
-	assert.False(isMultilingual)
-}
-
-func TestLanguage(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	context, err := model.NewContext()
-	assert.NoError(err)
-
-	// This always returns en since
-	// the model 'models/ggml-small.en.bin'
-	// that is loaded is not multilingual
-	expectedLanguage := "en"
-	actualLanguage := context.Language()
-	assert.Equal(expectedLanguage, actualLanguage)
-}
-
-func TestProcess(t *testing.T) {
-	assert := assert.New(t)
-
-	fh, err := os.Open(SamplePath)
-	assert.NoError(err)
-	defer fh.Close()
-
-	// Decode the WAV file - load the full buffer
-	dec := wav.NewDecoder(fh)
-	buf, err := dec.FullPCMBuffer()
-	assert.NoError(err)
-	assert.Equal(uint16(1), dec.NumChans)
-
-	data := buf.AsFloat32Buffer().Data
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	context, err := model.NewContext()
-	assert.NoError(err)
-
-	err = context.Process(data, nil, nil)
-	assert.NoError(err)
 }
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@ -12,10 +12,6 @@ import (
 // time. It is called during the Process function
 type SegmentCallback func(Segment)

-// ProgressCallback is the callback function for reporting progress during
-// processing. It is called during the Process function
-type ProgressCallback func(int)
-
 // Model is the interface to a whisper model. Create a new model with the
 // function whisper.New(string)
 type Model interface {
@ -38,27 +34,19 @@ type Context interface {
 	IsMultilingual() bool     // Return true if the model is multilingual.
 	Language() string         // Get language

-	SetOffset(time.Duration)          // Set offset
-	SetDuration(time.Duration)        // Set duration
-	SetThreads(uint)                  // Set number of threads to use
-	SetSplitOnWord(bool)              // Set split on word flag
-	SetTokenThreshold(float32)        // Set timestamp token probability threshold
-	SetTokenSumThreshold(float32)     // Set timestamp token sum probability threshold
-	SetMaxSegmentLength(uint)         // Set max segment length in characters
-	SetTokenTimestamps(bool)          // Set token timestamps flag
-	SetMaxTokensPerSegment(uint)      // Set max tokens per segment (0 = no limit)
-	SetAudioCtx(uint)                 // Set audio encoder context
-	SetMaxContext(n int)              // Set maximum number of text context tokens to store
-	SetBeamSize(n int)                // Set Beam Size
-	SetEntropyThold(t float32)        // Set Entropy threshold
-	SetInitialPrompt(prompt string)   // Set initial prompt
-	SetTemperature(t float32)         // Set temperature
-	SetTemperatureFallback(t float32) // Set temperature incrementation
+	SetOffset(time.Duration)      // Set offset
+	SetDuration(time.Duration)    // Set duration
+	SetThreads(uint)              // Set number of threads to use
+	SetSpeedup(bool)              // Set speedup flag
+	SetTokenThreshold(float32)    // Set timestamp token probability threshold
+	SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
+	SetMaxSegmentLength(uint)     // Set max segment length in characters
+	SetMaxTokensPerSegment(uint)  // Set max tokens per segment (0 = no limit)

 	// Process mono audio data and return any errors.
 	// If defined, newly generated segments are passed to the
 	// callback function during processing.
-	Process([]float32, SegmentCallback, ProgressCallback) error
+	Process([]float32, SegmentCallback) error

 	// After process is called, return segments until the end of the stream
 	// is reached, when io.EOF is returned.
@ -97,8 +85,7 @@ type Segment struct {

 // Token is a text or special token
 type Token struct {
-	Id         int
-	Text       string
-	P          float32
-	Start, End time.Duration
+	Id   int
+	Text string
+	P    float32
 }
--- a/bindings/go/pkg/whisper/model_test.go
+++ b/bindings/go/pkg/whisper/model_test.go
@ -1,91 +0,0 @@
-package whisper_test
-
-import (
-	"testing"
-
-	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	assert "github.com/stretchr/testify/assert"
-)
-
-func TestNew(t *testing.T) {
-	assert := assert.New(t)
-	t.Run("valid model path", func(t *testing.T) {
-		model, err := whisper.New(ModelPath)
-		assert.NoError(err)
-		assert.NotNil(model)
-		defer model.Close()
-
-	})
-
-	t.Run("invalid model path", func(t *testing.T) {
-		invalidModelPath := "invalid-model-path.bin"
-		model, err := whisper.New(invalidModelPath)
-		assert.Error(err)
-		assert.Nil(model)
-	})
-}
-
-func TestClose(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-
-	err = model.Close()
-	assert.NoError(err)
-}
-
-func TestNewContext(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	context, err := model.NewContext()
-	assert.NoError(err)
-	assert.NotNil(context)
-}
-
-func TestIsMultilingual(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	isMultilingual := model.IsMultilingual()
-
-	// This returns false since
-	// the model 'models/ggml-small.en.bin'
-	// that is loaded is not multilingual
-	assert.False(isMultilingual)
-}
-
-func TestLanguages(t *testing.T) {
-	assert := assert.New(t)
-
-	model, err := whisper.New(ModelPath)
-	assert.NoError(err)
-	assert.NotNil(model)
-	defer model.Close()
-
-	expectedLanguages := []string{
-		"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
-		"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
-		"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
-		"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
-		"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
-		"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
-		"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
-		"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
-		"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
-	}
-
-	actualLanguages := model.Languages()
-
-	assert.Equal(expectedLanguages, actualLanguages)
-}
--- a/bindings/go/pkg/whisper/util_test.go
+++ b/bindings/go/pkg/whisper/util_test.go
@ -1,6 +0,0 @@
-package whisper_test
-
-const (
-	ModelPath  = "../../models/ggml-small.en.bin"
-	SamplePath = "../../samples/jfk.wav"
-)
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -9,13 +9,12 @@ import (
 // CGO

 /*
-#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
-#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
+#cgo LDFLAGS: -lwhisper -lm -lstdc++
+#cgo darwin LDFLAGS: -framework Accelerate
 #include <whisper.h>
 #include <stdlib.h>

 extern void callNewSegment(void* user_data, int new);
-extern void callProgress(void* user_data, int progress);
 extern bool callEncoderBegin(void* user_data);

 // Text segment callback
@ -27,15 +26,6 @@ static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_s
    }
 }

-// Progress callback
-// Invoked by whisper with the current progress value
-// Forwards that value to the Go callProgress handler when ctx and user_data are set
-static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
-    if(user_data != NULL && ctx != NULL) {
-        callProgress(user_data, progress);
-    }
-}
-
 // Encoder begin callback
 // If not NULL, called before the encoder starts
 // If it returns false, the computation is aborted
@ -53,8 +43,6 @@ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_
 	params.new_segment_callback_user_data = (void*)(ctx);
 	params.encoder_begin_callback = whisper_encoder_begin_cb;
 	params.encoder_begin_callback_user_data = (void*)(ctx);
-	params.progress_callback = whisper_progress_cb;
-	params.progress_callback_user_data = (void*)(ctx);
 	return params;
 }
 */
@ -83,6 +71,7 @@ const (
 	SampleRate = C.WHISPER_SAMPLE_RATE                 // Expected sample rate, samples per second
 	SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
 	NumFFT     = C.WHISPER_N_FFT
+	NumMEL     = C.WHISPER_N_MEL
 	HopLength  = C.WHISPER_HOP_LENGTH
 	ChunkSize  = C.WHISPER_CHUNK_SIZE
 )
@ -102,7 +91,7 @@ var (
 func Whisper_init(path string) *Context {
 	cPath := C.CString(path)
 	defer C.free(unsafe.Pointer(cPath))
-	if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
+	if ctx := C.whisper_init_from_file(cPath); ctx != nil {
 		return (*Context)(ctx)
 	} else {
 		return nil
@ -269,13 +258,13 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
 }

 // Task tokens
-func (ctx *Context) Whisper_token_translate() Token {
-	return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
+func Whisper_token_translate() Token {
+	return Token(C.whisper_token_translate())
 }

 // Task tokens
-func (ctx *Context) Whisper_token_transcribe() Token {
-	return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
+func Whisper_token_transcribe() Token {
+	return Token(C.whisper_token_transcribe())
 }

 // Performance information
@ -301,19 +290,11 @@ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Param

 // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
 // Uses the specified decoding strategy to obtain the text.
-func (ctx *Context) Whisper_full(
-	params Params,
-	samples []float32,
-	encoderBeginCallback func() bool,
-	newSegmentCallback func(int),
-	progressCallback func(int),
-) error {
+func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
 	registerEncoderBeginCallback(ctx, encoderBeginCallback)
 	registerNewSegmentCallback(ctx, newSegmentCallback)
-	registerProgressCallback(ctx, progressCallback)
 	defer registerEncoderBeginCallback(ctx, nil)
 	defer registerNewSegmentCallback(ctx, nil)
-	defer registerProgressCallback(ctx, nil)
 	if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
 		return nil
 	} else {
@ -337,18 +318,6 @@ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, proc
 	}
 }

-// Return the id of the autodetected language, returns -1 if not found
-// Added to whisper.cpp in
-// https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
-//
-// Examples:
-//
-//	"de" -> 2
-//	"german" -> 2
-func (ctx *Context) Whisper_full_lang_id() int {
-	return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
-}
-
 // Number of generated text segments.
 // A segment can be a few words, a sentence, or even a paragraph.
 func (ctx *Context) Whisper_full_n_segments() int {
@ -387,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {

 // Get token data for the specified token in the specified segment.
 // This contains probabilities, timestamps, etc.
-func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
+func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
 	return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
 }

@ -401,7 +370,6 @@ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {

 var (
 	cbNewSegment   = make(map[unsafe.Pointer]func(int))
-	cbProgress     = make(map[unsafe.Pointer]func(int))
 	cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
 )

@ -413,14 +381,6 @@ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
 	}
 }

-func registerProgressCallback(ctx *Context, fn func(int)) {
-	if fn == nil {
-		delete(cbProgress, unsafe.Pointer(ctx))
-	} else {
-		cbProgress[unsafe.Pointer(ctx)] = fn
-	}
-}
-
 func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
 	if fn == nil {
 		delete(cbEncoderBegin, unsafe.Pointer(ctx))
@ -436,13 +396,6 @@ func callNewSegment(user_data unsafe.Pointer, new C.int) {
 	}
 }

-//export callProgress
-func callProgress(user_data unsafe.Pointer, progress C.int) {
-	if fn, ok := cbProgress[user_data]; ok {
-		fn(int(progress))
-	}
-}
-
 //export callEncoderBegin
 func callEncoderBegin(user_data unsafe.Pointer) C.bool {
 	if fn, ok := cbEncoderBegin[user_data]; ok {
@ -454,15 +407,3 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
 	}
 	return true
 }
-
-func (t TokenData) T0() int64 {
-	return int64(t.t0)
-}
-
-func (t TokenData) T1() int64 {
-	return int64(t.t1)
-}
-
-func (t TokenData) Id() Token {
-	return Token(t.id)
-}
--- a/bindings/go/whisper_test.go
+++ b/bindings/go/whisper_test.go
@ -52,7 +52,7 @@ func Test_Whisper_001(t *testing.T) {
 	defer ctx.Whisper_free()
 	params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
 	data := buf.AsFloat32Buffer().Data
-	err = ctx.Whisper_full(params, data, nil, nil, nil)
+	err = ctx.Whisper_full(params, data, nil, nil)
 	assert.NoError(err)

 	// Print out tokens
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/java/.idea/uiDesigner.xml
+++ b/bindings/java/.idea/uiDesigner.xml
@ -1,124 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Palette2">
-    <group name="Swing">
-      <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
-      </item>
-      <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
-      </item>
-      <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
-      </item>
-      <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
-        <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
-      </item>
-      <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
-        <initial-values>
-          <property name="text" value="Button" />
-        </initial-values>
-      </item>
-      <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
-        <initial-values>
-          <property name="text" value="RadioButton" />
-        </initial-values>
-      </item>
-      <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
-        <initial-values>
-          <property name="text" value="CheckBox" />
-        </initial-values>
-      </item>
-      <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
-        <initial-values>
-          <property name="text" value="Label" />
-        </initial-values>
-      </item>
-      <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
-          <preferred-size width="150" height="-1" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
-          <preferred-size width="150" height="-1" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
-          <preferred-size width="150" height="-1" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
-      </item>
-      <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
-          <preferred-size width="150" height="50" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
-          <preferred-size width="200" height="200" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
-          <preferred-size width="200" height="200" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
-      </item>
-      <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
-      </item>
-      <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
-      </item>
-      <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
-      </item>
-      <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
-          <preferred-size width="-1" height="20" />
-        </default-constraints>
-      </item>
-      <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
-        <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
-      </item>
-      <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
-        <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
-      </item>
-    </group>
-  </component>
-</project>
--- a/bindings/java/README.md
+++ b/bindings/java/README.md
@ -1,71 +0,0 @@
-# Java JNI bindings for Whisper
-
-This package provides Java JNI bindings for whisper.cpp. They have been tested on:
-
-  * <strike>Darwin (OS X) 12.6 on x86_64</strike>
-  * Ubuntu on x86_64
-  * Windows on x86_64
-
-The "low level" bindings are in `WhisperCppJnaLibrary`. The most simple usage is as follows:
-
-JNA will attempt to load the `whispercpp` shared library from:
-
- jna.library.path
- jna.platform.library
- ~/Library/Frameworks
- /Library/Frameworks
- /System/Library/Frameworks
- classpath
-
-```java
-import io.github.ggerganov.whispercpp.WhisperCpp;
-
-public class Example {
-
-    public static void main(String[] args) {
-        WhisperCpp whisper = new WhisperCpp();
-        // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
-        // or you can provide the absolute path to the model file.
-        long context = whisper.initContext("base.en");
-        try {
-            var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-            // custom configuration if required
-            whisperParams.temperature_inc = 0f;
-            
-            var samples = readAudio(); // divide each value by 32767.0f
-            whisper.fullTranscribe(whisperParams, samples);
-            
-            int segmentCount = whisper.getTextSegmentCount(context);
-            for (int i = 0; i < segmentCount; i++) {
-                String text = whisper.getTextSegment(context, i);
-                System.out.println(text);
-            }
-        } finally {
-             whisper.freeContext(context);
-        }
-     }
-}
-```
-
-## Building & Testing
-
-To build, you need JDK 8 or higher installed. Run the tests with:
-
-```bash
-git clone https://github.com/ggerganov/whisper.cpp.git
-cd whisper.cpp/bindings/java
-
-./gradlew build
-```
-
-You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
-
-```bash
-copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
-```
-
-
-## License
-
-The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
-
--- a/bindings/java/build.gradle
+++ b/bindings/java/build.gradle
@ -1,133 +0,0 @@
-plugins {
-    id 'java'
-    id 'java-library'
-    id 'maven-publish'
-    id 'signing'
-}
-
-archivesBaseName = 'whispercpp'
-group = 'io.github.ggerganov'
-version = '1.4.0'
-
-
-sourceCompatibility = 1.8
-targetCompatibility = 1.8
-
-// Treat build/generated/resources/main (the directory the copy* tasks below write into)
-// as an additional main resource root, and add it to the test runtime classpath.
-sourceSets {
-    main {
-        resources {
-            srcDirs = ['src/main/resources', 'build/generated/resources/main']
-        }
-    }
-    test {
-        runtimeClasspath += files('build/generated/resources/main')
-    }
-}
-
-// Copy the macOS shared library from ../../build into the "darwin" resource folder.
-// macOS shared libraries use the ".dylib" extension; the previous "libwhisper.dynlib"
-// pattern could never match a built artifact. The task name is kept for compatibility.
-tasks.register('copyLibwhisperDynlib', Copy) {
-    from '../../build'
-    include 'libwhisper.dylib'
-    into 'build/generated/resources/main/darwin'
-}
-
-// Copy libwhisper.so from ../../build into the "linux-x86-64" resource folder.
-tasks.register('copyLibwhisperSo', Copy) {
-    from '../../build'
-    include 'libwhisper.so'
-    into 'build/generated/resources/main/linux-x86-64'
-}
-
-// Copy whisper.dll from the Release build output into the Windows resource folder.
-// JNA's resource prefix for 64-bit Windows is "win32-x86-64" (the folder the README's
-// manual copy command also targets); "windows-x86-64" is not a prefix JNA searches.
-tasks.register('copyWhisperDll', Copy) {
-    from '../../build/Release'
-    include 'whisper.dll'
-    into 'build/generated/resources/main/win32-x86-64'
-}
-
-// Aggregate task: depends on the three platform-specific copy tasks above.
-// NOTE(review): nothing visible in this script makes the build depend on copyLibs,
-// so it presumably has to be invoked explicitly — confirm.
-tasks.register('copyLibs') {
-    dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
-}
-
-// Set the jna.library.path system property for tests to the generated resources directory.
-test {
-    systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
-}
-
-java {
-    withSourcesJar()
-    withJavadocJar()
-}
-
-jar {
-    exclude '**/whisper_java.exp', '**/whisper_java.lib'
-}
-
-javadoc {
-    options.addStringOption('Xdoclint:none', '-quiet')
-}
-
-tasks.withType(Test) {
-    useJUnitPlatform()
-}
-
-dependencies {
-    implementation "net.java.dev.jna:jna:5.13.0"
-    testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
-    testImplementation "org.assertj:assertj-core:3.24.2"
-}
-
-repositories {
-    mavenCentral()
-}
-
-// Maven publication: describes the "whispercpp" artifact (POM metadata) and the
-// Sonatype repository it is uploaded to.
-publishing {
-    publications {
-        mavenJava(MavenPublication) {
-            artifactId = 'whispercpp'
-            from components.java
-            pom {
-                name = 'whispercpp'
-                description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
-                url = 'https://github.com/ggerganov/whisper.cpp'
-                licenses {
-                    license {
-                        name = 'MIT licence'
-                        url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
-                    }
-                }
-                developers {
-                    developer {
-                        id = 'ggerganov'
-                        name = 'Georgi Gerganov'
-                        email = 'ggerganov@gmail.com'
-                    }
-                    developer {
-                        id = 'nalbion'
-                        name = 'Nicholas Albion'
-                        email = 'nalbion@yahoo.com'
-                    }
-                }
-                scm {
-                    connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
-                    url = 'https://github.com/ggerganov/whisper.cpp'
-                }
-            }
-        }
-    }
-
-    repositories {
-        maven {
-            def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
-            def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
-            // Versions ending in -SNAPSHOT go to the snapshots repository, everything else to staging.
-            url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
-            // Credentials are read from the environment so they never live in the build script.
-            credentials {
-                username = System.getenv("MAVEN_USERNAME")
-                password = System.getenv("MAVEN_PASSWORD")
-            }
-        }
-    }
-}
-
-// Sign the mavenJava publication with an in-memory PGP key taken from the
-// PGP_SECRET and PGP_PASSPHRASE environment variables.
-signing {
-    def signingKey = System.getenv("PGP_SECRET")
-    def signingPassword = System.getenv("PGP_PASSPHRASE")
-    useInMemoryPgpKeys(signingKey, signingPassword)
-    sign publishing.publications.mavenJava
-}
--- a/bindings/java/gradle.properties
+++ b/bindings/java/gradle.properties
@ -1,6 +0,0 @@
-org.gradle.jvmargs=-Xms256m -Xmx1024m
-system.include.dir=/usr/include
-#system.local.include.dir=../../include
-system.local.include.dir=./build/generated/sources/headers/java/main
-jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
-jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
--- a/bindings/java/gradle/wrapper/gradle-wrapper.jar
+++ b/bindings/java/gradle/wrapper/gradle-wrapper.jar
--- a/bindings/java/gradle/wrapper/gradle-wrapper.properties
+++ b/bindings/java/gradle/wrapper/gradle-wrapper.properties
@ -1,6 +0,0 @@
-distributionBase=GRADLE_USER_HOME
-distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
-networkTimeout=10000
-zipStoreBase=GRADLE_USER_HOME
-zipStorePath=wrapper/dists
--- a/bindings/java/gradlew
+++ b/bindings/java/gradlew
@ -1,244 +0,0 @@
-#!/bin/sh
-
-#
-# Copyright © 2015-2021 the original authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-##############################################################################
-#
-#   Gradle start up script for POSIX generated by Gradle.
-#
-#   Important for running:
-#
-#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
-#       noncompliant, but you have some other compliant shell such as ksh or
-#       bash, then to run this script, type that shell name before the whole
-#       command line, like:
-#
-#           ksh Gradle
-#
-#       Busybox and similar reduced shells will NOT work, because this script
-#       requires all of these POSIX shell features:
-#         * functions;
-#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
-#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
-#         * compound commands having a testable exit status, especially «case»;
-#         * various built-in commands including «command», «set», and «ulimit».
-#
-#   Important for patching:
-#
-#   (2) This script targets any POSIX shell, so it avoids extensions provided
-#       by Bash, Ksh, etc; in particular arrays are avoided.
-#
-#       The "traditional" practice of packing multiple parameters into a
-#       space-separated string is a well documented source of bugs and security
-#       problems, so this is (mostly) avoided, by progressively accumulating
-#       options in "$@", and eventually passing that to Java.
-#
-#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
-#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
-#       see the in-line comments for details.
-#
-#       There are tweaks for specific operating systems such as AIX, CygWin,
-#       Darwin, MinGW, and NonStop.
-#
-#   (3) This script is generated from the Groovy template
-#       https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
-#       within the Gradle project.
-#
-#       You can find Gradle at https://github.com/gradle/gradle/.
-#
-##############################################################################
-
-# Attempt to set APP_HOME
-
-# Resolve links: $0 may be a link
-app_path=$0
-
-# Need this for daisy-chained symlinks.
-while
-    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
-    [ -h "$app_path" ]
-do
-    ls=$( ls -ld "$app_path" )
-    link=${ls#*' -> '}
-    case $link in             #(
-      /*)   app_path=$link ;; #(
-      *)    app_path=$APP_HOME$link ;;
-    esac
-done
-
-# This is normally unused
-# shellcheck disable=SC2034
-APP_BASE_NAME=${0##*/}
-APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
-
-# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
-
-# Use the maximum available, or set MAX_FD != -1 to use that value.
-MAX_FD=maximum
-
-warn () {
-    echo "$*"
-} >&2
-
-die () {
-    echo
-    echo "$*"
-    echo
-    exit 1
-} >&2
-
-# OS specific support (must be 'true' or 'false').
-cygwin=false
-msys=false
-darwin=false
-nonstop=false
-case "$( uname )" in                #(
-  CYGWIN* )         cygwin=true  ;; #(
-  Darwin* )         darwin=true  ;; #(
-  MSYS* | MINGW* )  msys=true    ;; #(
-  NONSTOP* )        nonstop=true ;;
-esac
-
-CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
-
-
-# Determine the Java command to use to start the JVM.
-if [ -n "$JAVA_HOME" ] ; then
-    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
-        # IBM's JDK on AIX uses strange locations for the executables
-        JAVACMD=$JAVA_HOME/jre/sh/java
-    else
-        JAVACMD=$JAVA_HOME/bin/java
-    fi
-    if [ ! -x "$JAVACMD" ] ; then
-        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
-
-Please set the JAVA_HOME variable in your environment to match the
-location of your Java installation."
-    fi
-else
-    JAVACMD=java
-    # "which" is not specified by POSIX and may be missing; "command -v" is a
-    # POSIX built-in that this script already relies on for the xargs check.
-    command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
-
-Please set the JAVA_HOME variable in your environment to match the
-location of your Java installation."
-fi
-
-# Increase the maximum file descriptors if we can.
-if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
-    case $MAX_FD in #(
-      max*)
-        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
-        # shellcheck disable=SC3045
-        MAX_FD=$( ulimit -H -n ) ||
-            warn "Could not query maximum file descriptor limit"
-    esac
-    case $MAX_FD in  #(
-      '' | soft) :;; #(
-      *)
-        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
-        # shellcheck disable=SC3045
-        ulimit -n "$MAX_FD" ||
-            warn "Could not set maximum file descriptor limit to $MAX_FD"
-    esac
-fi
-
-# Collect all arguments for the java command, stacking in reverse order:
-#   * args from the command line
-#   * the main class name
-#   * -classpath
-#   * -D...appname settings
-#   * --module-path (only if needed)
-#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
-
-# For Cygwin or MSYS, switch paths to Windows format before running java
-if "$cygwin" || "$msys" ; then
-    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
-    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
-
-    JAVACMD=$( cygpath --unix "$JAVACMD" )
-
-    # Now convert the arguments - kludge to limit ourselves to /bin/sh
-    for arg do
-        if
-            case $arg in                                #(
-              -*)   false ;;                            # don't mess with options #(
-              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
-                    [ -e "$t" ] ;;                      #(
-              *)    false ;;
-            esac
-        then
-            arg=$( cygpath --path --ignore --mixed "$arg" )
-        fi
-        # Roll the args list around exactly as many times as the number of
-        # args, so each arg winds up back in the position where it started, but
-        # possibly modified.
-        #
-        # NB: a `for` loop captures its iteration list before it begins, so
-        # changing the positional parameters here affects neither the number of
-        # iterations, nor the values presented in `arg`.
-        shift                   # remove old arg
-        set -- "$@" "$arg"      # push replacement arg
-    done
-fi
-
-# Collect all arguments for the java command;
-#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
-#     shell script including quotes and variable substitutions, so put them in
-#     double quotes to make sure that they get re-expanded; and
-#   * put everything else in single quotes, so that it's not re-expanded.
-
-set -- \
-        "-Dorg.gradle.appname=$APP_BASE_NAME" \
-        -classpath "$CLASSPATH" \
-        org.gradle.wrapper.GradleWrapperMain \
-        "$@"
-
-# Stop when "xargs" is not available.
-if ! command -v xargs >/dev/null 2>&1
-then
-    die "xargs is not available"
-fi
-
-# Use "xargs" to parse quoted args.
-#
-# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
-#
-# In Bash we could simply go:
-#
-#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
-#   set -- "${ARGS[@]}" "$@"
-#
-# but POSIX shell has neither arrays nor command substitution, so instead we
-# post-process each arg (as a line of input to sed) to backslash-escape any
-# character that might be a shell metacharacter, then use eval to reverse
-# that process (while maintaining the separation between arguments), and wrap
-# the whole thing up as a single "set" statement.
-#
-# This will of course break if any of these variables contains a newline or
-# an unmatched quote.
-#
-
-eval "set -- $(
-        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
-        xargs -n1 |
-        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
-        tr '\n' ' '
-    )" '"$@"'
-
-exec "$JAVACMD" "$@"
--- a/bindings/java/gradlew.bat
+++ b/bindings/java/gradlew.bat
@ -1,92 +0,0 @@
-@rem
-@rem Copyright 2015 the original author or authors.
-@rem
-@rem Licensed under the Apache License, Version 2.0 (the "License");
-@rem you may not use this file except in compliance with the License.
-@rem You may obtain a copy of the License at
-@rem
-@rem      https://www.apache.org/licenses/LICENSE-2.0
-@rem
-@rem Unless required by applicable law or agreed to in writing, software
-@rem distributed under the License is distributed on an "AS IS" BASIS,
-@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@rem See the License for the specific language governing permissions and
-@rem limitations under the License.
-@rem
-
-@if "%DEBUG%"=="" @echo off
-@rem ##########################################################################
-@rem
-@rem  Gradle startup script for Windows
-@rem
-@rem ##########################################################################
-
-@rem Set local scope for the variables with windows NT shell
-if "%OS%"=="Windows_NT" setlocal
-
-set DIRNAME=%~dp0
-if "%DIRNAME%"=="" set DIRNAME=.
-@rem This is normally unused
-set APP_BASE_NAME=%~n0
-set APP_HOME=%DIRNAME%
-
-@rem Resolve any "." and ".." in APP_HOME to make it shorter.
-for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
-
-@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
-set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
-
-@rem Find java.exe
-if defined JAVA_HOME goto findJavaFromJavaHome
-
-set JAVA_EXE=java.exe
-%JAVA_EXE% -version >NUL 2>&1
-if %ERRORLEVEL% equ 0 goto execute
-
-echo.
-echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
-
-goto fail
-
-:findJavaFromJavaHome
-set JAVA_HOME=%JAVA_HOME:"=%
-set JAVA_EXE=%JAVA_HOME%/bin/java.exe
-
-if exist "%JAVA_EXE%" goto execute
-
-echo.
-echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
-
-goto fail
-
-:execute
-@rem Setup the command line
-
-set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
-
-
-@rem Execute Gradle
-"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
-
-:end
-@rem End local scope for the variables with windows NT shell
-if %ERRORLEVEL% equ 0 goto mainEnd
-
-:fail
-rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
-rem the _cmd.exe /c_ return code!
-set EXIT_CODE=%ERRORLEVEL%
-if %EXIT_CODE% equ 0 set EXIT_CODE=1
-if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
-exit /b %EXIT_CODE%
-
-:mainEnd
-if "%OS%"=="Windows_NT" endlocal
-
-:omega
--- a/bindings/java/settings.gradle
+++ b/bindings/java/settings.gradle
@ -1 +0,0 @@
-rootProject.name = "whispercpp"
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java
@ -1,41 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import com.sun.jna.Structure;
-import com.sun.jna.ptr.PointerByReference;
-import io.github.ggerganov.whispercpp.ggml.GgmlType;
-import io.github.ggerganov.whispercpp.WhisperModel;
-import io.github.ggerganov.whispercpp.params.WhisperContextParams;
-
-import java.util.List;
-
-/**
- * JNA {@link Structure} subclass declaring fields for a whisper context: load/start
- * timings, weight and intermediate tensor types, pointers to the model, vocabulary and
- * state, the model path and the context parameters.
- *
- * NOTE(review): the by-reference/by-value subclasses and the getFieldOrder() override are
- * commented out below, so this class presumably is not marshalled to or from native
- * memory as-is — confirm before relying on it.
- */
-public class WhisperContext extends Structure {
-    int t_load_us = 0;
-    int t_start_us = 0;
-
-    /** weight type (FP32 / FP16 / QX) */
-    GgmlType wtype = GgmlType.GGML_TYPE_F16;
-    /** intermediate type (FP32 or FP16) */
-    GgmlType itype = GgmlType.GGML_TYPE_F16;
-
-//    WhisperModel model;
-    public PointerByReference model;
-//    whisper_vocab vocab;
-//    whisper_state * state = nullptr;
-    public PointerByReference vocab;
-    public PointerByReference state;
-
-    /** populated by whisper_init_from_file_with_params() */
-    String path_model;
-    WhisperContextParams params;
-
-//    public static class ByReference extends WhisperContext implements Structure.ByReference {
-//    }
-//
-//    public static class ByValue extends WhisperContext implements Structure.ByValue {
-//    }
-//
-//    @Override
-//    protected List<String> getFieldOrder() {
-//        return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
-//    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java
@ -1,207 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import com.sun.jna.Native;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.bean.WhisperSegment;
-import io.github.ggerganov.whispercpp.params.WhisperContextParams;
-import io.github.ggerganov.whispercpp.params.WhisperFullParams;
-import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * High-level wrapper around the native whisper library exposed by {@link WhisperCppJnaLibrary}.
- *
- * Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
- * The instance owns the native context and every parameter block it hands out; call
- * {@link #close()} (or use try-with-resources) to release them.
- */
-public class WhisperCpp implements AutoCloseable {
-    private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
-    private Pointer ctx = null;
-    /** Every block obtained from whisper_context_default_params_by_ref(), kept so close() can free them all. */
-    private final List<Pointer> contextParamsPointers = new ArrayList<>();
-    private Pointer greedyParamsPointer = null;
-    private Pointer beamParamsPointer = null;
-
-    /**
-     * @return the directory searched for models given by name: `$XDG_CACHE_HOME/whisper`,
-     *         or `~/.cache/whisper` when XDG_CACHE_HOME is not set
-     */
-    public File modelDir() {
-        String modelDirPath = System.getenv("XDG_CACHE_HOME");
-        if (modelDirPath == null) {
-            modelDirPath = System.getProperty("user.home") + "/.cache";
-        }
-
-        return new File(modelDirPath, "whisper");
-    }
-
-    /**
-     * Loads a model using the library's default context params.
-     *
-     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
-     * @throws FileNotFoundException if the native library could not create a context from the file
-     */
-    public void initContext(String modelPath) throws FileNotFoundException {
-        initContextImpl(modelPath, getContextDefaultParams());
-    }
-
-    /**
-     * Loads a model using the supplied context params.
-     *
-     * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
-     * @param params - params to use when initialising the context
-     * @throws FileNotFoundException if the native library could not create a context from the file
-     */
-    public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
-        initContextImpl(modelPath, params);
-    }
-
-    private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
-        // Release any previously loaded model first. freeContext() also clears `ctx`,
-        // so a failure further down cannot leave a dangling pointer behind.
-        freeContext();
-
-        // A value without path separators is a model name, not a path: resolve it inside modelDir().
-        if (!modelPath.contains("/") && !modelPath.contains("\\")) {
-            if (!modelPath.endsWith(".bin")) {
-                // "base-en" and "base.en" both map to "ggml-base.en.bin"
-                modelPath = "ggml-" + modelPath.replace("-", ".") + ".bin";
-            }
-
-            modelPath = new File(modelDir(), modelPath).getAbsolutePath();
-        }
-
-        ctx = lib.whisper_init_from_file_with_params(modelPath, params);
-
-        if (ctx == null) {
-            throw new FileNotFoundException(modelPath);
-        }
-    }
-
-    /**
-     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
-     * Each call allocates a new native block. The block is owned by this instance and is
-     * released by {@link #close()}, so callers must not free it themselves.
-     */
-    public WhisperContextParams getContextDefaultParams() {
-        Pointer pointer = lib.whisper_context_default_params_by_ref();
-        if (pointer != null) {
-            // Remember every allocation: overwriting a single field would leak all but the last one.
-            contextParamsPointers.add(pointer);
-        }
-        WhisperContextParams params = new WhisperContextParams(pointer);
-        params.read();
-        return params;
-    }
-
-    /**
-     * Provides default params which can be used with `whisper_full()` etc.
-     * At most one native block is allocated per strategy and reused on later calls. The blocks
-     * are owned by this instance and released by {@link #close()}, so callers must not free them.
-     *
-     * @param strategy - GREEDY
-     */
-    public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
-        Pointer pointer;
-
-        // whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
-        if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
-            if (greedyParamsPointer == null) {
-                greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
-            }
-            pointer = greedyParamsPointer;
-        } else {
-            if (beamParamsPointer == null) {
-                beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
-            }
-            pointer = beamParamsPointer;
-        }
-
-        WhisperFullParams params = new WhisperFullParams(pointer);
-        params.read();
-        return params;
-    }
-
-    /**
-     * Releases the native context and all parameter blocks. Safe to call more than once.
-     */
-    @Override
-    public void close() {
-        freeContext();
-        freeParams();
-        System.out.println("Whisper closed");
-    }
-
-    private void freeContext() {
-        if (ctx != null) {
-            lib.whisper_free(ctx);
-            // Clear the handle so a second close() cannot double-free and later calls
-            // fail with "Model not initialised" instead of touching freed memory.
-            ctx = null;
-        }
-    }
-
-    private void freeParams() {
-        for (Pointer pointer : contextParamsPointers) {
-            Native.free(Pointer.nativeValue(pointer));
-        }
-        contextParamsPointers.clear();
-
-        if (greedyParamsPointer != null) {
-            Native.free(Pointer.nativeValue(greedyParamsPointer));
-            greedyParamsPointer = null;
-        }
-        if (beamParamsPointer != null) {
-            Native.free(Pointer.nativeValue(beamParamsPointer));
-            beamParamsPointer = null;
-        }
-    }
-
-    /**
-     * Runs whisper_full() on the current context and returns the number of segments produced.
-     *
-     * @throws IllegalStateException if no model has been loaded
-     * @throws IOException if the native call reports a failure
-     */
-    private int runFull(WhisperFullParams whisperParams, float[] audioData) throws IOException {
-        if (ctx == null) {
-            throw new IllegalStateException("Model not initialised");
-        }
-
-        if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
-            throw new IOException("Failed to process audio");
-        }
-
-        return lib.whisper_full_n_segments(ctx);
-    }
-
-    /**
-     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
-     * Not thread safe for same context
-     * Uses the specified decoding strategy to obtain the text.
-     *
-     * @return the text of all segments concatenated, with surrounding whitespace trimmed
-     * @throws IllegalStateException if no model has been loaded
-     * @throws IOException if the native call reports a failure
-     */
-    public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
-        int nSegments = runFull(whisperParams, audioData);
-
-        StringBuilder str = new StringBuilder();
-
-        for (int i = 0; i < nSegments; i++) {
-            String text = lib.whisper_full_get_segment_text(ctx, i);
-            System.out.println("Segment:" + text);
-            str.append(text);
-        }
-
-        return str.toString().trim();
-    }
-
-    /**
-     * Same as {@link #fullTranscribe}, but returns each segment with the start (t0) and
-     * end (t1) values reported by the native library.
-     *
-     * @throws IllegalStateException if no model has been loaded
-     * @throws IOException if the native call reports a failure
-     */
-    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
-        int nSegments = runFull(whisperParams, audioData);
-        List<WhisperSegment> segments = new ArrayList<>(nSegments);
-
-        for (int i = 0; i < nSegments; i++) {
-            long t0 = lib.whisper_full_get_segment_t0(ctx, i);
-            String text = lib.whisper_full_get_segment_text(ctx, i);
-            long t1 = lib.whisper_full_get_segment_t1(ctx, i);
-            segments.add(new WhisperSegment(t0, t1, text));
-        }
-
-        return segments;
-    }
-
-//    public int getTextSegmentCount(Pointer ctx) {
-//        return lib.whisper_full_n_segments(ctx);
-//    }
-//    public String getTextSegment(Pointer ctx, int index) {
-//        return lib.whisper_full_get_segment_text(ctx, index);
-//    }
-
-    /** @return the string produced by the native whisper_print_system_info() */
-    public String getSystemInfo() {
-        return lib.whisper_print_system_info();
-    }
-
-    /** Calls the native whisper_bench_memcpy() with the given thread count and returns its result. */
-    public int benchMemcpy(int nthread) {
-        return lib.whisper_bench_memcpy(nthread);
-    }
-
-    /** Calls the native whisper_bench_ggml_mul_mat() with the given thread count and returns its result. */
-    public int benchGgmlMulMat(int nthread) {
-        return lib.whisper_bench_ggml_mul_mat(nthread);
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java
@ -1,388 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import com.sun.jna.Library;
-import com.sun.jna.Native;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
-import io.github.ggerganov.whispercpp.model.WhisperTokenData;
-import io.github.ggerganov.whispercpp.params.WhisperContextParams;
-import io.github.ggerganov.whispercpp.params.WhisperFullParams;
-
-/**
- * JNA mapping of the native "whisper" library.
- * Every method declared here is bound to the native function of the same name
- * through the library loaded into {@link #instance}.
- */
-public interface WhisperCppJnaLibrary extends Library {
-    /** Shared binding, loaded from the native library named "whisper". */
-    WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
-
-    /** Maps the native {@code whisper_print_system_info()} function. */
-    String whisper_print_system_info();
-
-    /**
-     * DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
-     *
-     * @param path_model Path to the model file
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_from_file(String path_model);
-
-    /**
-     * Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
-     * Because this function allocates memory for the params, the caller must release it with either:
-     * - `whisper_free_context_params()`
-     * - `Native.free(Pointer.nativeValue(pointer));`
-     */
-    Pointer whisper_context_default_params_by_ref();
-
-    /** Releases params obtained from `whisper_context_default_params_by_ref()`. */
-    void whisper_free_context_params(Pointer params);
-
-    /**
-     * Allocate (almost) all memory needed for the model by loading from a file.
-     *
-     * @param path_model Path to the model file
-     * @param params     Pointer to whisper_context_params
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
-
-    /**
-     * Allocate (almost) all memory needed for the model by loading from a buffer.
-     *
-     * @param buffer       Model buffer
-     * @param buffer_size  Size of the model buffer
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_from_buffer(Pointer buffer, int buffer_size);
-
-    /**
-     * Allocate (almost) all memory needed for the model using a model loader.
-     *
-     * @param loader Model loader
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init(WhisperModelLoader loader);
-
-    /**
-     * Allocate (almost) all memory needed for the model by loading from a file without allocating the state.
-     *
-     * @param path_model Path to the model file
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_from_file_no_state(String path_model);
-
-    /**
-     * Allocate (almost) all memory needed for the model by loading from a buffer without allocating the state.
-     *
-     * @param buffer       Model buffer
-     * @param buffer_size  Size of the model buffer
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_from_buffer_no_state(Pointer buffer, int buffer_size);
-
-    // NOTE(review): disabled alternative mapping that takes buffer_size as a long;
-    // confirm which width matches the native parameter before enabling either form.
-//    Pointer whisper_init_from_buffer_no_state(Pointer buffer, long buffer_size);
-
-    /**
-     * Allocate (almost) all memory needed for the model using a model loader without allocating the state.
-     *
-     * @param loader Model loader
-     * @return Whisper context on success, null on failure
-     */
-    Pointer whisper_init_no_state(WhisperModelLoader loader);
-
-    /**
-     * Allocate memory for the Whisper state.
-     *
-     * @param ctx Whisper context
-     * @return Whisper state on success, null on failure
-     */
-    Pointer whisper_init_state(Pointer ctx);
-
-    /**
-     * Free all allocated memory associated with the Whisper context.
-     *
-     * @param ctx Whisper context
-     */
-    void whisper_free(Pointer ctx);
-
-    /**
-     * Free all allocated memory associated with the Whisper state.
-     *
-     * @param state Whisper state
-     */
-    void whisper_free_state(Pointer state);
-
-
-    /**
-     * Convert RAW PCM audio to log mel spectrogram.
-     * The resulting spectrogram is stored inside the default state of the provided whisper context.
-     *
-     * @param ctx Pointer to a WhisperContext
-     * @param samples RAW PCM audio samples
-     * @return 0 on success
-     */
-    int whisper_pcm_to_mel(Pointer ctx, final float[] samples, int n_samples, int n_threads);
-
-    /**
-     * Variant of whisper_pcm_to_mel() that additionally takes an explicit state.
-     *
-     * @param ctx Pointer to a WhisperContext
-     * @param state Pointer to WhisperState
-     * @param samples RAW PCM audio samples
-     * @param n_samples presumably the number of entries of {@code samples} to use (TODO confirm)
-     * @param n_threads presumably the number of threads to use (TODO confirm)
-     * @return 0 on success
-     */
-    int whisper_pcm_to_mel_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);
-
-    /**
-     * This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
-     * Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
-     * n_mel must be 80
-     * @return 0 on success
-     */
-    int whisper_set_mel(Pointer ctx, final float[] data, int n_len, int n_mel);
-    /** Variant of whisper_set_mel() that additionally takes an explicit state. */
-    int whisper_set_mel_with_state(Pointer ctx, Pointer state, final float[] data, int n_len, int n_mel);
-
-    /**
-     * Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
-     * Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
-     * Offset can be used to specify the offset of the first frame in the spectrogram.
-     * @return 0 on success
-     */
-    int whisper_encode(Pointer ctx, int offset, int n_threads);
-
-    /** Variant of whisper_encode() that additionally takes an explicit state. */
-    int whisper_encode_with_state(Pointer ctx, Pointer state, int offset, int n_threads);
-
-    /**
-     * Run the Whisper decoder to obtain the logits and probabilities for the next token.
-     * Make sure to call whisper_encode() first.
-     * tokens + n_tokens is the provided context for the decoder.
-     * n_past is the number of tokens to use from previous decoder calls.
-     * TODO: add support for multiple decoders
-     * @return 0 on success
-     */
-    int whisper_decode(Pointer ctx, Pointer tokens, int n_tokens, int n_past, int n_threads);
-
-    /**
-     * Variant of whisper_decode() that additionally takes an explicit state.
-     *
-     * @param ctx Pointer to a WhisperContext
-     * @param state Pointer to WhisperState
-     * @param tokens Pointer to int tokens
-     * @param n_tokens number of tokens provided through {@code tokens}
-     * @param n_past number of tokens to use from previous decoder calls
-     * @param n_threads presumably the number of threads to use (TODO confirm)
-     * @return presumably 0 on success, like whisper_decode() (TODO confirm)
-     */
-    int whisper_decode_with_state(Pointer ctx, Pointer state, Pointer tokens, int n_tokens, int n_past, int n_threads);
-
-    /**
-     * Convert the provided text into tokens.
-     * The tokens pointer must be large enough to hold the resulting tokens.
-     * Returns the number of tokens on success, no more than n_max_tokens
-     * Returns -1 on failure
-     * TODO: not sure if correct
-     */
-    int whisper_tokenize(Pointer ctx, String text, Pointer tokens, int n_max_tokens);
-
-    /** Largest language id (i.e. number of available languages - 1) */
-    int whisper_lang_max_id();
-
-    /**
-     * @return the id of the specified language, returns -1 if not found.
-     * Examples:
-     *   "de" -> 2
-     *   "german" -> 2
-     */
-    int whisper_lang_id(String lang);
-
-    /** @return the short string of the specified language id (e.g. 2 -> "de"), returns null if not found */
-    String whisper_lang_str(int id);
-
-    /**
-     * Use mel data at offset_ms to try and auto-detect the spoken language.
-     * Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
-     * Returns the top language id or negative on failure
-     * If not null, fills the lang_probs array with the probabilities of all languages
-     * The array must be whisper_lang_max_id() + 1 in size
-     *
-     * ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
-     */
-    int whisper_lang_auto_detect(Pointer ctx, int offset_ms, int n_threads, float[] lang_probs);
-
-    /** Variant of whisper_lang_auto_detect() that additionally takes an explicit state. */
-    int whisper_lang_auto_detect_with_state(Pointer ctx, Pointer state, int offset_ms, int n_threads, float[] lang_probs);
-
-    int whisper_n_len           (Pointer ctx); // mel length
-    int whisper_n_len_from_state(Pointer state); // mel length
-    int whisper_n_vocab         (Pointer ctx);
-    int whisper_n_text_ctx      (Pointer ctx);
-    int whisper_n_audio_ctx     (Pointer ctx);
-    int whisper_is_multilingual (Pointer ctx);
-
-    int whisper_model_n_vocab      (Pointer ctx);
-    int whisper_model_n_audio_ctx  (Pointer ctx);
-    int whisper_model_n_audio_state(Pointer ctx);
-    int whisper_model_n_audio_head (Pointer ctx);
-    int whisper_model_n_audio_layer(Pointer ctx);
-    int whisper_model_n_text_ctx   (Pointer ctx);
-    int whisper_model_n_text_state (Pointer ctx);
-    int whisper_model_n_text_head  (Pointer ctx);
-    int whisper_model_n_text_layer (Pointer ctx);
-    int whisper_model_n_mels       (Pointer ctx);
-    int whisper_model_ftype        (Pointer ctx);
-    int whisper_model_type         (Pointer ctx);
-
-    /**
-     * Token logits obtained from the last call to whisper_decode().
-     * The logits for the last token are stored in the last row
-     * Rows: n_tokens
-     * Cols: n_vocab
-     *
-     * NOTE(review): this is declared with a float[] return type; confirm that JNA can
-     * map a primitive array as a return value before relying on this method.
-     */
-    float[] whisper_get_logits           (Pointer ctx);
-    float[] whisper_get_logits_from_state(Pointer state);
-
-    // Token Id -> String. Uses the vocabulary in the provided context
-    String whisper_token_to_str(Pointer ctx, int token);
-    String whisper_model_type_readable(Pointer ctx);
-
-    // Special tokens
-    int whisper_token_eot (Pointer ctx);
-    int whisper_token_sot (Pointer ctx);
-    int whisper_token_prev(Pointer ctx);
-    int whisper_token_solm(Pointer ctx);
-    int whisper_token_not (Pointer ctx);
-    int whisper_token_beg (Pointer ctx);
-    int whisper_token_lang(Pointer ctx, int lang_id);
-
-    // Task tokens
-    int whisper_token_translate (Pointer ctx);
-    int whisper_token_transcribe(Pointer ctx);
-
-    // Performance information from the default state.
-    void whisper_print_timings(Pointer ctx);
-    void whisper_reset_timings(Pointer ctx);
-
-    // Note: Even if `whisper_full_params` is stripped back to just 4 ints, JNA throws "Invalid memory access"
-    //       when `whisper_full_default_params()` tries to return a struct.
-    // WhisperFullParams whisper_full_default_params(int strategy);
-
-    /**
-     * Provides default params which can be used with `whisper_full()` etc.
-     * Because this function allocates memory for the params, the caller must release it with either:
-     * - `whisper_free_params()`
-     * - `Native.free(Pointer.nativeValue(pointer));`
-     *
-     * @param strategy - WhisperSamplingStrategy.value
-     */
-    Pointer whisper_full_default_params_by_ref(int strategy);
-
-    /** Releases params obtained from `whisper_full_default_params_by_ref()`. */
-    void whisper_free_params(Pointer params);
-
-    /**
-     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
-     * Not thread safe for same context
-     * Uses the specified decoding strategy to obtain the text.
-     */
-    int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
-
-    /** Variant of whisper_full() that additionally takes an explicit state. */
-    int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
-
-    // Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
-    // Result is stored in the default state of the context
-    // Not thread safe if executed in parallel on the same context.
-    // It seems this approach can offer some speedup in some cases.
-    // However, the transcription accuracy can be worse at the beginning and end of each chunk.
-    int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
-
-    /**
-     * Number of generated text segments.
-     * A segment can be a few words, a sentence, or even a paragraph.
-     * @param ctx Pointer to WhisperContext
-     */
-    int whisper_full_n_segments (Pointer ctx);
-
-    /**
-     * Number of generated text segments, read from the provided state.
-     * @param state Pointer to WhisperState
-     */
-    int whisper_full_n_segments_from_state(Pointer state);
-
-    /**
-     * Language id associated with the context's default state.
-     * @param ctx Pointer to WhisperContext
-     */
-    int whisper_full_lang_id(Pointer ctx);
-
-    /** Language id associated with the provided state */
-    int whisper_full_lang_id_from_state(Pointer state);
-
-
-    /** Get the start time of the specified segment. */
-    long whisper_full_get_segment_t0(Pointer ctx, int i_segment);
-
-    /** Get the start time of the specified segment from the state. */
-    long whisper_full_get_segment_t0_from_state(Pointer state, int i_segment);
-
-    /** Get the end time of the specified segment. */
-    long whisper_full_get_segment_t1(Pointer ctx, int i_segment);
-
-    /** Get the end time of the specified segment from the state. */
-    long whisper_full_get_segment_t1_from_state(Pointer state, int i_segment);
-
-    /** Get the text of the specified segment. */
-    String whisper_full_get_segment_text(Pointer ctx, int i_segment);
-
-    /** Get the text of the specified segment from the state. */
-    String whisper_full_get_segment_text_from_state(Pointer state, int i_segment);
-
-    /** Get the number of tokens in the specified segment. */
-    int whisper_full_n_tokens(Pointer ctx, int i_segment);
-
-    /** Get the number of tokens in the specified segment from the state. */
-    int whisper_full_n_tokens_from_state(Pointer state, int i_segment);
-
-    /** Get the token text of the specified token in the specified segment. */
-    String whisper_full_get_token_text(Pointer ctx, int i_segment, int i_token);
-
-
-    /** Get the token text of the specified token in the specified segment from the state. */
-    String whisper_full_get_token_text_from_state(Pointer ctx, Pointer state, int i_segment, int i_token);
-
-    /** Get the token ID of the specified token in the specified segment. */
-    int whisper_full_get_token_id(Pointer ctx, int i_segment, int i_token);
-
-    /** Get the token ID of the specified token in the specified segment from the state. */
-    int whisper_full_get_token_id_from_state(Pointer state, int i_segment, int i_token);
-
-    /** Get token data for the specified token in the specified segment. */
-    WhisperTokenData whisper_full_get_token_data(Pointer ctx, int i_segment, int i_token);
-
-    /** Get token data for the specified token in the specified segment from the state. */
-    WhisperTokenData whisper_full_get_token_data_from_state(Pointer state, int i_segment, int i_token);
-
-    /** Get the probability of the specified token in the specified segment. */
-    float whisper_full_get_token_p(Pointer ctx, int i_segment, int i_token);
-
-    /** Get the probability of the specified token in the specified segment from the state. */
-    float whisper_full_get_token_p_from_state(Pointer state, int i_segment, int i_token);
-
-    /**
-     * Benchmark function for memcpy.
-     *
-     * @param nThreads Number of threads to use for the benchmark.
-     * @return The result of the benchmark.
-     */
-    int whisper_bench_memcpy(int nThreads);
-
-    /**
-     * Benchmark function for memcpy as a string.
-     *
-     * @param nThreads Number of threads to use for the benchmark.
-     * @return The result of the benchmark as a string.
-     */
-    String whisper_bench_memcpy_str(int nThreads);
-
-    /**
-     * Benchmark function for ggml_mul_mat.
-     *
-     * @param nThreads Number of threads to use for the benchmark.
-     * @return The result of the benchmark.
-     */
-    int whisper_bench_ggml_mul_mat(int nThreads);
-
-    /**
-     * Benchmark function for ggml_mul_mat as a string.
-     *
-     * @param nThreads Number of threads to use for the benchmark.
-     * @return The result of the benchmark as a string.
-     */
-    String whisper_bench_ggml_mul_mat_str(int nThreads);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java
@ -1,47 +0,0 @@
-package io.github.ggerganov.whispercpp.bean;
-
-/**
- * Created by litonglinux@qq.com on 10/21/2023_7:48 AM
- */
-public class WhisperSegment {
-  /** Start time of the segment, exactly as supplied by the caller. */
-  private long start;
-  /** End time of the segment, exactly as supplied by the caller. */
-  private long end;
-  /** Text of the segment. */
-  private String sentence;
-
-  /** Creates a segment with both times at 0 and no sentence. */
-  public WhisperSegment() {
-  }
-
-  /**
-   * Creates a fully populated segment.
-   *
-   * @param start    start time
-   * @param end      end time
-   * @param sentence text of the segment
-   */
-  public WhisperSegment(long start, long end, String sentence) {
-    this.sentence = sentence;
-    this.end = end;
-    this.start = start;
-  }
-
-  public long getStart() {
-    return this.start;
-  }
-
-  public void setStart(long start) {
-    this.start = start;
-  }
-
-  public long getEnd() {
-    return this.end;
-  }
-
-  public void setEnd(long end) {
-    this.end = end;
-  }
-
-  public String getSentence() {
-    return this.sentence;
-  }
-
-  public void setSentence(String sentence) {
-    this.sentence = sentence;
-  }
-
-  /** Renders the segment as {@code [start --> end]:sentence}. */
-  @Override
-  public String toString() {
-    StringBuilder out = new StringBuilder();
-    out.append("[").append(start);
-    out.append(" --> ").append(end);
-    out.append("]:").append(sentence);
-    return out.toString();
-  }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.java
@ -1,24 +0,0 @@
-package io.github.ggerganov.whispercpp.callbacks;
-
-import com.sun.jna.Callback;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.WhisperContext;
-import io.github.ggerganov.whispercpp.model.WhisperState;
-
-/**
- * Callback before the encoder starts.
- * If not null, called before the encoder starts.
- * If it returns false, the computation is aborted.
- */
-public interface WhisperEncoderBeginCallback extends Callback {
-
-    /**
-     * Callback method invoked before the encoder starts.
-     *
-     * @param ctx        Pointer to the whisper context.
-     * @param state      Pointer to the whisper state.
-     * @param user_data  Pointer to user data.
-     * @return True if the computation should proceed, false to abort it.
-     */
-    boolean callback(Pointer ctx, Pointer state, Pointer user_data);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.java
@ -1,25 +0,0 @@
-package io.github.ggerganov.whispercpp.callbacks;
-
-import com.sun.jna.Callback;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.model.WhisperTokenData;
-
-/**
- * Callback to filter logits.
- * Can be used to modify the logits before sampling.
- * If not null, called after applying temperature to logits.
- */
-public interface WhisperLogitsFilterCallback extends Callback {
-
-    /**
-     * Callback method to filter logits.
-     *
-     * @param ctx        Pointer to the whisper context.
-     * @param state      Pointer to the whisper state.
-     * @param tokens     The array of {@link WhisperTokenData} entries.
-     * @param n_tokens   The number of tokens.
-     * @param logits     The array of logits.
-     * @param user_data  Pointer to user data.
-     */
-    void callback(Pointer ctx, Pointer state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.java
@ -1,24 +0,0 @@
-package io.github.ggerganov.whispercpp.callbacks;
-
-import com.sun.jna.Callback;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.WhisperContext;
-import io.github.ggerganov.whispercpp.model.WhisperState;
-
-/**
- * Callback for the text segment.
- * Called on every newly generated text segment.
- * Use the whisper_full_...() functions to obtain the text segments.
- */
-public interface WhisperNewSegmentCallback extends Callback {
-
-    /**
-     * Callback method invoked for newly generated text segments.
-     *
-     * @param ctx        Pointer to the whisper context.
-     * @param state      Pointer to the whisper state.
-     * @param n_new      The number of newly generated text segments.
-     * @param user_data  Pointer to user data.
-     */
-    void callback(Pointer ctx, Pointer state, int n_new, Pointer user_data);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.java
@ -1,22 +0,0 @@
-package io.github.ggerganov.whispercpp.callbacks;
-
-import com.sun.jna.Callback;
-import com.sun.jna.Pointer;
-import io.github.ggerganov.whispercpp.WhisperContext;
-import io.github.ggerganov.whispercpp.model.WhisperState;
-
-/**
- * Callback for progress updates.
- */
-public interface WhisperProgressCallback extends Callback {
-
-    /**
-     * Callback method for progress updates.
-     *
-     * @param ctx        Pointer to the whisper context.
-     * @param state      Pointer to the whisper state.
-     * @param progress   The progress value (NOTE(review): unit/range not stated here; confirm against the native side).
-     * @param user_data  Pointer to user data.
-     */
-    void callback(Pointer ctx, Pointer state, int progress, Pointer user_data);
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java
@ -1,4 +0,0 @@
-package io.github.ggerganov.whispercpp.ggml;
-
-/**
- * Placeholder type: declares no fields or methods.
- */
-public class GgmlTensor {
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlType.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlType.java
@ -1,18 +0,0 @@
-package io.github.ggerganov.whispercpp.ggml;
-
-/**
- * Tensor data type identifiers.
- * NOTE(review): the constant order presumably mirrors the native ggml type enum so that
- * ordinals line up (the REMOVED_* entries look like slot holders); confirm before
- * reordering, inserting or deleting entries.
- */
-public enum GgmlType {
-    GGML_TYPE_F32,
-    GGML_TYPE_F16,
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
-    REMOVED_GGML_TYPE_Q4_2,  // support has been removed
-    REMOVED_GGML_TYPE_Q4_3, // support has been removed
-    GGML_TYPE_Q5_0,
-    GGML_TYPE_Q5_1,
-    GGML_TYPE_Q8_0,
-    GGML_TYPE_Q8_1,
-    GGML_TYPE_I8,
-    GGML_TYPE_I16,
-    GGML_TYPE_I32,
-    GGML_TYPE_COUNT,
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/EModel.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/EModel.java
@ -1,10 +0,0 @@
-package io.github.ggerganov.whispercpp.model;
-
-/**
- * Model size identifiers, from MODEL_UNKNOWN through MODEL_LARGE.
- * NOTE(review): presumably kept in the same order as the native model enum; confirm before reordering.
- */
-public enum EModel {
-    MODEL_UNKNOWN,
-    MODEL_TINY,
-    MODEL_BASE,
-    MODEL_SMALL,
-    MODEL_MEDIUM,
-    MODEL_LARGE,
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModel.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModel.java
@ -1,49 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import io.github.ggerganov.whispercpp.ggml.GgmlTensor;
-import io.github.ggerganov.whispercpp.model.EModel;
-
-/**
- * Placeholder for the whisper model description.
- * Every member below is commented out, so the class currently has no fields or methods;
- * the commented lines sketch the intended layout.
- *
- * NOTE(review): this file sits under the model/ directory but declares the package
- * io.github.ggerganov.whispercpp; confirm whether that is intentional.
- */
-public class WhisperModel {
-//    EModel type = EModel.MODEL_UNKNOWN;
-//
-//    WhisperHParams hparams;
-//    WhisperFilters filters;
-//
-//    // encoder.positional_embedding
-//    GgmlTensor e_pe;
-//
-//    // encoder.conv1
-//    GgmlTensor e_conv_1_w;
-//    GgmlTensor e_conv_1_b;
-//
-//    // encoder.conv2
-//    GgmlTensor e_conv_2_w;
-//    GgmlTensor e_conv_2_b;
-//
-//    // encoder.ln_post
-//    GgmlTensor e_ln_w;
-//    GgmlTensor e_ln_b;
-//
-//    // decoder.positional_embedding
-//    GgmlTensor d_pe;
-//
-//    // decoder.token_embedding
-//    GgmlTensor d_te;
-//
-//    // decoder.ln
-//    GgmlTensor d_ln_w;
-//    GgmlTensor d_ln_b;
-//
-//    std::vector<whisper_layer_encoder> layers_encoder;
-//    std::vector<whisper_layer_decoder> layers_decoder;
-//
-//    // context
-//    struct ggml_context * ctx;
-//
-//    // the model memory buffer is read-only and can be shared between processors
-//    std::vector<uint8_t> * buf;
-//
-//    // tensors
-//    int n_loaded;
-//    Map<String, GgmlTensor> tensors;
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModelLoader.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModelLoader.java
@ -1,62 +0,0 @@
-package io.github.ggerganov.whispercpp.model;
-
-import com.sun.jna.Callback;
-import com.sun.jna.Pointer;
-import com.sun.jna.Structure;
-
-
-/**
- * JNA structure passed to the native {@code whisper_init()} / {@code whisper_init_no_state()}
- * functions to load a model through callbacks.
- *
- * The callback implementations below are stubs (marked TODO) and do not read any data yet.
- */
-public class WhisperModelLoader extends Structure {
-    /** Pointer handed to the callbacks as their {@code ctx} argument (presumably; confirm against the native struct). */
-    public Pointer context;
-    /** Callback used to read model data. */
-    public ReadFunction read;
-    /** Callback used to test for end of input. */
-    public EOFFunction eof;
-    /** Callback used to close the input. */
-    public CloseFunction close;
-
-    /** Stub read callback: currently returns {@code ctx} unchanged. */
-    public static class ReadFunction implements Callback {
-        public Pointer invoke(Pointer ctx, Pointer output, int readSize) {
-            // TODO
-            return ctx;
-        }
-    }
-
-    /** Stub end-of-input callback: currently always returns false. */
-    public static class EOFFunction implements Callback {
-        public boolean invoke(Pointer ctx) {
-            // TODO
-            return false;
-        }
-    }
-
-    /** Stub close callback: currently does nothing. */
-    public static class CloseFunction implements Callback {
-        public void invoke(Pointer ctx) {
-            // TODO
-        }
-    }
-
-//    public WhisperModelLoader(Pointer p) {
-//        super(p);
-//        read = new ReadFunction();
-//        eof = new EOFFunction();
-//        close = new CloseFunction();
-//        read.setCallback(this);
-//        eof.setCallback(this);
-//        close.setCallback(this);
-//        read.write();
-//        eof.write();
-//        close.write();
-//    }
-
-    public WhisperModelLoader() {
-        super();
-    }
-
-    /**
-     * Declares the native layout order of the public fields.
-     * A JNA structure with more than one field needs an explicit order; without it the
-     * structure cannot be laid out when it is passed to native code.
-     */
-    @Override
-    protected java.util.List<String> getFieldOrder() {
-        return java.util.Arrays.asList("context", "read", "eof", "close");
-    }
-
-    public interface ReadCallback extends Callback {
-        Pointer invoke(Pointer ctx, Pointer output, int readSize);
-    }
-
-    public interface EOFCallback extends Callback {
-        boolean invoke(Pointer ctx);
-    }
-
-    public interface CloseCallback extends Callback {
-        void invoke(Pointer ctx);
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java
@ -1,4 +0,0 @@
-package io.github.ggerganov.whispercpp.model;
-
-/**
- * Placeholder type: declares no fields or methods.
- */
-public class WhisperState {
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperTokenData.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperTokenData.java
@ -1,50 +0,0 @@
-package io.github.ggerganov.whispercpp.model;
-
-import com.sun.jna.Structure;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Structure representing token data.
- */
-public class WhisperTokenData extends Structure {
-
-    /** Token ID. */
-    public int id;
-
-    /** Forced timestamp token ID. */
-    public int tid;
-
-    /** Probability of the token. */
-    public float p;
-
-    /** Log probability of the token. */
-    public float plog;
-
-    /** Probability of the timestamp token. */
-    public float pt;
-
-    /** Sum of probabilities of all timestamp tokens. */
-    public float ptsum;
-
-    /**
-     * Start time of the token (token-level timestamp data).
-     * Do not use if you haven't computed token-level timestamps.
-     */
-    public long t0;
-
-    /**
-     * End time of the token (token-level timestamp data).
-     * Do not use if you haven't computed token-level timestamps.
-     */
-    public long t1;
-
-    /** Voice length of the token. */
-    public float vlen;
-
-    /**
-     * Declares the layout order of the public fields for JNA.
-     * The names listed here must stay in sync with the fields declared above.
-     */
-    @Override
-    protected List<String> getFieldOrder() {
-        return Arrays.asList("id", "tid", "p", "plog", "pt", "ptsum", "t0", "t1", "vlen");
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/BeamSearchParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/BeamSearchParams.java
@ -1,19 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import com.sun.jna.Structure;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * JNA structure holding the two beam-search settings: {@code beam_size} and {@code patience}.
- */
-public class BeamSearchParams extends Structure {
-    /** ref: <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265">...</a> */
-    public int beam_size;
-
-    /** ref: <a href="https://arxiv.org/pdf/2204.05424.pdf">...</a> */
-    public float patience;
-
-    /** Declares the layout order of the public fields for JNA. */
-    @Override
-    protected List<String> getFieldOrder() {
-        return Arrays.asList("beam_size", "patience");
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/CBool.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/CBool.java
@ -1,30 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import com.sun.jna.IntegerType;
-
-import java.util.function.BooleanSupplier;
-
-/**
- * One-byte integer type used to map boolean struct fields.
- * The value 1 is reported as true; every other value is reported as false.
- */
-public class CBool extends IntegerType implements BooleanSupplier {
-    /** Size of the mapped value in bytes. */
-    public static final int SIZE = 1;
-    public static final CBool FALSE = new CBool(0);
-    public static final CBool TRUE = new CBool(1);
-
-    /** Creates a value of 0 (false). */
-    public CBool() {
-        this(0);
-    }
-
-    /** Creates an unsigned one-byte value holding {@code value}. */
-    public CBool(long value) {
-        super(SIZE, value, true);
-    }
-
-    /** @return true only when the stored value equals 1 */
-    @Override
-    public boolean getAsBoolean() {
-        final int raw = intValue();
-        return raw == 1;
-    }
-
-    /** @return "true" when the stored value equals 1, otherwise "false" */
-    @Override
-    public String toString() {
-        final boolean isOne = intValue() == 1;
-        return String.valueOf(isOne);
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/GreedyParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/GreedyParams.java
@ -1,16 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import com.sun.jna.Structure;
-
-import java.util.Collections;
-import java.util.List;
-
-/**
- * JNA structure holding the single greedy-sampling setting {@code best_of}.
- */
-public class GreedyParams extends Structure {
-    /** <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264">...</a> */
-    public int best_of;
-
-    /** Declares the layout order of the public fields for JNA. */
-    @Override
-    protected List<String> getFieldOrder() {
-        return Collections.singletonList("best_of");
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java
@ -1,31 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import com.sun.jna.*;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Parameters for the whisper_init_from_file_with_params() function.
- * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
- * whisper_context_default_params()
- */
-public class WhisperContextParams extends Structure {
-
-    /** Whether to use the GPU for inference. (default = true) */
-    public CBool use_gpu;
-
-    /**
-     * Wraps an existing native params block.
-     *
-     * @param p pointer to the native memory backing this structure
-     */
-    public WhisperContextParams(Pointer p) {
-        super(p);
-    }
-
-    /**
-     * Turns GPU inference on or off.
-     *
-     * @param enable true selects {@link CBool#TRUE}, false selects {@link CBool#FALSE}
-     */
-    public void useGpu(boolean enable) {
-        if (enable) {
-            use_gpu = CBool.TRUE;
-        } else {
-            use_gpu = CBool.FALSE;
-        }
-    }
-
-    /** Declares the layout order of the public fields for JNA. */
-    @Override
-    protected List<String> getFieldOrder() {
-        final String[] order = {"use_gpu"};
-        return Arrays.asList(order);
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFilters.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFilters.java
@ -1,10 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import java.util.List;
-
-/**
- * Plain holder for three package-private fields: {@code n_mel}, {@code n_fft} and {@code data}.
- * NOTE(review): presumably describes the mel filter bank; confirm against the native struct.
- */
-public class WhisperFilters {
-    int n_mel;
-    int n_fft;
-
-    List<Float> data;
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java
@ -1,326 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-import com.sun.jna.*;
-import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
-import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
-import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
-import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Parameters for the whisper_full() function.
- * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
- * whisper_full_default_params()
- */
-public class WhisperFullParams extends Structure {
-
-    public WhisperFullParams(Pointer p) {
-        super(p);
-//        super(p, ALIGN_MSVC);
-//        super(p, ALIGN_GNUC);
-    }
-
-    /** Sampling strategy for whisper_full() function. */
-    public int strategy;
-
-    /** Number of threads. (default = 4) */
-    public int n_threads;
-
-    /** Maximum tokens to use from past text as a prompt for the decoder. (default = 16384) */
-    public int n_max_text_ctx;
-
-    /** Start offset in milliseconds. (default = 0) */
-    public int offset_ms;
-
-    /** Audio duration to process in milliseconds. (default = 0) */
-    public int duration_ms;
-
-    /** Translate flag. (default = false) */
-    public CBool translate;
-
-    /** The complement of translateMode() */
-    public void transcribeMode() {
-        translate = CBool.FALSE;
-    }
-
-    /** The complement of transcribeMode() */
-    public void translateMode() {
-        translate = CBool.TRUE;
-    }
-
-    /** Flag to disable using past transcription (if any) as an initial prompt for the decoder. (default = true) */
-    public CBool no_context;
-
-    /** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. (default = true) */
-    public void enableContext(boolean enable) {
-        no_context = enable ? CBool.FALSE : CBool.TRUE;
-    }
-
-    /** Flag to disable timestamp generation. */
-    public CBool no_timestamps;
-
-    /** Flag to force single segment output (useful for streaming). (default = false) */
-    public CBool single_segment;
-
-    /** Flag to force single segment output (useful for streaming). (default = false) */
-    public void singleSegment(boolean single) {
-        single_segment = single ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
-    public CBool print_special;
-
-    /** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). (default = false) */
-    public void printSpecial(boolean enable) {
-        print_special = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Flag to print progress information. (default = true) */
-    public CBool print_progress;
-
-    /** Flag to print progress information. (default = true) */
-    public void printProgress(boolean enable) {
-        print_progress = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
-    public CBool print_realtime;
-
-    /** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
-    public void printRealtime(boolean enable) {
-        print_realtime = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Flag to print timestamps for each text segment when printing realtime. (default = true) */
-    public CBool print_timestamps;
-
-    /** Flag to print timestamps for each text segment when printing realtime. (default = true) */
-    public void printTimestamps(boolean enable) {
-        print_timestamps = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
-    public CBool token_timestamps;
-
-    /** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
-    public void tokenTimestamps(boolean enable) {
-        token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** [EXPERIMENTAL] Timestamp token probability threshold (~0.01). (default = 0.01) */
-    public float thold_pt;
-
-    /** [EXPERIMENTAL] Timestamp token sum probability threshold (~0.01). */
-    public float thold_ptsum;
-
-    /** Maximum segment length in characters. (default = 0) */
-    public int max_len;
-
-    /** Flag to split on word rather than on token (when used with max_len). (default = false) */
-    public CBool split_on_word;
-
-    /** Flag to split on word rather than on token (when used with max_len). (default = false) */
-    public void splitOnWord(boolean enable) {
-        split_on_word = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Maximum tokens per segment (0, default = no limit) */
-    public int max_tokens;
-
-    /** Overwrite the audio context size (0 = use default). */
-    public int audio_ctx;
-
-    /** Enable tinydiarize (default = false) */
-    public CBool tdrz_enable;
-
-    /** Enable tinydiarize (default = false) */
-    public void tdrzEnable(boolean enable) {
-        tdrz_enable = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Regular expression matching tokens to suppress. */
-    public String suppress_regex;
-
-    /** Tokens to provide to the whisper decoder as an initial prompt.
-     * These are prepended to any existing text context from a previous call. */
-    public String initial_prompt;
-
-    /** Prompt tokens. (int*) */
-    public Pointer prompt_tokens;
-
-    public void setPromptTokens(int[] tokens) {
-        Memory mem = new Memory(tokens.length * 4L);
-        mem.write(0, tokens, 0, tokens.length);
-        prompt_tokens = mem;
-    }
-
-    /** Number of prompt tokens. */
-    public int prompt_n_tokens;
-
-    /** Spoken language of the audio.
-     * For auto-detection, set to `null`, `""`, or "auto". */
-    public String language;
-
-    /** Flag to indicate whether to detect language automatically. */
-    public CBool detect_language;
-
-    /** Flag to indicate whether to detect language automatically. */
-    public void detectLanguage(boolean enable) {
-        detect_language = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    // Common decoding parameters.
-
-    /** Flag to suppress blank tokens. */
-    public CBool suppress_blank;
-
-    public void suppressBlanks(boolean enable) {
-        suppress_blank = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Flag to suppress non-speech tokens. */
-    public CBool suppress_nst;
-
-    /** Flag to suppress non-speech tokens. */
-    public void suppressNonSpeechTokens(boolean enable) {
-        suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
-    }
-
-    /** Initial decoding temperature. */
-    public float temperature;
-
-    /** Maximum initial timestamp. */
-    public float max_initial_ts;
-
-    /** Length penalty. */
-    public float length_penalty;
-
-    // Fallback parameters.
-
-    /** Temperature increment. */
-    public float temperature_inc;
-
-    /** Entropy threshold (similar to OpenAI's "compression_ratio_threshold"). */
-    public float entropy_thold;
-
-    /** Log probability threshold. */
-    public float logprob_thold;
-
-    /** No speech threshold. */
-    public float no_speech_thold;
-
-    /** Greedy decoding parameters. */
-    public GreedyParams greedy;
-
-    /**
-     * Beam search decoding parameters.
-     */
-    public BeamSearchParams beam_search;
-
-    public void setBestOf(int bestOf) {
-        if (greedy == null) {
-            greedy = new GreedyParams();
-        }
-        greedy.best_of = bestOf;
-    }
-
-    public void setBeamSize(int beamSize) {
-        if (beam_search == null) {
-            beam_search = new BeamSearchParams();
-        }
-        beam_search.beam_size = beamSize;
-    }
-
-    public void setBeamSizeAndPatience(int beamSize, float patience) {
-        if (beam_search == null) {
-            beam_search = new BeamSearchParams();
-        }
-        beam_search.beam_size = beamSize;
-        beam_search.patience = patience;
-    }
-
-    /**
-     * Callback for every newly generated text segment.
-     * WhisperNewSegmentCallback
-     */
-    public Pointer new_segment_callback;
-
-    /**
-     * User data for the new_segment_callback.
-     */
-    public Pointer new_segment_callback_user_data;
-
-    /**
-     * Callback on each progress update.
-     * WhisperProgressCallback
-     */
-    public Pointer progress_callback;
-
-    /**
-     * User data for the progress_callback.
-     */
-    public Pointer progress_callback_user_data;
-
-    /**
-     * Callback each time before the encoder starts.
-     * WhisperEncoderBeginCallback
-     */
-    public Pointer encoder_begin_callback;
-
-    /**
-     * User data for the encoder_begin_callback.
-     */
-    public Pointer encoder_begin_callback_user_data;
-
-    /**
-     * Callback by each decoder to filter obtained logits.
-     * WhisperLogitsFilterCallback
-     */
-    public Pointer logits_filter_callback;
-
-    /**
-     * User data for the logits_filter_callback.
-     */
-    public Pointer logits_filter_callback_user_data;
-
-
-    public void setNewSegmentCallback(WhisperNewSegmentCallback callback) {
-        new_segment_callback = CallbackReference.getFunctionPointer(callback);
-    }
-
-    public void setProgressCallback(WhisperProgressCallback callback) {
-        progress_callback = CallbackReference.getFunctionPointer(callback);
-    }
-
-    public void setEncoderBeginCallbackeginCallbackCallback(WhisperEncoderBeginCallback callback) {
-        encoder_begin_callback = CallbackReference.getFunctionPointer(callback);
-    }
-
-    public void setLogitsFilterCallback(WhisperLogitsFilterCallback callback) {
-        logits_filter_callback = CallbackReference.getFunctionPointer(callback);
-    }
-
-    /** Grammar stuff */
-    public Pointer grammar_rules;
-    public long n_grammar_rules;
-    public long i_start_rule;
-    public float grammar_penalty;
-
-    @Override
-    protected List<String> getFieldOrder() {
-        return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
-                "no_context", "single_segment", "no_timestamps",
-                "print_special", "print_progress", "print_realtime", "print_timestamps",  "token_timestamps",
-                "thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
-                "tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
-                "suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
-                "temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
-                "new_segment_callback", "new_segment_callback_user_data",
-                "progress_callback", "progress_callback_user_data",
-                "encoder_begin_callback", "encoder_begin_callback_user_data",
-                "logits_filter_callback", "logits_filter_callback_user_data",
-                "grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
-    }
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperHParams.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperHParams.java
@ -1,15 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-public class WhisperHParams {
-    int n_vocab       = 51864;
-    int n_audio_ctx   = 1500;
-    int n_audio_state = 384;
-    int n_audio_head  = 6;
-    int n_audio_layer = 4;
-    int n_text_ctx    = 448;
-    int n_text_state  = 384;
-    int n_text_head   = 6;
-    int n_text_layer  = 4;
-    int n_mels        = 80;
-    int ftype         = 1;
-}
--- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.java
+++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.java
@ -1,10 +0,0 @@
-package io.github.ggerganov.whispercpp.params;
-
-/** Available sampling strategies */
-public enum WhisperSamplingStrategy {
-    /** similar to OpenAI's GreedyDecoder */
-    WHISPER_SAMPLING_GREEDY,
-
-    /** similar to OpenAI's BeamSearchDecoder */
-    WHISPER_SAMPLING_BEAM_SEARCH
-}
--- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
+++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java
@ -1,144 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import io.github.ggerganov.whispercpp.bean.WhisperSegment;
-import io.github.ggerganov.whispercpp.params.CBool;
-import io.github.ggerganov.whispercpp.params.WhisperFullParams;
-import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import javax.sound.sampled.AudioInputStream;
-import javax.sound.sampled.AudioSystem;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.util.List;
-
-class WhisperCppTest {
-    private static WhisperCpp whisper = new WhisperCpp();
-    private static boolean modelInitialised = false;
-
-    @BeforeAll
-    static void init() throws FileNotFoundException {
-        // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
-        // or you can provide the absolute path to the model file.
-        //String modelName = "../../models/ggml-tiny.bin";
-        String modelName = "../../models/ggml-tiny.en.bin";
-        try {
-            whisper.initContext(modelName);
-            //whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-            //whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
-            modelInitialised = true;
-        } catch (FileNotFoundException ex) {
-            System.out.println("Model " + modelName + " not found");
-        }
-    }
-
-    @Test
-    void testGetDefaultFullParams_BeamSearch() {
-        // When
-        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
-
-        // Then
-        assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal(), params.strategy);
-        assertNotEquals(0, params.n_threads);
-        assertEquals(16384, params.n_max_text_ctx);
-        assertFalse(params.translate);
-        assertEquals(0.01f, params.thold_pt);
-        assertEquals(5, params.beam_search.beam_size);
-        assertEquals(-1.0f, params.beam_search.patience);
-    }
-
-    @Test
-    void testGetDefaultFullParams_Greedy() {
-        // When
-        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-
-        // Then
-        assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
-        assertNotEquals(0, params.n_threads);
-        assertEquals(16384, params.n_max_text_ctx);
-        assertEquals(5, params.greedy.best_of);
-    }
-
-    @Test
-    void testFullTranscribe() throws Exception {
-        if (!modelInitialised) {
-            System.out.println("Model not initialised, skipping test");
-            return;
-        }
-
-        // Given
-        File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
-        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
-
-        byte[] b = new byte[audioInputStream.available()];
-        float[] floats = new float[b.length / 2];
-
-        //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
-        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
-        params.print_progress = CBool.FALSE;
-        //params.initial_prompt = "and so my fellow Americans um, like";
-
-
-        try {
-            audioInputStream.read(b);
-
-            for (int i = 0, j = 0; i < b.length; i += 2, j++) {
-                int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
-                floats[j] = intSample / 32767.0f;
-            }
-
-            // When
-            String result = whisper.fullTranscribe(params, floats);
-
-            // Then
-            System.err.println(result);
-            assertEquals("And so my fellow Americans ask not what your country can do for you " +
-                    "ask what you can do for your country.",
-                    result.replace(",", ""));
-        } finally {
-            audioInputStream.close();
-        }
-    }
-
-    @Test
-    void testFullTranscribeWithTime() throws Exception {
-        if (!modelInitialised) {
-            System.out.println("Model not initialised, skipping test");
-            return;
-        }
-
-        // Given
-        File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
-        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
-
-        byte[] b = new byte[audioInputStream.available()];
-        float[] floats = new float[b.length / 2];
-
-        //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
-        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
-        params.print_progress = CBool.FALSE;
-        //params.initial_prompt = "and so my fellow Americans um, like";
-
-        try {
-            audioInputStream.read(b);
-
-            for (int i = 0, j = 0; i < b.length; i += 2, j++) {
-                int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
-                floats[j] = intSample / 32767.0f;
-            }
-
-            List<WhisperSegment> segments = whisper.fullTranscribeWithTime(params, floats);
-            assertTrue(segments.size() > 0, "The size of segments should be greater than 0");
-            for (WhisperSegment segment : segments) {
-                System.out.println(segment);
-            }
-        } finally {
-            audioInputStream.close();
-        }
-    }
-
-}
--- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.java
+++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.java
@ -1,17 +0,0 @@
-package io.github.ggerganov.whispercpp;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import org.junit.jupiter.api.Test;
-
-class WhisperJnaLibraryTest {
-
-    @Test
-    void testWhisperPrint_system_info() {
-        String systemInfo = WhisperCppJnaLibrary.instance.whisper_print_system_info();
-        // eg: "AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0
-        //    | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | "
-        System.out.println("System info: " + systemInfo);
-        assertTrue(systemInfo.length() > 10);
-    }
-}
--- a/bindings/javascript/README.md
+++ b/bindings/javascript/README.md
@ -41,7 +41,7 @@ make publish-npm

 ## Sample run

-```text
+```java
 $ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js

 whisper_model_load: loading model from 'whisper.bin'
@ -63,7 +63,7 @@ whisper_model_load: ggml ctx size =  140.60 MB
 whisper_model_load: memory size   =   22.83 MB
 whisper_model_load: model size    =  140.54 MB

-system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
+system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 | 

 operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...

--- a/bindings/javascript/emscripten.cpp
+++ b/bindings/javascript/emscripten.cpp
@ -20,7 +20,7 @@ struct whisper_context * g_context;
 EMSCRIPTEN_BINDINGS(whisper) {
    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
        if (g_context == nullptr) {
-            g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
+            g_context = whisper_init_from_file(path_model.c_str());
            if (g_context != nullptr) {
                return true;
            } else {
--- a/bindings/javascript/libwhisper.worker.js
+++ b/bindings/javascript/libwhisper.worker.js
@ -1 +1 @@
-"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var 
objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}}self.onmessage=handleMessage;
+"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
--- a/bindings/javascript/package.json
+++ b/bindings/javascript/package.json
@ -1,6 +1,6 @@
 {
  "name": "whisper.cpp",
-  "version": "1.7.3",
+  "version": "1.2.1",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/bindings/ruby/.gitignore
+++ b/bindings/ruby/.gitignore
@ -1,5 +0,0 @@
-LICENSE
-pkg/
-lib/whisper.so
-lib/whisper.bundle
-lib/whisper.dll
--- a/bindings/ruby/README.md
+++ b/bindings/ruby/README.md
@ -1,231 +0,0 @@
-whispercpp
-==========
-
-![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
-
-Ruby bindings for [whisper.cpp][], an interface to an automatic speech recognition model.
-
-Installation
------------
-
-Install the gem and add to the application's Gemfile by executing:
-
-    $ bundle add whispercpp
-
-If bundler is not being used to manage dependencies, install the gem by executing:
-
-    $ gem install whispercpp
-
-Usage
-----
-
-```ruby
-require "whisper"
-
-whisper = Whisper::Context.new("base")
-
-params = Whisper::Params.new
-params.language = "en"
-params.offset = 10_000
-params.duration = 60_000
-params.max_text_tokens = 300
-params.translate = true
-params.print_timestamps = false
-params.initial_prompt = "Initial prompt here."
-
-whisper.transcribe("path/to/audio.wav", params) do |whole_text|
-  puts whole_text
-end
-
-```
-
-### Preparing model ###
-
-Some models are prepared up-front:
-
-```ruby
-base_en = Whisper::Model.pre_converted_models["base.en"]
-whisper = Whisper::Context.new(base_en)
-```
-
-The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
-
-```ruby
-Whisper::Model.pre_converted_models["base"].clear_cache
-```
-
-You can also use a shorthand for pre-converted models:
-
-```ruby
-whisper = Whisper::Context.new("base.en")
-```
-
-You can see the list of prepared model names with `Whisper::Model.pre_converted_models.keys`:
-
-```ruby
-puts Whisper::Model.preconverted_models.keys
-# tiny
-# tiny.en
-# tiny-q5_1
-# tiny.en-q5_1
-# tiny-q8_0
-# base
-# base.en
-# base-q5_1
-# base.en-q5_1
-# base-q8_0
-#   :
-#   :
-```
-
-You can also use local model files you prepared:
-
-```ruby
-whisper = Whisper::Context.new("path/to/your/model.bin")
-```
-
-Or, you can download model files:
-
-```ruby
-model_uri = Whisper::Model::URI.new("http://example.net/uri/of/your/model.bin")
-whisper = Whisper::Context.new(model_uri)
-```
-
-See [models][] page for details.
-
-### Preparing audio file ###
-
-Currently, whisper.cpp accepts only 16-bit WAV files.
-
-API
---
-
-### Segments ###
-
-Once `Whisper::Context#transcribe` has been called, you can retrieve segments with `#each_segment`:
-
-```ruby
-def format_time(time_ms)
-  sec, decimal_part = time_ms.divmod(1000)
-  min, sec = sec.divmod(60)
-  hour, min = min.divmod(60)
-  "%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
-end
-
-whisper.transcribe("path/to/audio.wav", params)
-
-whisper.each_segment.with_index do |segment, index|
-  line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
-    nth: index + 1,
-    st: format_time(segment.start_time),
-    ed: format_time(segment.end_time),
-    text: segment.text
-  }
-  line << " (speaker turned)" if segment.speaker_next_turn?
-  puts line
-end
-
-```
-
-You can also add a hook to params that is called on each new segment:
-
-```ruby
-# Add hook before calling #transcribe
-params.on_new_segment do |segment|
-  line = "[%{st} --> %{ed}] %{text}" % {
-    st: format_time(segment.start_time),
-    ed: format_time(segment.end_time),
-    text: segment.text
-  }
-  line << " (speaker turned)" if segment.speaker_next_turn?
-  puts line
-end
-
-whisper.transcribe("path/to/audio.wav", params)
-
-```
-
-### Models ###
-
-You can see model information:
-
-```ruby
-whisper = Whisper::Context.new("base")
-model = whisper.model
-
-model.n_vocab # => 51864
-model.n_audio_ctx # => 1500
-model.n_audio_state # => 512
-model.n_audio_head # => 8
-model.n_audio_layer # => 6
-model.n_text_ctx # => 448
-model.n_text_state # => 512
-model.n_text_head # => 8
-model.n_text_layer # => 6
-model.n_mels # => 80
-model.ftype # => 1
-model.type # => "base"
-
-```
-
-### Logging ###
-
-You can set log callback:
-
-```ruby
-prefix = "[MyApp] "
-log_callback = ->(level, buffer, user_data) {
-  case level
-  when Whisper::LOG_LEVEL_NONE
-    puts "#{user_data}none: #{buffer}"
-  when Whisper::LOG_LEVEL_INFO
-    puts "#{user_data}info: #{buffer}"
-  when Whisper::LOG_LEVEL_WARN
-    puts "#{user_data}warn: #{buffer}"
-  when Whisper::LOG_LEVEL_ERROR
-    puts "#{user_data}error: #{buffer}"
-  when Whisper::LOG_LEVEL_DEBUG
-    puts "#{user_data}debug: #{buffer}"
-  when Whisper::LOG_LEVEL_CONT
-    puts "#{user_data}same to previous: #{buffer}"
-  end
-}
-Whisper.log_set log_callback, prefix
-```
-
-Using this feature, you can also suppress logging:
-
-```ruby
-Whisper.log_set ->(level, buffer, user_data) {
-  # do nothing
-}, nil
-Whisper::Context.new("base")
-```
-
-### Low-level API to transcribe ###
-
-You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
-
-```ruby
-require "whisper"
-require "wavefile"
-
-reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
-samples = reader.enum_for(:each_buffer).map(&:samples).flatten
-
-whisper = Whisper::Context.new("base")
-whisper.full(Whisper::Params.new, samples)
-whisper.each_segment do |segment|
-  puts segment.text
-end
-```
-
-The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
-
-License
-------
-
-The same as [whisper.cpp][].
-
-[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
-[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
--- a/bindings/ruby/Rakefile
+++ b/bindings/ruby/Rakefile
@ -1,64 +0,0 @@
-require 'rake/clean'
-require "bundler/gem_tasks"
-require "rake/testtask"
-require_relative "extsources"
-
-SOURCES = FileList[]
-
-EXTSOURCES.each do |src|
-  basename = src.pathmap("%f")
-  dest = basename == "LICENSE" ? basename : src.pathmap("%{../..,ext}p")
-  dir = dest.pathmap("%d")
-  file src
-  directory dir
-  file dest => [src, dir] do |t|
-    cp t.source, t.name
-  end
-  SOURCES.include dest
-end
-
-CLEAN.include SOURCES
-CLEAN.include FileList["ext/*.o", "ext/*.metal", "ext/whisper.{so,bundle,dll}"]
-
-task build: ["ext/Makefile", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp", "whispercpp.gemspec"]
-
-directory "pkg"
-CLOBBER.include "pkg"
-
-LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
-SO_FILE = File.join("ext", LIB_NAME)
-LIB_FILE = File.join("lib", LIB_NAME)
-
-file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
-  Dir.chdir "ext" do
-    ruby "extconf.rb"
-  end
-end
-
-file SO_FILE => "ext/Makefile" do |t|
-  Dir.chdir "ext" do
-    sh "make"
-  end
-end
-CLEAN.include SO_FILE
-
-directory "lib"
-file LIB_FILE => [SO_FILE, "lib"] do |t|
-  copy t.source, t.name
-end
-CLEAN.include LIB_FILE
-
-Rake::TestTask.new do |t|
-  t.test_files = FileList["tests/test_*.rb"]
-end
-
-TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
-file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
-  Dir.chdir "tests/jfk_reader" do
-    ruby "extconf.rb"
-    sh "make"
-  end
-end
-CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
-
-task test: [LIB_FILE, TEST_MEMORY_VIEW]
--- a/bindings/ruby/ext/.gitignore
+++ b/bindings/ruby/ext/.gitignore
@ -1,13 +1,7 @@
 Makefile
-whisper.so
+ggml.c
+ggml.h
 whisper.bundle
-whisper.dll
-scripts/get-flags.mk
-*.o
-*.c
-*.cpp
-*.h
-*.m
-*.metal
-!ruby_whisper.cpp
-!ruby_whisper.h
+whisper.cpp
+whisper.h
+dr_wav.h
--- a/bindings/ruby/ext/cpu.mk
+++ b/bindings/ruby/ext/cpu.mk
@ -1,9 +0,0 @@
-ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
-	ggml/src/ggml-cpu/ggml-cpu.cpp \
-	ggml/include/ggml-backend.h \
-	ggml/include/ggml.h \
-	ggml/include/ggml-alloc.h \
-	ggml/src/ggml-backend-impl.h \
-	ggml/include/ggml-cpu.h \
-	ggml/src/ggml-impl.h
-	$(CXX) $(CXXFLAGS)   -c $< -o $@
--- a/bindings/ruby/ext/extconf.rb
+++ b/bindings/ruby/ext/extconf.rb
@ -1,10 +1,13 @@
 require 'mkmf'
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
+

 # need to use c++ compiler flags
-$CXXFLAGS << ' -std=c++17'
-
-$LDFLAGS << ' -lstdc++'
-
+$CXXFLAGS << ' -std=c++11'
 # Set to true when building binary gems
 if enable_config('static-stdlib', false)
  $LDFLAGS << ' -static-libgcc -static-libstdc++'
@ -15,185 +18,4 @@ if enable_config('march-tune-native', false)
  $CXXFLAGS << ' -march=native -mtune=native'
 end

-if ENV['WHISPER_METAL']
-  $GGML_METAL ||= true
-  $DEPRECATE_WARNING ||= true
-end
-
-$UNAME_S = `uname -s`.chomp
-$UNAME_P = `uname -p`.chomp
-$UNAME_M = `uname -m`.chomp
-
-if $UNAME_S == 'Darwin'
-  unless ENV['GGML_NO_METAL']
-    $GGML_METAL ||= true
-  end
-  $GGML_NO_OPENMP ||= true
-end
-
-if $GGML_METAL
-  $GGML_METAL_EMBED_LIBRARY = true
-end
-
-$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
-$MK_CFLAGS   = '-std=c11   -fPIC'
-$MK_CXXFLAGS = '-std=c++17 -fPIC'
-$MK_NVCCFLAGS = '-std=c++17'
-$MK_LDFLAGS = ''
-
-$OBJ_GGML = []
-$OBJ_WHISPER = []
-$OBJ_COMMON = []
-$OBJ_SDL = []
-
-$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
-
-if $UNAME_S == 'Linux'
-  $MK_CPPFLAGS << ' -D_GNU_SOURCE'
-end
-
-if $UNAME_S == 'Darwin'
-  $MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
-end
-
-if ENV['WHISPER_DEBUG']
-  $MK_CFLAGS    << ' -O0 -g'
-  $MK_CXXFLAGS  << ' -O0 -g'
-  $MK_LDFLAGS   << ' -g'
-  $MK_NVCCFLAGS << ' -O0 -g'
-else
-  $MK_CPPFLAGS   << ' -DNDEBUG'
-  $MK_CFLAGS     << ' -O3'
-  $MK_CXXFLAGS   << ' -O3'
-  $MK_NVCCFLAGS  << ' -O3'
-end
-
-$WARN_FLAGS =
-  ' -Wall' <<
-  ' -Wextra' <<
-  ' -Wpedantic' <<
-  ' -Wcast-qual' <<
-  ' -Wno-unused-function'
-
-$MK_CFLAGS <<
-  $WARN_FLAGS <<
-  ' -Wshadow' <<
-  ' -Wstrict-prototypes' <<
-  ' -Wpointer-arith' <<
-  ' -Wmissing-prototypes' <<
-  ' -Werror=implicit-int' <<
-  ' -Werror=implicit-function-declaration'
-
-$MK_CXXFLAGS <<
-  $WARN_FLAGS <<
-  ' -Wmissing-declarations' <<
-  ' -Wmissing-noreturn'
-
-unless `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
-  $MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
-end
-
-if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
-  $MK_CFLAGS   << ' -pthread'
-  $MK_CXXFLAGS << ' -pthread'
-end
-
-unless $_WIN32
-  $DSO_EXT = '.so'
-else
-  $DSO_EXT = '.dll'
-end
-
-unless ENV['RISCV']
-  if %w[x86_64 i686 amd64].include? $UNAME_M
-    $HOST_CXXFLAGS ||= ''
-
-    $MK_CFLAGS     << ' -march=native -mtune=native'
-    $HOST_CXXFLAGS << ' -march=native -mtune=native'
-  end
-else
-  $MK_CFLAGS   << ' -march=rv64gcv -mabi=lp64d'
-  $MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
-end
-
-unless ENV['GGML_NO_ACCELERATE']
-  if $UNAME_S == 'Darwin'
-    $MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE'
-    $MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
-    $MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
-    $MK_LDFLAGS  << ' -framework Accelerate'
-    $OBJ_GGML    << 'ggml/src/ggml-blas/ggml-blas.o'
-  end
-end
-
-if ENV['GGML_OPENBLAS']
-  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
-  $MK_CFLAGS   << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
-  $MK_LDFLAGS  << " #{`pkg-config --libs openblas`}"
-  $OBJ_GGML    << 'ggml/src/ggml-blas/ggml-blas.o'
-end
-
-if ENV['GGML_OPENBLAS64']
-  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
-  $MK_CFLAGS   << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
-  $MK_LDFLAGS  << " #{`pkg-config --libs openblas64`}"
-  $OBJ_GGML    << 'ggml/src/ggml-blas/ggml-blas.o'
-end
-
-if $GGML_METAL
-  $MK_CPPFLAGS << ' -DGGML_USE_METAL'
-  $MK_LDFLAGS  << ' -framework Foundation -framework Metal -framework MetalKit'
-  $OBJ_GGML    << 'ggml/src/ggml-metal/ggml-metal.o'
-
-  if ENV['GGML_METAL_NDEBUG']
-    $MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
-  end
-
-  if $GGML_METAL_EMBED_LIBRARY
-    $MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
-    $OBJ_GGML    << 'ggml/src/ggml-metal/ggml-metal-embed.o'
-  end
-end
-
-$OBJ_GGML <<
-  'ggml/src/ggml.o' <<
-  'ggml/src/ggml-alloc.o' <<
-  'ggml/src/ggml-backend.o' <<
-  'ggml/src/ggml-backend-reg.o' <<
-  'ggml/src/ggml-opt.o' <<
-  'ggml/src/ggml-quants.o' <<
-  'ggml/src/ggml-threading.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
-  'ggml/src/ggml-cpu/ggml-cpu-traits.o'
-
-$OBJ_WHISPER <<
-  'src/whisper.o'
-
-$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
-$objs << "ruby_whisper.o"
-
-$CPPFLAGS  = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
-$CFLAGS    = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
-$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
-$CXXFLAGS  = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
-$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
-$LDFLAGS   = "#{$MK_LDFLAGS} #{$LDFLAGS}"
-
 create_makefile('whisper')
-
-File.open 'Makefile', 'a' do |file|
-  file.puts 'include scripts/get-flags.mk'
-  file.puts 'include cpu.mk'
-
-  if $GGML_METAL
-    file.puts 'include metal.mk'
-
-    if $GGML_METAL_EMBED_LIBRARY
-      file.puts 'include metal-embed.mk'
-    end
-  end
-end
--- a/bindings/ruby/ext/metal-embed.mk
+++ b/bindings/ruby/ext/metal-embed.mk
@ -1,17 +0,0 @@
-ggml/src/ggml-metal/ggml-metal-embed.o: \
-	ggml/src/ggml-metal/ggml-metal.metal \
-	ggml/src/ggml-metal/ggml-metal-impl.h \
-	ggml/src/ggml-common.h
-	@echo "Embedding Metal library"
-	@sed -e '/__embed_ggml-common.h__/r      ggml/src/ggml-common.h'                -e '/__embed_ggml-common.h__/d'      < ggml/src/ggml-metal/ggml-metal.metal           > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
-	@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
-	$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
-	@echo ".section __DATA, __ggml_metallib"                       >  $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	@echo ".globl _ggml_metallib_start"                            >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	@echo "_ggml_metallib_start:"                                  >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	@echo ".globl _ggml_metallib_end"                              >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	@echo "_ggml_metallib_end:"                                    >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
-	$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
-	@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
-	@rmdir ${TEMP_ASSEMBLY}
--- a/bindings/ruby/ext/metal.mk
+++ b/bindings/ruby/ext/metal.mk
@ -1,6 +0,0 @@
-ggml/src/ggml-metal/ggml-metal.o: \
-	ggml/src/ggml-metal/ggml-metal.m \
-	ggml/src/ggml-metal/ggml-metal-impl.h \
-	ggml/include/ggml-metal.h \
-	ggml/include/ggml.h
-	$(CC) $(CFLAGS) -c $< -o $@
--- a/bindings/ruby/ext/ruby_whisper.cpp
+++ b/bindings/ruby/ext/ruby_whisper.cpp
--- a/bindings/ruby/ext/ruby_whisper.h
+++ b/bindings/ruby/ext/ruby_whisper.h
@ -3,13 +3,6 @@

 #include "whisper.h"

-typedef struct {
-  VALUE *context;
-  VALUE user_data;
-  VALUE callback;
-  VALUE callbacks;
-} ruby_whisper_callback_container;
-
 typedef struct {
  struct whisper_context *context;
 } ruby_whisper;
@ -17,9 +10,6 @@ typedef struct {
 typedef struct {
  struct whisper_full_params params;
  bool diarize;
-  ruby_whisper_callback_container *new_segment_callback_container;
-  ruby_whisper_callback_container *progress_callback_container;
-  ruby_whisper_callback_container *abort_callback_container;
 } ruby_whisper_params;

 #endif
--- a/bindings/ruby/extsources.rb
+++ b/bindings/ruby/extsources.rb
@ -1,6 +0,0 @@
-require "yaml"
-
-sources = `git ls-files -z ../..`.split("\x0")
-paths = YAML.load_file("../../.github/workflows/bindings-ruby.yml")[true]["push"]["paths"]
-paths.delete "bindings/ruby/**"
-EXTSOURCES = (Dir.glob(paths, base: "../..").collect {|path| "../../#{path}"} << "../../LICENSE") & sources
--- a/bindings/ruby/lib/whisper.rb
+++ b/bindings/ruby/lib/whisper.rb
@ -1,2 +0,0 @@
-require "whisper.so"
-require "whisper/model/uri"
--- a/bindings/ruby/lib/whisper/model/uri.rb
+++ b/bindings/ruby/lib/whisper/model/uri.rb
@ -1,163 +0,0 @@
-require "whisper.so"
-require "uri"
-require "net/http"
-require "time"
-require "pathname"
-require "io/console/size"
-
-class Whisper::Model
-  class URI
-    def initialize(uri)
-      @uri = URI(uri)
-    end
-
-    def to_path
-      cache
-      cache_path.to_path
-    end
-
-    def clear_cache
-      path = cache_path
-      path.delete if path.exist?
-    end
-
-    private
-
-    def cache_path
-      base_cache_dir/@uri.host/@uri.path[1..]
-    end
-
-    def base_cache_dir
-      base = case RUBY_PLATFORM
-             when /mswin|mingw/
-               ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home)/"AppData/Local"
-             when /darwin/
-               Pathname(Dir.home)/"Library/Caches"
-             else
-               ENV.key?("XDG_CACHE_HOME") ? ENV["XDG_CACHE_HOME"] : Pathname(Dir.home)/".cache"
-             end
-      base/"whisper.cpp"
-    end
-
-    def cache
-      path = cache_path
-      headers = {}
-      headers["if-modified-since"] = path.mtime.httpdate if path.exist?
-      request @uri, headers
-      path
-    end
-
-    def request(uri, headers)
-      Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
-        request = Net::HTTP::Get.new(uri, headers)
-        http.request request do |response|
-          case response
-          when Net::HTTPNotModified
-            # noop
-          when Net::HTTPOK
-            download response
-          when Net::HTTPRedirection
-            request URI(response["location"]), headers
-          else
-            return if headers.key?("if-modified-since") # Use cache file
-
-            raise "#{response.code} #{response.message}\n#{response.body}"
-          end
-        end
-      end
-    end
-
-    def download(response)
-      path = cache_path
-      path.dirname.mkpath unless path.dirname.exist?
-      downloading_path = Pathname("#{path}.downloading")
-      size = response.content_length
-      downloading_path.open "wb" do |file|
-        downloaded = 0
-        response.read_body do |chunk|
-          file << chunk
-          downloaded += chunk.bytesize
-          show_progress downloaded, size
-        end
-        $stderr.puts
-      end
-      downloading_path.rename path
-    end
-
-    def show_progress(current, size)
-      progress_rate_available = size && $stderr.tty?
-
-      unless @prev
-        @prev = Time.now
-        $stderr.puts "Downloading #{@uri} to #{cache_path}"
-      end
-
-      now = Time.now
-
-      if progress_rate_available
-        return if now - @prev < 1 && current < size
-
-        progress_width = 20
-        progress = current.to_f / size
-        arrow_length = progress * progress_width
-        arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
-        line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
-        padding = ' ' * ($stderr.winsize[1] - line.size)
-        $stderr.print "\r#{line}#{padding}"
-      else
-        return if now - @prev < 1
-
-        $stderr.print "."
-      end
-      @prev = now
-    end
-
-    def format_bytesize(bytesize)
-      return "0.0 B" if bytesize.zero?
-
-      units = %w[B KiB MiB GiB TiB]
-      exp = (Math.log(bytesize) / Math.log(1024)).to_i
-      format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
-    end
-  end
-
-  @pre_converted_models = {}
-  %w[
-    tiny
-    tiny.en
-    tiny-q5_1
-    tiny.en-q5_1
-    tiny-q8_0
-    base
-    base.en
-    base-q5_1
-    base.en-q5_1
-    base-q8_0
-    small
-    small.en
-    small.en-tdrz
-    small-q5_1
-    small.en-q5_1
-    small-q8_0
-    medium
-    medium.en
-    medium-q5_0
-    medium.en-q5_0
-    medium-q8_0
-    large-v1
-    large-v2
-    large-v2-q5_0
-    large-v2-q8_0
-    large-v3
-    large-v3-q5_0
-    large-v3-turbo
-    large-v3-turbo-q5_0
-    large-v3-turbo-q8_0
-  ].each do |name|
-    @pre_converted_models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
-  end
-
-  class << self
-    attr_reader :pre_converted_models
-  end
-end
--- a/bindings/ruby/tests/helper.rb
+++ b/bindings/ruby/tests/helper.rb
@ -1,24 +0,0 @@
-require "test/unit"
-require "whisper"
-require_relative "jfk_reader/jfk_reader"
-
-class TestBase < Test::Unit::TestCase
-  AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
-
-  class << self
-    attr_reader :whisper
-
-    def startup
-      @whisper = Whisper::Context.new("base.en")
-      params = Whisper::Params.new
-      params.print_timestamps = false
-      @whisper.transcribe(TestBase::AUDIO, params)
-    end
-  end
-
-  private
-
-  def whisper
-    self.class.whisper
-  end
-end
--- a/bindings/ruby/tests/jfk_reader/.gitignore
+++ b/bindings/ruby/tests/jfk_reader/.gitignore
@ -1,5 +0,0 @@
-Makefile
-jfk_reader.o
-jfk_reader.so
-jfk_reader.bundle
-jfk_reader.dll
--- a/bindings/ruby/tests/jfk_reader/extconf.rb
+++ b/bindings/ruby/tests/jfk_reader/extconf.rb
@ -1,3 +0,0 @@
-require "mkmf"
-
-create_makefile("jfk_reader")
--- a/bindings/ruby/tests/jfk_reader/jfk_reader.c
+++ b/bindings/ruby/tests/jfk_reader/jfk_reader.c
@ -1,68 +0,0 @@
-#include <ruby.h>
-#include <ruby/memory_view.h>
-#include <ruby/encoding.h>
-
-static VALUE
-jfk_reader_initialize(VALUE self, VALUE audio_path)
-{
-  rb_iv_set(self, "audio_path", audio_path);
-  return Qnil;
-}
-
-static bool
-jfk_reader_get_memory_view(const VALUE obj, rb_memory_view_t *view, int flags)
-{
-  VALUE audio_path = rb_iv_get(obj, "audio_path");
-  const char *audio_path_str = StringValueCStr(audio_path);
-  const int n_samples = 176000;
-  float *data = (float *)malloc(n_samples * sizeof(float));
-  short *samples = (short *)malloc(n_samples * sizeof(short));
-  FILE *file = fopen(audio_path_str, "rb");
-
-  fseek(file, 78, SEEK_SET);
-  fread(samples, sizeof(short), n_samples, file);
-  fclose(file);
-  for (int i = 0; i < n_samples; i++) {
-    data[i] = samples[i]/32768.0;
-  }
-
-  view->obj = obj;
-  view->data = (void *)data;
-  view->byte_size = sizeof(float) * n_samples;
-  view->readonly = true;
-  view->format = "f";
-  view->item_size = sizeof(float);
-  view->item_desc.components = NULL;
-  view->item_desc.length = 0;
-  view->ndim = 1;
-  view->shape = NULL;
-  view->sub_offsets = NULL;
-  view->private_data = NULL;
-
-  return true;
-}
-
-static bool
-jfk_reader_release_memory_view(const VALUE obj, rb_memory_view_t *view)
-{
-  return true;
-}
-
-static bool
-jfk_reader_memory_view_available_p(const VALUE obj)
-{
-  return true;
-}
-
-static const rb_memory_view_entry_t jfk_reader_view_entry = {
-  jfk_reader_get_memory_view,
-  jfk_reader_release_memory_view,
-  jfk_reader_memory_view_available_p
-};
-
-void Init_jfk_reader(void)
-{
-  VALUE cJFKReader = rb_define_class("JFKReader", rb_cObject);
-  rb_memory_view_register(cJFKReader, &jfk_reader_view_entry);
-  rb_define_method(cJFKReader, "initialize", jfk_reader_initialize, 1);
-}
--- a/bindings/ruby/tests/test_callback.rb
+++ b/bindings/ruby/tests/test_callback.rb
@ -1,160 +0,0 @@
-require_relative "helper"
-
-class TestCallback < TestBase
-  def setup
-    GC.start
-    @params = Whisper::Params.new
-    @whisper = Whisper::Context.new("base.en")
-    @audio = File.join(AUDIO)
-  end
-
-  def test_new_segment_callback
-    @params.new_segment_callback = ->(context, state, n_new, user_data) {
-      assert_kind_of Integer, n_new
-      assert n_new > 0
-      assert_same @whisper, context
-
-      n_segments = context.full_n_segments
-      n_new.times do |i|
-        i_segment = n_segments - 1 + i
-        start_time = context.full_get_segment_t0(i_segment) * 10
-        end_time = context.full_get_segment_t1(i_segment) * 10
-        text = context.full_get_segment_text(i_segment)
-
-        assert_kind_of Integer, start_time
-        assert start_time >= 0
-        assert_kind_of Integer, end_time
-        assert end_time > 0
-        assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
-      end
-    }
-
-    @whisper.transcribe(@audio, @params)
-  end
-
-  def test_new_segment_callback_closure
-    search_word = "what"
-    @params.new_segment_callback = ->(context, state, n_new, user_data) {
-      n_segments = context.full_n_segments
-      n_new.times do |i|
-        i_segment = n_segments - 1 + i
-        text = context.full_get_segment_text(i_segment)
-        if text.include?(search_word)
-          t0 = context.full_get_segment_t0(i_segment)
-          t1 = context.full_get_segment_t1(i_segment)
-          raise "search word '#{search_word}' found at between #{t0} and #{t1}"
-        end
-      end
-    }
-
-    assert_raise RuntimeError do
-      @whisper.transcribe(@audio, @params)
-    end
-  end
-
-  def test_new_segment_callback_user_data
-    udata = Object.new
-    @params.new_segment_callback_user_data = udata
-    @params.new_segment_callback = ->(context, state, n_new, user_data) {
-      assert_same udata, user_data
-    }
-
-    @whisper.transcribe(@audio, @params)
-  end
-
-  def test_new_segment_callback_user_data_gc
-    @params.new_segment_callback_user_data = "My user data"
-    @params.new_segment_callback = ->(context, state, n_new, user_data) {
-      assert_equal "My user data", user_data
-    }
-    GC.start
-
-    assert_same @whisper, @whisper.transcribe(@audio, @params)
-  end
-
-  def test_progress_callback
-    first = nil
-    last = nil
-    @params.progress_callback = ->(context, state, progress, user_data) {
-      assert_kind_of Integer, progress
-      assert 0 <= progress && progress <= 100
-      assert_same @whisper, context
-      first = progress if first.nil?
-      last = progress
-    }
-    @whisper.transcribe(@audio, @params)
-    assert_equal 0, first
-    assert_equal 100, last
-  end
-
-  def test_progress_callback_user_data
-    udata = Object.new
-    @params.progress_callback_user_data = udata
-    @params.progress_callback = ->(context, state, n_new, user_data) {
-      assert_same udata, user_data
-    }
-
-    @whisper.transcribe(@audio, @params)
-  end
-
-  def test_on_progress
-    first = nil
-    last = nil
-    @params.on_progress do |progress|
-      assert_kind_of Integer, progress
-      assert 0 <= progress && progress <= 100
-      first = progress if first.nil?
-      last = progress
-    end
-    @whisper.transcribe(@audio, @params)
-    assert_equal 0, first
-    assert_equal 100, last
-  end
-
-  def test_abort_callback
-    i = 0
-    @params.abort_callback = ->(user_data) {
-      assert_nil user_data
-      i += 1
-      return false
-    }
-    @whisper.transcribe(@audio, @params)
-    assert i > 0
-  end
-
-  def test_abort_callback_abort
-    i = 0
-    @params.abort_callback = ->(user_data) {
-      i += 1
-      return i == 3
-    }
-    @whisper.transcribe(@audio, @params)
-    assert_equal 3, i
-  end
-
-  def test_abort_callback_user_data
-    udata = Object.new
-    @params.abort_callback_user_data = udata
-    yielded = nil
-    @params.abort_callback = ->(user_data) {
-      yielded = user_data
-    }
-    @whisper.transcribe(@audio, @params)
-    assert_same udata, yielded
-  end
-
-  def test_abort_on
-    do_abort = false
-    aborted_from_callback = false
-    @params.on_new_segment do |segment|
-      do_abort = true if segment.text.match? /ask/
-    end
-    i = 0
-    @params.abort_on do
-      i += 1
-      do_abort
-    end
-    @whisper.transcribe(@audio, @params)
-    assert i > 0
-  end
-end
--- a/bindings/ruby/tests/test_error.rb
+++ b/bindings/ruby/tests/test_error.rb
@ -1,20 +0,0 @@
-require_relative "helper"
-
-class TestError < TestBase
-  def test_error
-    error = Whisper::Error.new(-2)
-    assert_equal "failed to compute log mel spectrogram", error.message
-    assert_equal -2, error.code
-  end
-
-  def test_unknown_error
-    error = Whisper::Error.new(-20)
-    assert_equal "unknown error", error.message
-  end
-
-  def test_non_int_code
-    assert_raise TypeError do
-      error = Whisper::Error.new("non int")
-    end
-  end
-end
--- a/bindings/ruby/tests/test_model.rb
+++ b/bindings/ruby/tests/test_model.rb
@ -1,71 +0,0 @@
-require_relative "helper"
-require "pathname"
-
-class TestModel < TestBase
-  def test_model
-    whisper = Whisper::Context.new("base.en")
-    assert_instance_of Whisper::Model, whisper.model
-  end
-
-  def test_attributes
-    whisper = Whisper::Context.new("base.en")
-    model = whisper.model
-
-    assert_equal 51864, model.n_vocab
-    assert_equal 1500, model.n_audio_ctx
-    assert_equal 512, model.n_audio_state
-    assert_equal 8, model.n_audio_head
-    assert_equal 6, model.n_audio_layer
-    assert_equal 448, model.n_text_ctx
-    assert_equal 512, model.n_text_state
-    assert_equal 8, model.n_text_head
-    assert_equal 6, model.n_text_layer
-    assert_equal 80, model.n_mels
-    assert_equal 1, model.ftype
-    assert_equal "base", model.type
-  end
-
-  def test_gc
-    model = Whisper::Context.new("base.en").model
-    GC.start
-
-    assert_equal 51864, model.n_vocab
-    assert_equal 1500, model.n_audio_ctx
-    assert_equal 512, model.n_audio_state
-    assert_equal 8, model.n_audio_head
-    assert_equal 6, model.n_audio_layer
-    assert_equal 448, model.n_text_ctx
-    assert_equal 512, model.n_text_state
-    assert_equal 8, model.n_text_head
-    assert_equal 6, model.n_text_layer
-    assert_equal 80, model.n_mels
-    assert_equal 1, model.ftype
-    assert_equal "base", model.type
-  end
-
-  def test_pathname
-    path = Pathname(Whisper::Model.pre_converted_models["base.en"].to_path)
-    whisper = Whisper::Context.new(path)
-    model = whisper.model
-
-    assert_equal 51864, model.n_vocab
-    assert_equal 1500, model.n_audio_ctx
-    assert_equal 512, model.n_audio_state
-    assert_equal 8, model.n_audio_head
-    assert_equal 6, model.n_audio_layer
-    assert_equal 448, model.n_text_ctx
-    assert_equal 512, model.n_text_state
-    assert_equal 8, model.n_text_head
-    assert_equal 6, model.n_text_layer
-    assert_equal 80, model.n_mels
-    assert_equal 1, model.ftype
-    assert_equal "base", model.type
-  end
-
-  def test_auto_download
-    path = Whisper::Model.pre_converted_models["base.en"].to_path
-
-    assert_path_exist path
-    assert_equal 147964211, File.size(path)
-  end
-end
--- a/bindings/ruby/tests/test_package.rb
+++ b/bindings/ruby/tests/test_package.rb
@ -1,31 +0,0 @@
-require_relative "helper"
-require 'tempfile'
-require 'tmpdir'
-require 'shellwords'
-
-class TestPackage < TestBase
-  def test_build
-    Tempfile.create do |file|
-      assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
-      assert file.size > 0
-      assert_path_exist file.to_path
-    end
-  end
-
-  sub_test_case "Building binary on installation" do
-    def setup
-      system "rake", "build", exception: true
-    end
-
-    def test_install
-      match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
-      filename = match_data[1]
-      version = match_data[2]
-      basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
-      Dir.mktmpdir do |dir|
-        system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true
-        assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
-      end
-    end
-  end
-end
--- a/bindings/ruby/tests/test_params.rb
+++ b/bindings/ruby/tests/test_params.rb
@ -1,160 +0,0 @@
-require_relative "helper"
-
-class TestParams < TestBase
-  def setup
-    @params  = Whisper::Params.new
-  end
-
-  def test_language
-    @params.language = "en"
-    assert_equal @params.language, "en"
-    @params.language = "auto"
-    assert_equal @params.language, "auto"
-  end
-
-  def test_offset
-    @params.offset = 10_000
-    assert_equal @params.offset, 10_000
-    @params.offset = 0
-    assert_equal @params.offset, 0
-  end
-
-  def test_duration
-    @params.duration = 60_000
-    assert_equal @params.duration, 60_000
-    @params.duration = 0
-    assert_equal @params.duration, 0
-  end
-
-  def test_max_text_tokens
-    @params.max_text_tokens = 300
-    assert_equal @params.max_text_tokens, 300
-    @params.max_text_tokens = 0
-    assert_equal @params.max_text_tokens, 0
-  end
-
-  def test_translate
-    @params.translate = true
-    assert @params.translate
-    @params.translate = false
-    assert !@params.translate
-  end
-
-  def test_no_context
-    @params.no_context = true
-    assert @params.no_context
-    @params.no_context = false
-    assert !@params.no_context
-  end
-
-  def test_single_segment
-    @params.single_segment = true
-    assert @params.single_segment
-    @params.single_segment = false
-    assert !@params.single_segment
-  end
-
-  def test_print_special
-    @params.print_special = true
-    assert @params.print_special
-    @params.print_special = false
-    assert !@params.print_special
-  end
-
-  def test_print_progress
-    @params.print_progress = true
-    assert @params.print_progress
-    @params.print_progress = false
-    assert !@params.print_progress
-  end
-
-  def test_print_realtime
-    @params.print_realtime = true
-    assert @params.print_realtime
-    @params.print_realtime = false
-    assert !@params.print_realtime
-  end
-
-  def test_print_timestamps
-    @params.print_timestamps = true
-    assert @params.print_timestamps
-    @params.print_timestamps = false
-    assert !@params.print_timestamps
-  end
-
-  def test_suppress_blank
-    @params.suppress_blank = true
-    assert @params.suppress_blank
-    @params.suppress_blank = false
-    assert !@params.suppress_blank
-  end
-
-  def test_suppress_nst
-    @params.suppress_nst = true
-    assert @params.suppress_nst
-    @params.suppress_nst = false
-    assert !@params.suppress_nst
-  end
-
-  def test_token_timestamps
-    @params.token_timestamps = true
-    assert @params.token_timestamps
-    @params.token_timestamps = false
-    assert !@params.token_timestamps
-  end
-
-  def test_split_on_word
-    @params.split_on_word = true
-    assert @params.split_on_word
-    @params.split_on_word = false
-    assert !@params.split_on_word
-  end
-
-  def test_initial_prompt
-    assert_nil @params.initial_prompt
-    @params.initial_prompt = "You are a polite person."
-    assert_equal "You are a polite person.", @params.initial_prompt
-  end
-
-  def test_temperature
-    assert_equal 0.0, @params.temperature
-    @params.temperature = 0.5
-    assert_equal 0.5, @params.temperature
-  end
-
-  def test_max_initial_ts
-    assert_equal 1.0, @params.max_initial_ts
-    @params.max_initial_ts = 600.0
-    assert_equal 600.0, @params.max_initial_ts
-  end
-
-  def test_length_penalty
-    assert_equal -1.0, @params.length_penalty
-    @params.length_penalty = 0.5
-    assert_equal 0.5, @params.length_penalty
-  end
-
-  def test_temperature_inc
-    assert_in_delta 0.2, @params.temperature_inc
-    @params.temperature_inc = 0.5
-    assert_in_delta 0.5, @params.temperature_inc
-  end
-
-  def test_entropy_thold
-    assert_in_delta 2.4, @params.entropy_thold
-    @params.entropy_thold = 3.0
-    assert_in_delta 3.0, @params.entropy_thold
-  end
-
-  def test_logprob_thold
-    assert_in_delta -1.0, @params.logprob_thold
-    @params.logprob_thold = -0.5
-    assert_in_delta -0.5, @params.logprob_thold
-  end
-
-  def test_no_speech_thold
-    assert_in_delta 0.6, @params.no_speech_thold
-    @params.no_speech_thold = 0.2
-    assert_in_delta 0.2, @params.no_speech_thold
-  end
-end
--- a/bindings/ruby/tests/test_segment.rb
+++ b/bindings/ruby/tests/test_segment.rb
@ -1,74 +0,0 @@
-require_relative "helper"
-
-class TestSegment < TestBase
-  def test_iteration
-    whisper.each_segment do |segment|
-      assert_instance_of Whisper::Segment, segment
-    end
-  end
-
-  def test_enumerator
-    enum = whisper.each_segment
-    assert_instance_of Enumerator, enum
-    enum.to_a.each_with_index do |segment, index|
-      assert_instance_of Whisper::Segment, segment
-      assert_kind_of Integer, index
-    end
-  end
-
-  def test_start_time
-    i = 0
-    whisper.each_segment do |segment|
-      assert_equal 0, segment.start_time if i == 0
-      i += 1
-    end
-  end
-
-  def test_end_time
-    i = 0
-    whisper.each_segment do |segment|
-      assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
-      i += 1
-    end
-  end
-
-  def test_no_speech_prob
-    no_speech_prob = nil
-    whisper.each_segment do |segment|
-      no_speech_prob = segment.no_speech_prob
-    end
-    assert no_speech_prob > 0.0
-  end
-
-  def test_on_new_segment
-    params = Whisper::Params.new
-    seg = nil
-    index = 0
-    params.on_new_segment do |segment|
-      assert_instance_of Whisper::Segment, segment
-      if index == 0
-        seg = segment
-        assert_equal 0, segment.start_time
-        assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
-      end
-      index += 1
-    end
-    whisper.transcribe(AUDIO, params)
-    assert_equal 0, seg.start_time
-    assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
-  end
-
-  def test_on_new_segment_twice
-    params = Whisper::Params.new
-    seg = nil
-    params.on_new_segment do |segment|
-      seg = segment
-      return
-    end
-    params.on_new_segment do |segment|
-      assert_same seg, segment
-      return
-    end
-    whisper.transcribe(AUDIO, params)
-  end
-end
--- a/bindings/ruby/tests/test_whisper.rb
+++ b/bindings/ruby/tests/test_whisper.rb
@ -1,217 +1,138 @@
-require_relative "helper"
-require "stringio"
-require "etc"
+TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))  # Parent directory of the directory holding this test file.
+EXTDIR = File.join(TOPDIR, 'ext')  # ext/ directory under TOPDIR.
+#$LIBDIR = File.join(TOPDIR, 'lib')
+#$:.unshift(LIBDIR)
+$:.unshift(EXTDIR)  # Put EXTDIR first on the load path so that require 'whisper' looks there first.

-# Exists to detect memory-related bug
-Whisper.log_set ->(level, buffer, user_data) {}, nil
+require 'whisper'
+require 'test/unit'

-class TestWhisper < TestBase
+class TestWhisper < Test::Unit::TestCase
  def setup
    @params  = Whisper::Params.new
  end

+  def test_language
+    @params.language = "en"
+    assert_equal @params.language, "en"
+    @params.language = "auto"
+    assert_equal @params.language, "auto"
+  end
+
+  def test_offset
+    @params.offset = 10_000
+    assert_equal @params.offset, 10_000
+    @params.offset = 0
+    assert_equal @params.offset, 0
+  end
+
+  def test_duration
+    @params.duration = 60_000
+    assert_equal @params.duration, 60_000
+    @params.duration = 0
+    assert_equal @params.duration, 0
+  end
+
+  def test_max_text_tokens
+    @params.max_text_tokens = 300
+    assert_equal @params.max_text_tokens, 300
+    @params.max_text_tokens = 0
+    assert_equal @params.max_text_tokens, 0
+  end
+
+  def test_translate
+    @params.translate = true
+    assert @params.translate
+    @params.translate = false
+    assert !@params.translate
+  end
+
+  def test_no_context
+    @params.no_context = true
+    assert @params.no_context
+    @params.no_context = false
+    assert !@params.no_context
+  end
+
+  def test_single_segment
+    @params.single_segment = true
+    assert @params.single_segment
+    @params.single_segment = false
+    assert !@params.single_segment
+  end
+
+  def test_print_special
+    @params.print_special = true
+    assert @params.print_special
+    @params.print_special = false
+    assert !@params.print_special
+  end
+
+  def test_print_progress
+    @params.print_progress = true
+    assert @params.print_progress
+    @params.print_progress = false
+    assert !@params.print_progress
+  end
+
+  def test_print_realtime
+    @params.print_realtime = true
+    assert @params.print_realtime
+    @params.print_realtime = false
+    assert !@params.print_realtime
+  end
+
+  def test_print_timestamps
+    @params.print_timestamps = true
+    assert @params.print_timestamps
+    @params.print_timestamps = false
+    assert !@params.print_timestamps
+  end
+
+  def test_suppress_blank
+    @params.suppress_blank = true
+    assert @params.suppress_blank
+    @params.suppress_blank = false
+    assert !@params.suppress_blank
+  end
+
+  def test_suppress_non_speech_tokens
+    @params.suppress_non_speech_tokens = true
+    assert @params.suppress_non_speech_tokens
+    @params.suppress_non_speech_tokens = false
+    assert !@params.suppress_non_speech_tokens
+  end
+
+  def test_token_timestamps
+    @params.token_timestamps = true
+    assert @params.token_timestamps
+    @params.token_timestamps = false
+    assert !@params.token_timestamps
+  end
+
+  def test_split_on_word
+    @params.split_on_word = true
+    assert @params.split_on_word
+    @params.split_on_word = false
+    assert !@params.split_on_word
+  end
+
+  def test_speed_up
+    @params.speed_up = true
+    assert @params.speed_up
+    @params.speed_up = false
+    assert !@params.speed_up
+  end
+
  def test_whisper
-    @whisper = Whisper::Context.new("base.en")
+    @whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
    params  = Whisper::Params.new
    params.print_timestamps = false

-    @whisper.transcribe(AUDIO, params) {|text|
+    jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
+    @whisper.transcribe(jfk, params) {|text|
      assert_match /ask not what your country can do for you, ask what you can do for your country/, text
    }
  end

-  sub_test_case "After transcription" do
-    def test_full_n_segments
-      assert_equal 1, whisper.full_n_segments
-    end
-
-    def test_full_lang_id
-      assert_equal 0, whisper.full_lang_id
-    end
-
-    def test_full_get_segment_t0
-      assert_equal 0, whisper.full_get_segment_t0(0)
-      assert_raise IndexError do
-        whisper.full_get_segment_t0(whisper.full_n_segments)
-      end
-      assert_raise IndexError do
-        whisper.full_get_segment_t0(-1)
-      end
-    end
-
-    def test_full_get_segment_t1
-      t1 = whisper.full_get_segment_t1(0)
-      assert_kind_of Integer, t1
-      assert t1 > 0
-      assert_raise IndexError do
-        whisper.full_get_segment_t1(whisper.full_n_segments)
-      end
-    end
-
-    def test_full_get_segment_speaker_turn_next
-      assert_false whisper.full_get_segment_speaker_turn_next(0)
-    end
-
-    def test_full_get_segment_text
-      assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
-    end
-
-    def test_full_get_segment_no_speech_prob
-      prob = whisper.full_get_segment_no_speech_prob(0)
-      assert prob > 0.0
-      assert prob < 1.0
-    end
-  end
-
-  def test_lang_max_id
-    assert_kind_of Integer, Whisper.lang_max_id
-  end
-
-  def test_lang_id
-    assert_equal 0, Whisper.lang_id("en")
-    assert_raise ArgumentError do
-      Whisper.lang_id("non existing language")
-    end
-  end
-
-  def test_lang_str
-    assert_equal "en", Whisper.lang_str(0)
-    assert_raise IndexError do
-      Whisper.lang_str(Whisper.lang_max_id + 1)
-    end
-  end
-
-  def test_lang_str_full
-    assert_equal "english", Whisper.lang_str_full(0)
-    assert_raise IndexError do
-      Whisper.lang_str_full(Whisper.lang_max_id + 1)
-    end
-  end
-
-  def test_log_set
-    user_data = Object.new
-    logs = []
-    log_callback = ->(level, buffer, udata) {
-      logs << [level, buffer, udata]
-    }
-    Whisper.log_set log_callback, user_data
-    Whisper::Context.new("base.en")
-
-    assert logs.length > 30
-    logs.each do |log|
-      assert_include [Whisper::LOG_LEVEL_DEBUG, Whisper::LOG_LEVEL_INFO, Whisper::LOG_LEVEL_WARN], log[0]
-      assert_same user_data, log[2]
-    end
-  end
-
-  def test_log_suppress
-    stderr = $stderr
-    Whisper.log_set ->(level, buffer, user_data) {
-      # do nothing
-    }, nil
-    dev = StringIO.new("")
-    $stderr = dev
-    Whisper::Context.new("base.en")
-    assert_empty dev.string
-  ensure
-    $stderr = stderr
-  end
-
-  sub_test_case "full" do
-    def setup
-      super
-      @whisper = Whisper::Context.new("base.en")
-      @samples = File.read(AUDIO, nil, 78).unpack("s<*").collect {|i| i.to_f / 2**15}
-    end
-
-    def test_full
-      @whisper.full(@params, @samples, @samples.length)
-
-      assert_equal 1, @whisper.full_n_segments
-      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
-    end
-
-    def test_full_without_length
-      @whisper.full(@params, @samples)
-
-      assert_equal 1, @whisper.full_n_segments
-      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
-    end
-
-    def test_full_enumerator
-      samples = @samples.each
-      @whisper.full(@params, samples, @samples.length)
-
-      assert_equal 1, @whisper.full_n_segments
-      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
-    end
-
-    def test_full_enumerator_without_length
-      samples = @samples.each
-      assert_raise ArgumentError do
-        @whisper.full(@params, samples)
-      end
-    end
-
-    def test_full_enumerator_with_too_large_length
-      samples = @samples.each.take(10).to_enum
-      assert_raise StopIteration do
-        @whisper.full(@params, samples, 11)
-      end
-    end
-
-    def test_full_with_memory_view
-      samples = JFKReader.new(AUDIO)
-      @whisper.full(@params, samples)
-
-      assert_equal 1, @whisper.full_n_segments
-      assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
-    end
-
-    def test_full_parallel
-      @whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)
-
-      assert_equal Etc.nprocessors, @whisper.full_n_segments
-      text = @whisper.each_segment.collect(&:text).join
-      assert_match /ask what you can do/i, text
-      assert_match /for your country/i, text
-    end
-
-    def test_full_parallel_with_memory_view
-      samples = JFKReader.new(AUDIO)
-      @whisper.full_parallel(@params, samples, nil, Etc.nprocessors)
-
-      assert_equal Etc.nprocessors, @whisper.full_n_segments
-      text = @whisper.each_segment.collect(&:text).join
-      assert_match /ask what you can do/i, text
-      assert_match /for your country/i, text
-    end
-
-    def test_full_parallel_without_length_and_n_processors
-      @whisper.full_parallel(@params, @samples)
-
-      assert_equal 1, @whisper.full_n_segments
-      text = @whisper.each_segment.collect(&:text).join
-      assert_match /ask what you can do/i, text
-      assert_match /for your country/i, text
-    end
-
-    def test_full_parallel_without_length
-      @whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)
-
-      assert_equal Etc.nprocessors, @whisper.full_n_segments
-      text = @whisper.each_segment.collect(&:text).join
-      assert_match /ask what you can do/i, text
-      assert_match /for your country/i, text
-    end
-
-    def test_full_parallel_without_n_processors
-      @whisper.full_parallel(@params, @samples, @samples.length)
-
-      assert_equal 1, @whisper.full_n_segments
-      text = @whisper.each_segment.collect(&:text).join
-      assert_match /ask what you can do/i, text
-      assert_match /for your country/i, text
-    end
-  end
 end
--- a/bindings/ruby/whispercpp.gemspec
+++ b/bindings/ruby/whispercpp.gemspec
@ -1,36 +0,0 @@
-require_relative "extsources"
-
-Gem::Specification.new do |s|  # Gem metadata for the whispercpp Ruby bindings.
-  s.name    = "whispercpp"
-  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
-  s.version = '1.3.1'
-  s.date    = '2024-12-19'
-  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
-  s.email   = 'todd.fisher@gmail.com'
-  s.extra_rdoc_files = ['LICENSE', 'README.md']
-
-  s.files = `git ls-files . -z`.split("\x0") +
-              EXTSOURCES.collect {|file|  # Appended to the NUL-separated git ls-files output; EXTSOURCES is presumably defined by the required "extsources" file.
-                basename = File.basename(file)
-                if s.extra_rdoc_files.include?(basename)  # Files named like an extra rdoc file are listed by bare file name.
-                  basename
-                else
-                  file.sub("../..", "ext")  # Otherwise the first "../.." in the path is replaced with "ext".
-                end
-              }
-
-  s.summary = %q{Ruby whisper.cpp bindings}
-  s.test_files = s.files.select {|file| file.start_with? "tests/"}  # Test files are the packaged files under tests/.
-
-  s.extensions << 'ext/extconf.rb'  # Registers ext/extconf.rb as the gem's extension build script.
-  s.required_ruby_version = '>= 3.1.0'
-
-  #### Documentation and testing.
-  s.homepage = 'https://github.com/ggerganov/whisper.cpp'
-  s.rdoc_options = ['--main', 'README.md']
-
-
-    s.platform = Gem::Platform::RUBY
-
-  s.licenses = ['MIT']
-end
--- a/cmake/BuildTypes.cmake
+++ b/cmake/BuildTypes.cmake
@ -0,0 +1,69 @
+# Add new build types
+#
+# Defines cache variables holding the compiler and linker flags of two extra
+# build configurations, and selects a default CMAKE_BUILD_TYPE.
+#
+# NOTE(review): the flag entries are written with FORCE, so values edited in
+# the cache are reset on every configure. CMake can pre-create empty per-config
+# flag entries when CMAKE_BUILD_TYPE already names a custom configuration,
+# which is presumably why FORCE is needed here - confirm before dropping it.
+
+# ReleaseGG - Release with enabled asserts (-O3, no -DNDEBUG)
+
+set(CMAKE_CXX_FLAGS_RELEASEGG
+    "-O3"
+    CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
+    FORCE)
+set(CMAKE_C_FLAGS_RELEASEGG
+    "-O3"
+    CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
+    FORCE)
+set(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
+    ""
+    CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
+    FORCE)
+set(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
+    ""
+    CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
+    FORCE)
+mark_as_advanced(
+    CMAKE_CXX_FLAGS_RELEASEGG
+    CMAKE_C_FLAGS_RELEASEGG
+    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
+    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG)
+
+# RelWithDebInfoGG - RelWithDebInfo with enabled asserts (-O2 -g, no -DNDEBUG)
+
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
+    "-O2 -g"
+    CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
+    FORCE)
+set(CMAKE_C_FLAGS_RELWITHDEBINFOGG
+    "-O2 -g"
+    CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
+    FORCE)
+set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
+    ""
+    CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
+    FORCE)
+set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
+    ""
+    CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
+    FORCE)
+mark_as_advanced(
+    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
+    CMAKE_C_FLAGS_RELWITHDEBINFOGG
+    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
+    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG)
+
+# Default to Release only when a single-config generator is in use and no build
+# type was chosen. GENERATOR_IS_MULTI_CONFIG also covers multi-config generators
+# other than Xcode and Visual Studio (for example "Ninja Multi-Config"); the
+# XCODE/MSVC checks are kept so behavior on existing setups is unchanged.
+get_property(gg_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if (NOT gg_is_multi_config AND NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+    # Offer the custom configurations next to the standard ones in cmake-gui/ccmake.
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
+endif()
+unset(gg_is_multi_config)
--- a/Show More
+++ b/Show More