mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-24 17:15:19 +00:00
Compare commits
1 Commits
v1.7.3-pre
...
chess
Author | SHA1 | Date | |
---|---|---|---|
59c997ca2d |
@ -1,28 +0,0 @@
|
||||
ARG UBUNTU_VERSION=22.04
|
||||
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=11.7.1
|
||||
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential git cmake libsdl2-dev wget
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Set nvcc architecture
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
# Enable cuBLAS
|
||||
ENV GGML_CUDA=1
|
||||
|
||||
RUN make base.en
|
||||
|
||||
ENTRYPOINT ["/app/main"]
|
@ -1,40 +0,0 @@
|
||||
ARG UBUNTU_VERSION=22.04
|
||||
# This needs to generally match the container host's environment.
|
||||
ARG CUDA_VERSION=12.3.1
|
||||
# Target the CUDA build image
|
||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||
# Target the CUDA runtime image
|
||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
||||
WORKDIR /app
|
||||
|
||||
# Unless otherwise specified, we make a fat build.
|
||||
ARG CUDA_DOCKER_ARCH=all
|
||||
# Set nvcc architecture
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
# Enable cuBLAS
|
||||
ENV GGML_CUDA=1
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential libsdl2-dev wget cmake \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
# Ref: https://stackoverflow.com/a/53464012
|
||||
ENV CUDA_MAIN_VERSION=12.3
|
||||
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
||||
|
||||
COPY .. .
|
||||
RUN make base.en
|
||||
|
||||
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
|
||||
ENV CUDA_MAIN_VERSION=12.3
|
||||
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl ffmpeg wget cmake \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY --from=build /app /app
|
||||
ENTRYPOINT [ "bash", "-c" ]
|
@ -1,19 +0,0 @@
|
||||
FROM ubuntu:22.04 AS build
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential wget cmake \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY .. .
|
||||
RUN make base.en
|
||||
|
||||
FROM ubuntu:22.04 AS runtime
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl ffmpeg libsdl2-dev wget cmake \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||
|
||||
COPY --from=build /app /app
|
||||
ENTRYPOINT [ "bash", "-c" ]
|
55
.github/workflows/bindings-ruby.yml
vendored
55
.github/workflows/bindings-ruby.yml
vendored
@ -1,55 +0,0 @@
|
||||
name: Bindings Tests (Ruby)
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- bindings/ruby/**
|
||||
- src/**/*.c
|
||||
- src/**/*.cpp
|
||||
- src/**/*.h
|
||||
- src/**/*.m
|
||||
- src/**/*.metal
|
||||
- include/**/*.c
|
||||
- include/**/*.cpp
|
||||
- include/**/*.h
|
||||
- include/**/*.m
|
||||
- include/**/*.metal
|
||||
- ggml/**/*.c
|
||||
- ggml/**/*.cpp
|
||||
- ggml/**/*.h
|
||||
- ggml/**/*.m
|
||||
- ggml/**/*.metal
|
||||
- scripts/get-flags.mk
|
||||
- examples/dr_wav.h
|
||||
pull_request:
|
||||
paths:
|
||||
- bindings/ruby/**
|
||||
- src/**/*.c
|
||||
- src/**/*.cpp
|
||||
- src/**/*.h
|
||||
- src/**/*.m
|
||||
- src/**/*.metal
|
||||
- include/**/*.c
|
||||
- include/**/*.cpp
|
||||
- include/**/*.h
|
||||
- include/**/*.m
|
||||
- include/**/*.metal
|
||||
- ggml/**/*.c
|
||||
- ggml/**/*.cpp
|
||||
- ggml/**/*.h
|
||||
- ggml/**/*.m
|
||||
- ggml/**/*.metal
|
||||
- scripts/get-flags.mk
|
||||
- examples/dr_wav.h
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: bindings/ruby
|
||||
steps:
|
||||
- uses: ruby/setup-ruby@v1
|
||||
with:
|
||||
ruby-version: '3.1'
|
||||
- uses: actions/checkout@v4
|
||||
- run: rake test
|
@ -1,4 +1,4 @@
|
||||
name: Bindings Tests (Go)
|
||||
name: Bindings Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
@ -13,10 +13,10 @@ jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/setup-go@v5
|
||||
- uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: '^1.23'
|
||||
- uses: actions/checkout@v4
|
||||
go-version: '^1.19'
|
||||
- uses: actions/checkout@v1
|
||||
- run: |
|
||||
cd bindings/go
|
||||
make test
|
584
.github/workflows/build.yml
vendored
584
.github/workflows/build.yml
vendored
@ -1,301 +1,118 @@
|
||||
name: CI
|
||||
on: [push, pull_request]
|
||||
|
||||
env:
|
||||
ubuntu_image: "ubuntu:22.04"
|
||||
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build ${{ matrix.arch }}
|
||||
- name: Dependencies
|
||||
run: |
|
||||
docker run --platform ${{ matrix.arch }} --rm \
|
||||
-v ${{ github.workspace }}:/workspace \
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential libsdl2-dev cmake
|
||||
cmake -B build
|
||||
cmake --build build --config Release -j $(nproc)'
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install libsdl2-dev
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make
|
||||
make stream
|
||||
|
||||
macOS-latest:
|
||||
runs-on: macOS-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
brew install sdl2 cmake
|
||||
brew install sdl2
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
|
||||
# freeBSD-latest:
|
||||
# runs-on: macos-12
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Build
|
||||
# uses: cross-platform-actions/action@v0.24.0
|
||||
# with:
|
||||
# operating_system: freebsd
|
||||
# version: '13.3'
|
||||
# run: |
|
||||
# sudo pkg update
|
||||
# sudo pkg install -y gmake sdl2 cmake
|
||||
# cmake -B build
|
||||
# cmake --build build --config Release
|
||||
make
|
||||
make stream
|
||||
|
||||
ubuntu-latest-gcc:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [Debug, Release]
|
||||
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build ${{ matrix.arch }}
|
||||
- name: Dependencies
|
||||
run: |
|
||||
docker run --platform ${{ matrix.arch }} --rm \
|
||||
-v ${{ github.workspace }}:/workspace \
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake libsdl2-dev
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install cmake
|
||||
sudo apt-get install libsdl2-dev
|
||||
|
||||
- name: Configure
|
||||
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make
|
||||
ctest -L gh --output-on-failure
|
||||
|
||||
ubuntu-latest-clang:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [Debug, Release]
|
||||
#arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||
# TODO: arm/v7 disabled due to clang bug
|
||||
# https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
|
||||
arch: [linux/amd64, linux/arm64, linux/ppc64le]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build ${{ matrix.arch }}
|
||||
- name: Dependencies
|
||||
run: |
|
||||
docker run --platform ${{ matrix.arch }} --rm \
|
||||
-v ${{ github.workspace }}:/workspace \
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y clang build-essential cmake libsdl2-dev
|
||||
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install cmake
|
||||
sudo apt-get install libsdl2-dev
|
||||
|
||||
- name: Configure
|
||||
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
make
|
||||
ctest -L gh --output-on-failure
|
||||
|
||||
ubuntu-latest-gcc-sanitized:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
||||
arch: [linux/amd64]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Build ${{ matrix.arch }}
|
||||
- name: Dependencies
|
||||
run: |
|
||||
docker run --platform ${{ matrix.arch }} --rm \
|
||||
-v ${{ github.workspace }}:/workspace \
|
||||
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||
set -e
|
||||
apt update
|
||||
apt install -y build-essential cmake
|
||||
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
||||
make
|
||||
ctest -L gh --output-on-failure'
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install cmake
|
||||
|
||||
ubuntu-22-cmake-sycl:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
dwhisper_sycl: [ON]
|
||||
dcmake_c_compiler: [icx]
|
||||
dcmake_cxx_compiler: [icpx]
|
||||
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: add oneAPI to apt
|
||||
shell: bash
|
||||
run: |
|
||||
cd /tmp
|
||||
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||
|
||||
- name: install oneAPI dpcpp compiler
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||
|
||||
- name: install oneAPI MKL library
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt install intel-oneapi-mkl-devel
|
||||
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||
cmake --build . --config Release -j $(nproc)
|
||||
|
||||
ubuntu-22-cmake-sycl-fp16:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
dwhisper_sycl: [ON]
|
||||
dcmake_c_compiler: [icx]
|
||||
dcmake_cxx_compiler: [icpx]
|
||||
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: add oneAPI to apt
|
||||
shell: bash
|
||||
run: |
|
||||
cd /tmp
|
||||
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||
|
||||
- name: install oneAPI dpcpp compiler
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||
|
||||
- name: install oneAPI MKL library
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt install intel-oneapi-mkl-devel
|
||||
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||
cmake --build . --config Release -j $(nproc)
|
||||
|
||||
windows-msys2:
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
||||
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup ${{ matrix.sys }}
|
||||
uses: msys2/setup-msys2@v2
|
||||
with:
|
||||
update: true
|
||||
msystem: ${{matrix.sys}}
|
||||
install: >-
|
||||
base-devel
|
||||
mingw-w64-${{matrix.env}}-toolchain
|
||||
mingw-w64-${{matrix.env}}-cmake
|
||||
mingw-w64-${{matrix.env}}-SDL2
|
||||
mingw-w64-${{matrix.env}}-openblas
|
||||
|
||||
- name: Build using CMake
|
||||
shell: msys2 {0}
|
||||
run: |
|
||||
cmake -B build
|
||||
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||
|
||||
- name: Clean after building using CMake
|
||||
shell: msys2 {0}
|
||||
run: |
|
||||
rm -rf build
|
||||
|
||||
- name: Build using CMake w/ OpenBLAS
|
||||
shell: msys2 {0}
|
||||
run: |
|
||||
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||
make
|
||||
ctest -L gh --output-on-failure
|
||||
|
||||
windows:
|
||||
runs-on: windows-latest
|
||||
@ -308,19 +125,17 @@ jobs:
|
||||
include:
|
||||
- arch: Win32
|
||||
s2arc: x86
|
||||
jnaPath: win32-x86
|
||||
- arch: x64
|
||||
s2arc: x64
|
||||
jnaPath: win32-x86-64
|
||||
- sdl2: ON
|
||||
s2ver: 2.28.5
|
||||
s2ver: 2.26.0
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
uses: microsoft/setup-msbuild@v1
|
||||
|
||||
- name: Fetch SDL2 and set SDL2_DIR
|
||||
if: matrix.sdl2 == 'ON'
|
||||
@ -333,7 +148,7 @@ jobs:
|
||||
run: >
|
||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
@ -344,15 +159,9 @@ jobs:
|
||||
if: matrix.sdl2 == 'ON'
|
||||
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||
|
||||
- name: Upload dll
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.jnaPath }}_whisper.dll
|
||||
path: build/bin/${{ matrix.build }}/whisper.dll
|
||||
|
||||
- name: Upload binaries
|
||||
if: matrix.sdl2 == 'ON'
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: whisper-bin-${{ matrix.arch }}
|
||||
path: build/bin/${{ matrix.build }}
|
||||
@ -368,31 +177,29 @@ jobs:
|
||||
sdl2: [ON]
|
||||
include:
|
||||
- arch: Win32
|
||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
|
||||
s2arc: x86
|
||||
- arch: x64
|
||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
|
||||
s2arc: x64
|
||||
- sdl2: ON
|
||||
s2ver: 2.28.5
|
||||
s2ver: 2.26.0
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Export GitHub Actions cache environment variables
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
|
||||
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
uses: microsoft/setup-msbuild@v1
|
||||
|
||||
- name: Install OpenBLAS and pkgconfiglite
|
||||
- name: Fetch OpenBLAS
|
||||
if: matrix.blas == 'ON'
|
||||
run: |
|
||||
vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas
|
||||
choco install pkgconfiglite
|
||||
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
||||
7z x blas.zip -oblas -y
|
||||
copy blas/include/cblas.h .
|
||||
copy blas/include/openblas_config.h .
|
||||
echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
||||
|
||||
- name: Fetch SDL2 and set SDL2_DIR
|
||||
if: matrix.sdl2 == 'ON'
|
||||
@ -404,20 +211,19 @@ jobs:
|
||||
- name: Configure
|
||||
run: >
|
||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||
-DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
-DGGML_BLAS=${{ matrix.blas }}
|
||||
-DGGML_BLAS_VENDOR=OpenBLAS
|
||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||
-DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
|
||||
-DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
|
||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
cd ./build
|
||||
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
||||
|
||||
- name: Copy openblas.dll
|
||||
- name: Copy libopenblas.dll
|
||||
if: matrix.blas == 'ON'
|
||||
run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }}
|
||||
run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
||||
|
||||
- name: Copy SDL2.dll
|
||||
if: matrix.sdl2 == 'ON'
|
||||
@ -425,78 +231,11 @@ jobs:
|
||||
|
||||
- name: Upload binaries
|
||||
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: whisper-blas-bin-${{ matrix.arch }}
|
||||
path: build/bin/${{ matrix.build }}
|
||||
|
||||
# TODO: fix and re-enable
|
||||
# windows-cublas:
|
||||
# runs-on: windows-2019
|
||||
#
|
||||
# strategy:
|
||||
# matrix:
|
||||
# build: [Release]
|
||||
# arch: [x64]
|
||||
# cublas: [ON]
|
||||
# sdl2: [ON]
|
||||
# cuda-toolkit: [12.2.0, 11.8.0]
|
||||
# include:
|
||||
# - arch: x64
|
||||
# s2arc: x64
|
||||
# - sdl2: ON
|
||||
# s2ver: 2.28.5
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Add msbuild to PATH
|
||||
# uses: microsoft/setup-msbuild@v2
|
||||
#
|
||||
# - name: Install CUDA Toolkit
|
||||
# id: cuda-toolkit
|
||||
# uses: Jimver/cuda-toolkit@v0.2.15
|
||||
# with:
|
||||
# cuda: '${{ matrix.cuda-toolkit }}'
|
||||
#
|
||||
# - name: Fetch SDL2 and set SDL2_DIR
|
||||
# if: matrix.sdl2 == 'ON'
|
||||
# run: |
|
||||
# C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
|
||||
# 7z x sdl2.zip
|
||||
# echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
||||
#
|
||||
# - name: Configure
|
||||
# run: >
|
||||
# cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||
# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
# -DGGML_CUDA=${{ matrix.cublas }}
|
||||
# -DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||
#
|
||||
# - name: Build ${{ matrix.cuda-toolkit }}
|
||||
# run: |
|
||||
# cd ./build
|
||||
# cmake --build . --config ${{ matrix.build }}
|
||||
#
|
||||
# - name: Copy CUDA DLLs
|
||||
# run: >
|
||||
# Copy-Item -PassThru
|
||||
# -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
|
||||
# -Include cudart64_*,cublas64_*,cublasLt64_*
|
||||
# -Destination build/bin/${{ matrix.build }}
|
||||
#
|
||||
# - name: Copy SDL2.dll
|
||||
# if: matrix.sdl2 == 'ON'
|
||||
# run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||
#
|
||||
# - name: Upload binaries
|
||||
# if: matrix.sdl2 == 'ON'
|
||||
# uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
|
||||
# path: build/bin/${{ matrix.build }}
|
||||
|
||||
emscripten:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@ -506,174 +245,23 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Setup emsdk
|
||||
uses: mymindstorm/setup-emsdk@v14
|
||||
|
||||
- name: Verify
|
||||
run: emcc -v
|
||||
|
||||
- name: Build
|
||||
- name: Dependencies
|
||||
run: |
|
||||
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
make
|
||||
|
||||
ios-xcode-build:
|
||||
runs-on: macos-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
build: [Release]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
|
||||
tar -xvf master.tar.gz
|
||||
emsdk-master/emsdk update
|
||||
emsdk-master/emsdk install latest
|
||||
emsdk-master/emsdk activate latest
|
||||
|
||||
- name: Configure
|
||||
run: |
|
||||
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
|
||||
mkdir models/ggml-base.en-encoder.mlmodelc
|
||||
run: echo "tmp"
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
sysctl -a
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G Xcode .. \
|
||||
-DGGML_METAL_USE_BF16=ON \
|
||||
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||
-DWHISPER_BUILD_EXAMPLES=OFF \
|
||||
-DWHISPER_BUILD_TESTS=OFF \
|
||||
-DWHISPER_BUILD_SERVER=OFF \
|
||||
-DCMAKE_SYSTEM_NAME=iOS \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||
sudo cmake --install . --config Release
|
||||
|
||||
- name: xcodebuild for swift package
|
||||
id: xcodebuild
|
||||
run: |
|
||||
xcodebuild -scheme whisper-Package -destination 'generic/platform=iOS'
|
||||
|
||||
#- name: Build objc example
|
||||
# run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos build
|
||||
|
||||
- name: Build swiftui example
|
||||
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
|
||||
|
||||
# TODO: update android build and re-enable when it works
|
||||
# android:
|
||||
# runs-on: ubuntu-latest
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# path: whisper
|
||||
#
|
||||
# - name: Install Java
|
||||
# uses: actions/setup-java@v4
|
||||
# with:
|
||||
# distribution: zulu
|
||||
# java-version: 21
|
||||
#
|
||||
# - name: Setup Android SDK
|
||||
# uses: android-actions/setup-android@v3
|
||||
#
|
||||
# - name: Build
|
||||
# run: |
|
||||
# cd whisper/examples/whisper.android
|
||||
# ./gradlew assembleRelease --no-daemon
|
||||
#
|
||||
# - name: Build with external ggml
|
||||
# run: |
|
||||
# export PATH_TO_GGML=$PWD/ggml
|
||||
# cd whisper/examples/whisper.android
|
||||
# ./gradlew assembleRelease --no-daemon
|
||||
|
||||
# TODO: disable because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
|
||||
# android_java:
|
||||
# runs-on: ubuntu-latest
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: set up JDK 11
|
||||
# uses: actions/setup-java@v4
|
||||
# with:
|
||||
# java-version: '11'
|
||||
# distribution: 'temurin'
|
||||
# cache: gradle
|
||||
#
|
||||
# - name: Setup Android SDK
|
||||
# uses: android-actions/setup-android@v3
|
||||
# with:
|
||||
# cmdline-tools-version: 9.0
|
||||
#
|
||||
# - name: Build
|
||||
# run: |
|
||||
# cd examples/whisper.android.java
|
||||
# chmod +x ./gradlew
|
||||
# ./gradlew assembleRelease
|
||||
|
||||
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
|
||||
# java:
|
||||
# needs: [ 'windows' ]
|
||||
# runs-on: windows-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Install Java
|
||||
# uses: actions/setup-java@v4
|
||||
# with:
|
||||
# distribution: zulu
|
||||
# java-version: 20
|
||||
#
|
||||
# - name: Download Windows lib
|
||||
# uses: actions/download-artifact@v4
|
||||
# with:
|
||||
# name: win32-x86-64_whisper.dll
|
||||
# path: bindings/java/build/generated/resources/main/win32-x86-64
|
||||
#
|
||||
# - name: Build
|
||||
# run: |
|
||||
# models\download-ggml-model.cmd tiny.en
|
||||
# cd bindings/java
|
||||
# chmod +x ./gradlew
|
||||
# ./gradlew build
|
||||
#
|
||||
# - name: Upload jar
|
||||
# uses: actions/upload-artifact@v4
|
||||
# with:
|
||||
# name: whispercpp.jar
|
||||
# path: bindings/java/build/libs/whispercpp-*.jar
|
||||
#
|
||||
# - name: Publish package
|
||||
# if: ${{ github.ref == 'refs/heads/master' }}
|
||||
# uses: gradle/gradle-build-action@v2.4.2
|
||||
# with:
|
||||
# arguments: publish
|
||||
# build-root-directory: bindings/java
|
||||
# env:
|
||||
# MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
|
||||
# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
|
||||
# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||
|
||||
quantize:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Test quantize
|
||||
run: |
|
||||
./models/download-ggml-model.sh tiny.en
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
|
||||
pushd emsdk-master
|
||||
source ./emsdk_env.sh
|
||||
popd
|
||||
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||
make
|
||||
|
59
.github/workflows/docker.yml
vendored
59
.github/workflows/docker.yml
vendored
@ -1,59 +0,0 @@
|
||||
name: Publish Docker image
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
push_to_registry:
|
||||
name: Push Docker image to Docker Hub
|
||||
if: github.event.pull_request.draft == false
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
|
||||
#TODO: the cuda image keeps failing - disable for now
|
||||
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
|
||||
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
||||
|
||||
steps:
|
||||
- name: Check out the repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push Docker image (versioned)
|
||||
if: github.event_name == 'push'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: ${{ matrix.config.platform }}
|
||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
||||
file: ${{ matrix.config.dockerfile }}
|
||||
|
||||
- name: Build and push Docker image (tagged)
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name == 'push' }}
|
||||
platforms: ${{ matrix.config.platform }}
|
||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
|
||||
file: ${{ matrix.config.dockerfile }}
|
2
.github/workflows/examples.yml
vendored
2
.github/workflows/examples.yml
vendored
@ -37,7 +37,7 @@ jobs:
|
||||
run: npm install
|
||||
|
||||
- name: Compile addon.node
|
||||
run: npx cmake-js compile -T addon.node -B Release
|
||||
run: npx cmake-js compile -T whisper-addon -B Release
|
||||
|
||||
- name: Download test model
|
||||
run: |
|
||||
|
39
.gitignore
vendored
39
.gitignore
vendored
@ -1,37 +1,23 @@
|
||||
*.o
|
||||
*.a
|
||||
*.d
|
||||
.cache/
|
||||
.coreml/
|
||||
.test/
|
||||
.venv/
|
||||
.vs/
|
||||
.vscode/
|
||||
.DS_Store
|
||||
.vimspector.json
|
||||
/CMakeSettings.json
|
||||
/talk-llama.dSYM/
|
||||
|
||||
build/
|
||||
build-*/
|
||||
|
||||
# SPM
|
||||
.build/
|
||||
.swiftpm
|
||||
*.metallib
|
||||
|
||||
ggml-metal-embed.metal
|
||||
ggml-metal-embed.metal.tmp
|
||||
build-em/
|
||||
build-debug/
|
||||
build-release/
|
||||
build-static/
|
||||
build-sanitize-addr/
|
||||
build-sanitize-thread/
|
||||
|
||||
/main
|
||||
/stream
|
||||
/command
|
||||
/talk
|
||||
/talk-llama
|
||||
/bench
|
||||
/quantize
|
||||
/server
|
||||
/lsp
|
||||
|
||||
arm_neon.h
|
||||
sync.sh
|
||||
@ -45,16 +31,3 @@ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
|
||||
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
||||
|
||||
extra/bench-gg.txt
|
||||
|
||||
models/*.mlmodel
|
||||
models/*.mlmodelc
|
||||
models/*.mlpackage
|
||||
bindings/java/.gradle/
|
||||
bindings/java/.idea/
|
||||
.idea/
|
||||
|
||||
benchmark_results.csv
|
||||
cmake-build-debug/
|
||||
.cxx/
|
||||
.gradle/
|
||||
local.properties
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -0,0 +1,3 @@
|
||||
[submodule "bindings/ios"]
|
||||
path = bindings/ios
|
||||
url = https://github.com/ggerganov/whisper.spm
|
||||
|
301
AUTHORS
301
AUTHORS
@ -1,301 +0,0 @@
|
||||
# date: Tue Apr 9 20:27:03 EEST 2024
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0/0 <zero@imaskeleton.me>
|
||||
0cc4m <picard12@live.de>
|
||||
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
||||
Aaron Taylor <aaron@exphat.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
||||
AfryMask <AfryMask@163.com>
|
||||
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
Akash Mahajan <akash7190@gmail.com>
|
||||
Akash Mahajan <akashmjn@stanford.edu>
|
||||
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
||||
Alan <unknown>
|
||||
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
||||
Alex Evgrashin <aevgrashin@yandex.ru>
|
||||
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
||||
Alexandru Mariuti <alex@mariuti.com>
|
||||
Alexey Kharlamov <alexey@kharlamov.biz>
|
||||
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
||||
Ali Alameh <ali.alameh@isae.edu.lb>
|
||||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Andreu Huguet <andreuhuguet@gmail.com>
|
||||
Andrew Huynh <a5thuynh@gmail.com>
|
||||
Andrew S <andrews54757@gmail.com>
|
||||
Andy Maloney <asmaloney@gmail.com>
|
||||
Anton Kostin <masguit42@users.noreply.github.com>
|
||||
Artyom Mezin <psycho.fading@gmail.com>
|
||||
Asad Memon <asad.lionpk@gmail.com>
|
||||
Ashraful Islam <ashraful.meche@gmail.com>
|
||||
AsukaMinato <asukaminato@nyan.eu.org>
|
||||
AustinMroz <austinmroz@utexas.edu>
|
||||
Avik Sengupta <avik@sengupta.net>
|
||||
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
||||
Baffin Lee <baffinlee@gmail.com>
|
||||
Ben Nortier <bjnortier@gmail.com>
|
||||
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
||||
Bo-Yi Wu <appleboy.tw@gmail.com>
|
||||
Boris Bliznioukov <blib@mail.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
||||
Brian Murray <brian@bmurray.ca>
|
||||
CRD716 <crd716@gmail.com>
|
||||
Canis Lupus <Canis-UK@users.noreply.github.com>
|
||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
||||
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
||||
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
||||
Chidi Williams <williamschidi1@gmail.com>
|
||||
Christian <12550267+iceychris@users.noreply.github.com>
|
||||
Clifford Heath <clifford.heath@gmail.com>
|
||||
Colin <github@whoisc.cc>
|
||||
DGdev91 <DGdev91@users.noreply.github.com>
|
||||
Damian Czaja <trojan295@protonmail.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
David <dnhkng@gmail.com>
|
||||
David Thorpe <djt@mutablelogic.com>
|
||||
Davidson Francis <davidsondfgl@gmail.com>
|
||||
Dener Stassun <denerstassun@gmail.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Digipom <admin@digipom.com>
|
||||
Dimo <dimo@ieee.org>
|
||||
Dody Suria Wijaya <dodysw@gmail.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Duncan McConnell <ddmcconnell4@gmail.com>
|
||||
Egor Egorov <me@egorfine.com>
|
||||
Elkana Bardugo <ttv200@gmail.com>
|
||||
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Eric Swanson <eswanson@alloscomp.com>
|
||||
Eric Tendian <erictendian@gmail.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Martin <evan.martin@gmail.com>
|
||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
||||
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
||||
Fangjun Kuang <csukuangfj@gmail.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
||||
Gang Chen <goncha@gmail.com>
|
||||
Gavin Cai <gavin1818@hotmail.com>
|
||||
George Hindle <george@georgehindle.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Gleicon Moraes <gleicon@gmail.com>
|
||||
Gregor Jasny <gjasny@googlemail.com>
|
||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Hang <bebound@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
||||
Ian Bicking <ian@ianbicking.org>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ikko Ashimine <eltociear@gmail.com>
|
||||
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
||||
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
||||
Ivan Gorin <ivangorin21@gmail.com>
|
||||
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JacobLinCool <jacoblincool@gmail.com>
|
||||
Jakub Ráček <blizzcz@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jay Binks <jaybinks@gmail.com>
|
||||
Jhen-Jie Hong <developer@jhen.me>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jo Liss <joliss42@gmail.com>
|
||||
Johan <jr.raffin@gmail.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
John Balis <phobossystems@gmail.com>
|
||||
Jonathan Soo <jcsoo@agora.com>
|
||||
Jonno <1160532+razodactyl@users.noreply.github.com>
|
||||
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
||||
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
||||
Josh Bleecher Snyder <josharian@gmail.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
||||
Justine Tunney <jtunney@gmail.com>
|
||||
KP Kaiser <kirk@zothcorp.com>
|
||||
Kamilake <exjang0@gmail.com>
|
||||
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Kevin Brothaler <admin@digipom.com>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kreijstal <rainb@tfwno.gf>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
LBlue <153975653+lbluep@users.noreply.github.com>
|
||||
Larry Battle <larry.battle.tech@gmail.com>
|
||||
Laytan Laats <laytanlaats@hotmail.com>
|
||||
Leo Moll <leo.moll@yeasoft.com>
|
||||
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
||||
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
||||
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
||||
Luis Herrera <herrera-luis@users.noreply.github.com>
|
||||
Lukas Rist <glaslos@gmail.com>
|
||||
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
||||
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
||||
Maciek <maciek.mab122@gmail.com>
|
||||
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
||||
Martin Warnaar <martinwarnaar@gmail.com>
|
||||
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matija Pevec <mightymatth@users.noreply.github.com>
|
||||
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||
Michael Rienstra <mrienstra@gmail.com>
|
||||
Mikhail Grigorev <sleuthhound@gmail.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohit Agarwal <mohit@sdf.org>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
Nicholas Albion <nalbion@yahoo.com>
|
||||
Niels Mayer <Niels.Mayer@gmail.com>
|
||||
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
||||
Oleg Sidorov <me@whitebox.io>
|
||||
Oleg Sidorov <oleg@sidorov.nl>
|
||||
Ondrej Kokes <ondrej.kokes@gmail.com>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Philipp Zabel <philipp.zabel@gmail.com>
|
||||
Philippe Normand <phil@base-art.net>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
||||
RelatedTitle <r3latedtitle@gmail.com>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Rich Jones <miserlou@gmail.com>
|
||||
Robin <robin.xw@hotmail.com>
|
||||
Roddur Dasgupta <roddurd@gmail.com>
|
||||
Roland Rabien <figbug@gmail.com>
|
||||
Rotem Dan <rotemdan@gmail.com>
|
||||
Ryan Hitchman <hitchmanr@gmail.com>
|
||||
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
||||
RyanChang <ftes90015@gmail.com>
|
||||
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
||||
Sam Pullara <spullara@gmail.com>
|
||||
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
||||
Simon Moisselin <simon.moisstoll@gmail.com>
|
||||
Sindre Sorhus <sindresorhus@gmail.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
Syahmi Azhar <prsyahmi@gmail.com>
|
||||
Syed Jafri <syedjafri97@gmail.com>
|
||||
Sơn Phan Trung <phantrungson17@gmail.com>
|
||||
Taisei Mima <bhbstar.me@gmail.com>
|
||||
Takeshi Inoue <inoue.takeshi@gmail.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Taras Glek <taras@thegp.com>
|
||||
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
||||
Thijs Raymakers <thijs@raymakers.nl>
|
||||
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
||||
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
||||
Tienshiao Ma <tienshiao@tienshiao.org>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Tobrun <tobrun.van.nuland@gmail.com>
|
||||
Todd <taf2@users.noreply.github.com>
|
||||
Tong Li <31761981+litongjava@users.noreply.github.com>
|
||||
Topping1 <78745143+Topping1@users.noreply.github.com>
|
||||
Travis Cline <travis.cline@gmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
Vadim Peretokin <vperetokin@hey.com>
|
||||
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
||||
Vulcan <93451215+trholding@users.noreply.github.com>
|
||||
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Yajing Tang <phillis@google.com>
|
||||
Yang Shen <aplshenyang@gmail.com>
|
||||
Yunès <jean.baptiste.yunes@free.fr>
|
||||
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
||||
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
||||
Zollner <24618122+Zolliner@users.noreply.github.com>
|
||||
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
andypayne <apayne@gmail.com>
|
||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
be-next <jerome.ramette@gmail.com>
|
||||
bert hubert <bert@hubertnet.nl>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
bocytko <bocytko+github@gmail.com>
|
||||
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
||||
boolemancer <boolemancer@gmail.com>
|
||||
bradmit <151883577+bradmit@users.noreply.github.com>
|
||||
brunofaustino <b.fa.amorim@gmail.com>
|
||||
bssrdf <merlintiger@hotmail.com>
|
||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cdosoftei <ciprian.dosoftei@gmail.com>
|
||||
clach04 <Chris.Clark@actian.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
conradg <conradjgodfrey@gmail.com>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
denersc <denerstassun@gmail.com>
|
||||
dscripka <dscripka@users.noreply.github.com>
|
||||
duthils <duthils@duthils.net>
|
||||
ecneladis <ecneladis@users.noreply.github.com>
|
||||
faker <nspyia2002@gmail.com>
|
||||
fitzsim <fitzsim@fitzsim.org>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
genevera (she/her) <genevera@users.noreply.github.com>
|
||||
geniusnut <geniusnut@gmail.com>
|
||||
greeshmay <greeshmay@gmail.com>
|
||||
hydai <z54981220@gmail.com>
|
||||
iamthad <thadeus.j.fleming@gmail.com>
|
||||
james wolf <contractorwolf@hotmail.com>
|
||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
||||
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
||||
jwijffels <jwijffels@bnosac.be>
|
||||
kamranjon <kamranjon@gmail.com>
|
||||
katsu560 <katsu560oo-@docomo.ne.jp>
|
||||
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
||||
keyehzy <msamuel@aluno.puc-rio.br>
|
||||
leejet <leejet714@gmail.com>
|
||||
litong <31761981+litongjava@users.noreply.github.com>
|
||||
lnyan <lkwq007@gmail.com>
|
||||
m.bell <m.bell@techsmith.com>
|
||||
mkiol <mkiol@users.noreply.github.com>
|
||||
novag <7754358+novag@users.noreply.github.com>
|
||||
pajowu <pajowu@pajowu.de>
|
||||
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
||||
rlapray <lapray.romain@gmail.com>
|
||||
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
||||
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
||||
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
slashlib <slashlib@users.noreply.github.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
st-gr <38470677+st-gr@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thefinaldegree <thefinaldegree@gmail.com>
|
||||
trixirt <trix@redhat.com>
|
||||
ulatekh <ulatekh@yahoo.com>
|
||||
undef <undefdev@gmail.com>
|
||||
venkr <venkateshrameshkumar+1@gmail.com>
|
||||
vicalloy <zbirder@gmail.com>
|
||||
xdrudis <xavierdrudis@yahoo.es>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
布客飞龙 <562826179@qq.com>
|
||||
Артём Земляк <azemlyak@smart-consulting.ru>
|
316
CMakeLists.txt
316
CMakeLists.txt
@ -1,31 +1,21 @@
|
||||
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||
project("whisper.cpp" C CXX)
|
||||
project("whisper.cpp" VERSION 1.7.2)
|
||||
include(CheckIncludeFileCXX)
|
||||
cmake_minimum_required (VERSION 3.0)
|
||||
|
||||
set(SOVERSION 1)
|
||||
|
||||
#set(CMAKE_WARN_DEPRECATED YES)
|
||||
set(CMAKE_WARN_UNUSED_CLI YES)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
project(whisper.cpp VERSION 1.2.0)
|
||||
|
||||
# Add path to modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
set(WHISPER_STANDALONE ON)
|
||||
|
||||
include(git-vars)
|
||||
include(GitVars)
|
||||
include(BuildTypes)
|
||||
|
||||
# configure project version
|
||||
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
||||
endif()
|
||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||
else()
|
||||
set(WHISPER_STANDALONE OFF)
|
||||
@ -35,11 +25,6 @@ if (EMSCRIPTEN)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
|
||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||
|
||||
# TODO: without these, we get the following error:
|
||||
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||
else()
|
||||
if (MINGW)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
@ -48,136 +33,221 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
# options
|
||||
|
||||
#
|
||||
# option list
|
||||
#
|
||||
option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
|
||||
# general
|
||||
option(WHISPER_CCACHE "whisper: use ccache if available" ON)
|
||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||
|
||||
# debug
|
||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||
|
||||
# build
|
||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||
|
||||
option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
|
||||
|
||||
if (APPLE)
|
||||
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
|
||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
||||
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
||||
else()
|
||||
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
|
||||
endif()
|
||||
|
||||
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
||||
|
||||
# sanitizers
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||
|
||||
# extra artifacts
|
||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
||||
|
||||
# 3rd party libs
|
||||
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||
endif()
|
||||
|
||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||
|
||||
# Required for relocatable CMake package
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||
|
||||
# override ggml options
|
||||
set(GGML_CCACHE ${WHISPER_CCACHE})
|
||||
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
||||
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
||||
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
||||
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
||||
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
||||
|
||||
# transition helpers
|
||||
function (whisper_option_depr TYPE OLD NEW)
|
||||
if (${OLD})
|
||||
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
||||
set(${NEW} ON)
|
||||
if (NOT MSVC)
|
||||
if (WHISPER_SANITIZE_THREAD)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
||||
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
||||
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
||||
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
||||
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
||||
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||
if (WHISPER_SANITIZE_ADDRESS)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
#
|
||||
# build the library
|
||||
#
|
||||
|
||||
if (NOT TARGET ggml)
|
||||
add_subdirectory(ggml)
|
||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||
if (WHISPER_SANITIZE_UNDEFINED)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||
|
||||
# dependencies
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
# on APPLE - include Accelerate framework
|
||||
if (APPLE AND NOT WHISPER_NO_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
if (ACCELERATE_FRAMEWORK)
|
||||
message(STATUS "Accelerate framework found")
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
||||
else()
|
||||
message(WARNING "Accelerate framework not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WHISPER_SUPPORT_OPENBLAS)
|
||||
find_library(OPENBLAS_LIB
|
||||
NAMES openblas libopenblas
|
||||
)
|
||||
if (OPENBLAS_LIB)
|
||||
message(STATUS "OpenBLAS found")
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${OPENBLAS_LIB})
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
||||
else()
|
||||
message(WARNING "OpenBLAS not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# compiler flags
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
|
||||
endif ()
|
||||
|
||||
if (WHISPER_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
||||
-Wall \
|
||||
-Wextra \
|
||||
-Wpedantic \
|
||||
-Wshadow \
|
||||
-Wcast-qual \
|
||||
-Wstrict-prototypes \
|
||||
-Wpointer-arith \
|
||||
-Wno-unused-function \
|
||||
")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
||||
-Wall \
|
||||
-Wextra \
|
||||
-Wpedantic \
|
||||
-Wcast-qual \
|
||||
")
|
||||
else()
|
||||
# todo : msvc
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT MSVC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
message(STATUS "ARM detected")
|
||||
else()
|
||||
message(STATUS "x86 detected")
|
||||
if (MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
|
||||
else()
|
||||
if (EMSCRIPTEN)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
else()
|
||||
if(NOT WHISPER_NO_AVX)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
||||
endif()
|
||||
if(NOT WHISPER_NO_AVX2)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
|
||||
endif()
|
||||
if(NOT WHISPER_NO_FMA)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WHISPER_PERF)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
||||
endif()
|
||||
add_subdirectory(src)
|
||||
|
||||
#
|
||||
# install
|
||||
# whisper - this is the main library of the project
|
||||
#
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakePackageConfigHelpers)
|
||||
set(TARGET whisper)
|
||||
|
||||
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
||||
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
||||
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
||||
add_library(${TARGET}
|
||||
ggml.h
|
||||
ggml.c
|
||||
whisper.h
|
||||
whisper.cpp
|
||||
)
|
||||
|
||||
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
.
|
||||
)
|
||||
|
||||
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
||||
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
||||
if (MSVC)
|
||||
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
||||
configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
||||
PATH_VARS
|
||||
WHISPER_INCLUDE_INSTALL_DIR
|
||||
WHISPER_LIB_INSTALL_DIR
|
||||
WHISPER_BIN_INSTALL_DIR )
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
else()
|
||||
target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
VERSION ${WHISPER_INSTALL_VERSION}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
if (BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${TARGET} PUBLIC
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||
target_compile_definitions(${TARGET} PUBLIC
|
||||
WHISPER_SHARED
|
||||
)
|
||||
endif()
|
||||
|
||||
configure_file(cmake/whisper.pc.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
@ONLY)
|
||||
if (EMSCRIPTEN)
|
||||
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
||||
endif()
|
||||
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
DESTINATION lib/pkgconfig)
|
||||
target_compile_definitions(${TARGET} PUBLIC
|
||||
${WHISPER_EXTRA_FLAGS}
|
||||
)
|
||||
|
||||
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
|
||||
|
||||
install(TARGETS ${TARGET}
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib/static
|
||||
RUNTIME DESTINATION bin
|
||||
PUBLIC_HEADER DESTINATION include
|
||||
)
|
||||
|
||||
#
|
||||
# bindings
|
||||
#
|
||||
|
||||
add_subdirectory(bindings)
|
||||
|
||||
#
|
||||
# programs, examples and tests
|
||||
#
|
||||
|
||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
#include(CTest)
|
||||
#add_subdirectory(tests)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (WHISPER_BUILD_EXAMPLES)
|
||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 The ggml authors
|
||||
Copyright (c) 2022 Georgi Gerganov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
245
Makefile
245
Makefile
@ -1,12 +1,222 @@
|
||||
ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifndef UNAME_P
|
||||
UNAME_P := $(shell uname -p)
|
||||
endif
|
||||
|
||||
ifndef UNAME_M
|
||||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
CCV := $(shell $(CC) --version | head -n 1)
|
||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||
|
||||
# Mac OS + Arm can report x86_64
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
ifneq ($(UNAME_P),arm)
|
||||
SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
|
||||
ifeq ($(SYSCTL_M),1)
|
||||
# UNAME_P := arm
|
||||
# UNAME_M := arm64
|
||||
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# Compile flags
|
||||
#
|
||||
|
||||
CFLAGS = -I. -O3 -std=c11 -fPIC
|
||||
CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
|
||||
LDFLAGS =
|
||||
|
||||
# OS specific
|
||||
# TODO: support Windows
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
endif
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
endif
|
||||
ifeq ($(UNAME_S),FreeBSD)
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
endif
|
||||
ifeq ($(UNAME_S),Haiku)
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
endif
|
||||
|
||||
# Architecture specific
|
||||
# TODO: probably these flags need to be tweaked on some architectures
|
||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -mf16c
|
||||
AVX1_M := $(shell sysctl machdep.cpu.features)
|
||||
ifneq (,$(findstring FMA,$(AVX1_M)))
|
||||
CFLAGS += -mfma
|
||||
endif
|
||||
ifneq (,$(findstring AVX1.0,$(AVX1_M)))
|
||||
CFLAGS += -mavx
|
||||
endif
|
||||
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
|
||||
ifneq (,$(findstring AVX2,$(AVX2_M)))
|
||||
CFLAGS += -mavx2
|
||||
endif
|
||||
else ifeq ($(UNAME_S),Linux)
|
||||
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
|
||||
ifneq (,$(findstring avx,$(AVX1_M)))
|
||||
CFLAGS += -mavx
|
||||
endif
|
||||
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
|
||||
ifneq (,$(findstring avx2,$(AVX2_M)))
|
||||
CFLAGS += -mavx2
|
||||
endif
|
||||
FMA_M := $(shell grep "fma " /proc/cpuinfo)
|
||||
ifneq (,$(findstring fma,$(FMA_M)))
|
||||
CFLAGS += -mfma
|
||||
endif
|
||||
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
|
||||
ifneq (,$(findstring f16c,$(F16C_M)))
|
||||
CFLAGS += -mf16c
|
||||
endif
|
||||
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
|
||||
ifneq (,$(findstring sse3,$(SSE3_M)))
|
||||
CFLAGS += -msse3
|
||||
endif
|
||||
else ifeq ($(UNAME_S),Haiku)
|
||||
AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
|
||||
ifneq (,$(findstring avx,$(AVX1_M)))
|
||||
CFLAGS += -mavx
|
||||
endif
|
||||
AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
|
||||
ifneq (,$(findstring avx2,$(AVX2_M)))
|
||||
CFLAGS += -mavx2
|
||||
endif
|
||||
FMA_M := $(shell sysinfo -cpu | grep "FMA ")
|
||||
ifneq (,$(findstring fma,$(FMA_M)))
|
||||
CFLAGS += -mfma
|
||||
endif
|
||||
F16C_M := $(shell sysinfo -cpu | grep "F16C ")
|
||||
ifneq (,$(findstring f16c,$(F16C_M)))
|
||||
CFLAGS += -mf16c
|
||||
endif
|
||||
else
|
||||
CFLAGS += -mfma -mf16c -mavx -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(UNAME_M),amd64)
|
||||
CFLAGS += -mavx -mavx2 -mfma -mf16c
|
||||
endif
|
||||
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
||||
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
||||
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
||||
CFLAGS += -mpower9-vector
|
||||
endif
|
||||
# Require c++23's std::byteswap for big-endian support.
|
||||
ifeq ($(UNAME_M),ppc64)
|
||||
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
|
||||
endif
|
||||
endif
|
||||
ifndef WHISPER_NO_ACCELERATE
|
||||
# Mac M1 - include Accelerate framework
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -DGGML_USE_ACCELERATE
|
||||
LDFLAGS += -framework Accelerate
|
||||
endif
|
||||
endif
|
||||
ifdef WHISPER_OPENBLAS
|
||||
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||
LDFLAGS += -lopenblas
|
||||
endif
|
||||
ifdef WHISPER_GPROF
|
||||
CFLAGS += -pg
|
||||
CXXFLAGS += -pg
|
||||
endif
|
||||
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
||||
endif
|
||||
ifneq ($(filter armv6%,$(UNAME_M)),)
|
||||
# Raspberry Pi 1, 2, 3
|
||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
|
||||
endif
|
||||
ifneq ($(filter armv7%,$(UNAME_M)),)
|
||||
# Raspberry Pi 4
|
||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
|
||||
endif
|
||||
ifneq ($(filter armv8%,$(UNAME_M)),)
|
||||
# Raspberry Pi 4
|
||||
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
||||
endif
|
||||
|
||||
#
|
||||
# Print build information
|
||||
#
|
||||
|
||||
$(info I whisper.cpp build info: )
|
||||
$(info I UNAME_S: $(UNAME_S))
|
||||
$(info I UNAME_P: $(UNAME_P))
|
||||
$(info I UNAME_M: $(UNAME_M))
|
||||
$(info I CFLAGS: $(CFLAGS))
|
||||
$(info I CXXFLAGS: $(CXXFLAGS))
|
||||
$(info I LDFLAGS: $(LDFLAGS))
|
||||
$(info I CC: $(CCV))
|
||||
$(info I CXX: $(CXXV))
|
||||
$(info )
|
||||
|
||||
default: main
|
||||
|
||||
#
|
||||
# Build library
|
||||
#
|
||||
|
||||
ggml.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
||||
|
||||
whisper.o: whisper.cpp whisper.h
|
||||
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
|
||||
|
||||
libwhisper.a: ggml.o whisper.o
|
||||
$(AR) rcs libwhisper.a ggml.o whisper.o
|
||||
|
||||
libwhisper.so: ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
|
||||
|
||||
clean:
|
||||
rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
|
||||
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
|
||||
CC_SDL=`sdl2-config --cflags --libs`
|
||||
|
||||
main: examples/main/main.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o whisper.o -o main $(LDFLAGS)
|
||||
./main -h
|
||||
|
||||
stream: examples/stream/stream.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
command: examples/command/command.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
bench: examples/bench/bench.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
|
||||
|
||||
#
|
||||
# Audio samples
|
||||
#
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
|
||||
# download a few audio samples into folder "./samples":
|
||||
.PHONY: samples
|
||||
samples:
|
||||
@ -16,19 +226,12 @@ samples:
|
||||
@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
|
||||
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
||||
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
||||
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
|
||||
@echo "Converting to 16-bit WAV ..."
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
||||
@rm samples/*.ogg
|
||||
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
|
||||
@rm samples/mm1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
|
||||
@rm samples/a13.mp3
|
||||
@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
|
||||
@rm samples/diffusion2023-07-03.flac
|
||||
|
||||
#
|
||||
# Models
|
||||
@ -46,14 +249,10 @@ samples:
|
||||
.PHONY: medium.en
|
||||
.PHONY: medium
|
||||
.PHONY: large-v1
|
||||
.PHONY: large-v2
|
||||
.PHONY: large-v3
|
||||
.PHONY: large-v3-turbo
|
||||
.PHONY: large
|
||||
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
|
||||
bash ./models/download-ggml-model.sh $@
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
@echo ""
|
||||
@echo "==============================================="
|
||||
@echo "Running $@ on all samples in ./samples ..."
|
||||
@ -64,6 +263,14 @@ tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 larg
|
||||
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
||||
echo "----------------------------------------------" ; \
|
||||
echo "" ; \
|
||||
./build/bin/main -m models/ggml-$@.bin -f $$f ; \
|
||||
./main -m models/ggml-$@.bin -f $$f ; \
|
||||
echo "" ; \
|
||||
done
|
||||
|
||||
#
|
||||
# Tests
|
||||
#
|
||||
|
||||
.PHONY: tests
|
||||
tests:
|
||||
bash ./tests/run-tests.sh
|
||||
|
@ -1,19 +0,0 @@
|
||||
// swift-tools-version:5.5
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "whisper",
|
||||
platforms: [
|
||||
.macOS(.v12),
|
||||
.iOS(.v14),
|
||||
.watchOS(.v4),
|
||||
.tvOS(.v14)
|
||||
],
|
||||
products: [
|
||||
.library(name: "whisper", targets: ["whisper"]),
|
||||
],
|
||||
targets: [
|
||||
.systemLibrary(name: "whisper", pkgConfig: "whisper"),
|
||||
]
|
||||
)
|
535
README.md
535
README.md
@ -1,44 +1,37 @@
|
||||
# whisper.cpp
|
||||
|
||||

|
||||
|
||||
[](https://github.com/ggerganov/whisper.cpp/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://conan.io/center/whisper-cpp)
|
||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||
|
||||
Stable: [v1.7.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
Stable: [v1.2.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||
|
||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||
|
||||
- Plain C/C++ implementation without dependencies
|
||||
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
||||
- Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
|
||||
- AVX intrinsics support for x86 architectures
|
||||
- VSX intrinsics support for POWER architectures
|
||||
- Mixed F16 / F32 precision
|
||||
- [Integer quantization support](#quantization)
|
||||
- Low memory usage (Flash Attention)
|
||||
- Zero memory allocations at runtime
|
||||
- [Vulkan support](#vulkan-gpu-support)
|
||||
- Support for CPU-only inference
|
||||
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
|
||||
- [OpenVINO Support](#openvino-support)
|
||||
- [Ascend NPU Support](#ascend-npu-support)
|
||||
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
|
||||
- Runs on the CPU
|
||||
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
|
||||
|
||||
Supported platforms:
|
||||
|
||||
- [x] Mac OS (Intel and Arm)
|
||||
- [x] [iOS](examples/whisper.objc)
|
||||
- [x] [Android](examples/whisper.android)
|
||||
- [x] [Java](bindings/java/README.md)
|
||||
- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
|
||||
- [x] [WebAssembly](examples/whisper.wasm)
|
||||
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
|
||||
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
||||
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
||||
|
||||
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
|
||||
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
|
||||
The entire implementation of the model is contained in 2 source files:
|
||||
|
||||
- Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
|
||||
- Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
|
||||
|
||||
Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
|
||||
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
|
||||
@ -49,59 +42,44 @@ You can also easily make your own offline voice assistant application: [command]
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
|
||||
|
||||
On Apple Silicon, the inference runs fully on the GPU via Metal:
|
||||
|
||||
https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
|
||||
|
||||
Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
|
||||
|
||||
## Implementation details
|
||||
|
||||
- The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
|
||||
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
|
||||
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
|
||||
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
|
||||
- Sample usage is demonstrated in [main.cpp](examples/main)
|
||||
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
||||
- Various other examples are available in the [examples](examples) folder
|
||||
|
||||
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
|
||||
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
|
||||
instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
|
||||
the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
|
||||
|
||||
## Quick start
|
||||
|
||||
First clone the repository:
|
||||
First, download one of the Whisper models converted in [ggml format](models). For example:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
```
|
||||
|
||||
Navigate into the directory:
|
||||
|
||||
```
|
||||
cd whisper.cpp
|
||||
```
|
||||
|
||||
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
||||
|
||||
```bash
|
||||
sh ./models/download-ggml-model.sh base.en
|
||||
bash ./models/download-ggml-model.sh base.en
|
||||
```
|
||||
|
||||
Now build the [main](examples/main) example and transcribe an audio file like this:
|
||||
|
||||
```bash
|
||||
# build the main example
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
make
|
||||
|
||||
# transcribe an audio file
|
||||
./build/bin/main -f samples/jfk.wav
|
||||
./main -f samples/jfk.wav
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
For a quick demo, simply run `make base.en`:
|
||||
|
||||
```text
|
||||
$ make -j base.en
|
||||
```java
|
||||
$ make base.en
|
||||
|
||||
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
|
||||
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
|
||||
@ -119,42 +97,32 @@ options:
|
||||
-d N, --duration N [0 ] duration of audio to process in milliseconds
|
||||
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
||||
-ml N, --max-len N [0 ] maximum segment length in characters
|
||||
-sow, --split-on-word [false ] split on word rather than on token
|
||||
-bo N, --best-of N [5 ] number of best candidates to keep
|
||||
-bs N, --beam-size N [5 ] beam size for beam search
|
||||
-bs N, --beam-size N [-1 ] beam size for beam search
|
||||
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
||||
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
||||
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
||||
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
|
||||
-su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
|
||||
-tr, --translate [false ] translate from source language to english
|
||||
-di, --diarize [false ] stereo audio diarization
|
||||
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
|
||||
-nf, --no-fallback [false ] do not use temperature fallback while decoding
|
||||
-otxt, --output-txt [false ] output result in a text file
|
||||
-ovtt, --output-vtt [false ] output result in a vtt file
|
||||
-osrt, --output-srt [false ] output result in a srt file
|
||||
-olrc, --output-lrc [false ] output result in a lrc file
|
||||
-owts, --output-words [false ] output script for generating karaoke video
|
||||
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
|
||||
-ocsv, --output-csv [false ] output result in a CSV file
|
||||
-oj, --output-json [false ] output result in a JSON file
|
||||
-ojf, --output-json-full [false ] include more information in the JSON file
|
||||
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
||||
-ps, --print-special [false ] print special tokens
|
||||
-pc, --print-colors [false ] print colors
|
||||
-pp, --print-progress [false ] print progress
|
||||
-nt, --no-timestamps [false ] do not print timestamps
|
||||
-nt, --no-timestamps [true ] do not print timestamps
|
||||
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
|
||||
-dl, --detect-language [false ] exit after automatically detecting language
|
||||
--prompt PROMPT [ ] initial prompt
|
||||
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
||||
-f FNAME, --file FNAME [ ] input WAV file path
|
||||
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
||||
-ls, --log-score [false ] log best decoder scores of tokens
|
||||
-ng, --no-gpu [false ] disable GPU
|
||||
|
||||
|
||||
sh ./models/download-ggml-model.sh base.en
|
||||
bash ./models/download-ggml-model.sh base.en
|
||||
Downloading ggml model base.en ...
|
||||
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
|
||||
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
||||
@ -216,7 +184,7 @@ For detailed usage instructions, run: `./main -h`
|
||||
Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
|
||||
For example, you can use `ffmpeg` like this:
|
||||
|
||||
```bash
|
||||
```java
|
||||
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
||||
```
|
||||
|
||||
@ -225,7 +193,7 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
||||
If you want some extra audio samples to play with, simply run:
|
||||
|
||||
```
|
||||
make -j samples
|
||||
make samples
|
||||
```
|
||||
|
||||
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
||||
@ -233,299 +201,32 @@ This will download a few more audio files from Wikipedia and convert them to 16-
|
||||
You can download and run the other models as follows:
|
||||
|
||||
```
|
||||
make -j tiny.en
|
||||
make -j tiny
|
||||
make -j base.en
|
||||
make -j base
|
||||
make -j small.en
|
||||
make -j small
|
||||
make -j medium.en
|
||||
make -j medium
|
||||
make -j large-v1
|
||||
make -j large-v2
|
||||
make -j large-v3
|
||||
make -j large-v3-turbo
|
||||
make tiny.en
|
||||
make tiny
|
||||
make base.en
|
||||
make base
|
||||
make small.en
|
||||
make small
|
||||
make medium.en
|
||||
make medium
|
||||
make large-v1
|
||||
make large
|
||||
```
|
||||
|
||||
## Memory usage
|
||||
|
||||
| Model | Disk | Mem |
|
||||
| ------ | ------- | ------- |
|
||||
| tiny | 75 MiB | ~273 MB |
|
||||
| base | 142 MiB | ~388 MB |
|
||||
| small | 466 MiB | ~852 MB |
|
||||
| medium | 1.5 GiB | ~2.1 GB |
|
||||
| large | 2.9 GiB | ~3.9 GB |
|
||||
|
||||
## Quantization
|
||||
|
||||
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
||||
Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
|
||||
|
||||
Here are the steps for creating and using a quantized model:
|
||||
|
||||
```bash
|
||||
# quantize a model with Q5_0 method
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
||||
|
||||
# run the examples as usual, specifying the quantized model file
|
||||
./build/bin/main -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
|
||||
```
|
||||
|
||||
## Core ML support
|
||||
|
||||
On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
|
||||
speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
|
||||
|
||||
- Install Python dependencies needed for the creation of the Core ML model:
|
||||
|
||||
```bash
|
||||
pip install ane_transformers
|
||||
pip install openai-whisper
|
||||
pip install coremltools
|
||||
```
|
||||
|
||||
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
|
||||
- Python 3.10 is recommended.
|
||||
- MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
|
||||
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
|
||||
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
|
||||
- To activate the environment, use: `conda activate py310-whisper`
|
||||
|
||||
- Generate a Core ML model. For example, to generate a `base.en` model, use:
|
||||
|
||||
```bash
|
||||
./models/generate-coreml-model.sh base.en
|
||||
```
|
||||
|
||||
This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
|
||||
|
||||
- Build `whisper.cpp` with Core ML support:
|
||||
|
||||
```bash
|
||||
# using CMake
|
||||
cmake -B build -DWHISPER_COREML=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
- Run the examples as usual. For example:
|
||||
|
||||
```text
|
||||
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
|
||||
...
|
||||
|
||||
whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
|
||||
whisper_init_state: first run on a device may take a while ...
|
||||
whisper_init_state: Core ML model loaded
|
||||
|
||||
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
|
||||
Next runs are faster.
|
||||
|
||||
For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
|
||||
|
||||
## OpenVINO support
|
||||
|
||||
On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
|
||||
on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
|
||||
|
||||
This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
|
||||
|
||||
- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
|
||||
|
||||
Windows:
|
||||
|
||||
```powershell
|
||||
cd models
|
||||
python -m venv openvino_conv_env
|
||||
openvino_conv_env\Scripts\activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-openvino.txt
|
||||
```
|
||||
|
||||
Linux and macOS:
|
||||
|
||||
```bash
|
||||
cd models
|
||||
python3 -m venv openvino_conv_env
|
||||
source openvino_conv_env/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements-openvino.txt
|
||||
```
|
||||
|
||||
- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
|
||||
|
||||
```
|
||||
python convert-whisper-to-openvino.py --model base.en
|
||||
```
|
||||
|
||||
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
|
||||
is the default location that the OpenVINO extension will search at runtime.
|
||||
|
||||
- Build `whisper.cpp` with OpenVINO support:
|
||||
|
||||
Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
|
||||
|
||||
After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
|
||||
|
||||
Linux:
|
||||
|
||||
```bash
|
||||
source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
|
||||
```
|
||||
|
||||
Windows (cmd):
|
||||
|
||||
```powershell
|
||||
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
|
||||
```
|
||||
|
||||
And then build the project using cmake:
|
||||
|
||||
```bash
|
||||
cmake -B build -DWHISPER_OPENVINO=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
- Run the examples as usual. For example:
|
||||
|
||||
```text
|
||||
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
|
||||
...
|
||||
|
||||
whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
|
||||
whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
|
||||
whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
|
||||
whisper_ctx_init_openvino_encoder: OpenVINO model loaded
|
||||
|
||||
system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
|
||||
cached for the next run.
|
||||
|
||||
For more information about the Core ML implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
|
||||
|
||||
## NVIDIA GPU support
|
||||
|
||||
With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
|
||||
First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
|
||||
|
||||
Now build `whisper.cpp` with CUDA support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_CUDA=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## Vulkan GPU support
|
||||
Cross-vendor solution which allows you to accelerate workload on your GPU.
|
||||
First, make sure your graphics card driver provides support for Vulkan API.
|
||||
|
||||
Now build `whisper.cpp` with Vulkan support:
|
||||
```
|
||||
cmake -B build -DGGML_VULKAN=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## BLAS CPU support via OpenBLAS
|
||||
|
||||
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
||||
First, make sure you have installed `openblas`: https://www.openblas.net/
|
||||
|
||||
Now build `whisper.cpp` with OpenBLAS support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_BLAS=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
## Ascend NPU support
|
||||
|
||||
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
|
||||
|
||||
First, check if your Ascend NPU device is supported:
|
||||
|
||||
**Verified devices**
|
||||
| Ascend NPU | Status |
|
||||
|:-----------------------------:|:-------:|
|
||||
| Atlas 300T A2 | Support |
|
||||
|
||||
Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . The lasted version of CANN is recommanded.
|
||||
|
||||
Now build `whisper.cpp` with CANN support:
|
||||
|
||||
```
|
||||
cmake -B build -DGGML_CANN=1
|
||||
cmake --build build -j --config Release
|
||||
```
|
||||
|
||||
Run the inference examples as usual, for example:
|
||||
|
||||
```
|
||||
./build/bin/main -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
|
||||
```
|
||||
|
||||
*Notes:*
|
||||
|
||||
- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
|
||||
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
|
||||
|
||||
## Docker
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Docker must be installed and running on your system.
|
||||
- Create a folder to store big models & intermediate files (ex. /whisper/models)
|
||||
|
||||
### Images
|
||||
|
||||
We have two Docker images available for this project:
|
||||
|
||||
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
|
||||
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
|
||||
|
||||
### Usage
|
||||
|
||||
```shell
|
||||
# download model and persist it in a local folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./models/download-ggml-model.sh base /models"
|
||||
# transcribe an audio file
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
-v path/to/audios:/audios \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
|
||||
# transcribe an audio file in samples folder
|
||||
docker run -it --rm \
|
||||
-v path/to/models:/models \
|
||||
whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
|
||||
```
|
||||
|
||||
## Installing with Conan
|
||||
|
||||
You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
|
||||
|
||||
```
|
||||
conan install --requires="whisper-cpp/[*]" --build=missing
|
||||
```
|
||||
|
||||
For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
|
||||
| Model | Disk | Mem | SHA |
|
||||
| --- | --- | --- | --- |
|
||||
| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
|
||||
| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
|
||||
| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
|
||||
| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
|
||||
| large | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
||||
|
||||
## Limitations
|
||||
|
||||
- Inference only
|
||||
- No GPU support (yet)
|
||||
|
||||
## Another example
|
||||
|
||||
@ -535,7 +236,7 @@ in about half a minute on a MacBook M1 Pro, using `medium.en` model:
|
||||
<details>
|
||||
<summary>Expand to see the result</summary>
|
||||
|
||||
```text
|
||||
```java
|
||||
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
|
||||
|
||||
whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
|
||||
@ -607,19 +308,17 @@ whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per
|
||||
whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
|
||||
whisper_print_timings: total time = 32733.52 ms
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Real-time audio input example
|
||||
|
||||
This is a naive example of performing real-time inference on audio from your microphone.
|
||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continously.
|
||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
./build/bin/stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
||||
```java
|
||||
make stream
|
||||
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
||||
@ -629,18 +328,14 @@ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a
|
||||
Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
|
||||
to highlight words with high or low confidence:
|
||||
|
||||
```bash
|
||||
./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
|
||||
```
|
||||
|
||||
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
|
||||
|
||||
## Controlling the length of the generated text segments (experimental)
|
||||
|
||||
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
|
||||
|
||||
```text
|
||||
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
||||
```java
|
||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
||||
|
||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||
...
|
||||
@ -659,12 +354,12 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
|
||||
[00:00:10.020 --> 00:00:11.000] country.
|
||||
```
|
||||
|
||||
## Word-level timestamp (experimental)
|
||||
## Word-level timestamp
|
||||
|
||||
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
|
||||
|
||||
```text
|
||||
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
||||
```java
|
||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
||||
|
||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||
...
|
||||
@ -672,7 +367,7 @@ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1
|
||||
|
||||
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
||||
|
||||
[00:00:00.000 --> 00:00:00.320]
|
||||
[00:00:00.000 --> 00:00:00.320]
|
||||
[00:00:00.320 --> 00:00:00.370] And
|
||||
[00:00:00.370 --> 00:00:00.690] so
|
||||
[00:00:00.690 --> 00:00:00.850] my
|
||||
@ -700,41 +395,15 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
|
||||
[00:00:10.510 --> 00:00:11.000] .
|
||||
```
|
||||
|
||||
## Speaker segmentation via tinydiarize (experimental)
|
||||
|
||||
More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
|
||||
|
||||
Sample usage:
|
||||
|
||||
```py
|
||||
# download a tinydiarize compatible model
|
||||
./models/download-ggml-model.sh small.en-tdrz
|
||||
|
||||
# run as usual, adding the "-tdrz" command-line argument
|
||||
./main -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
|
||||
...
|
||||
main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
|
||||
...
|
||||
[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
|
||||
[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
|
||||
[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
|
||||
[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
|
||||
[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
|
||||
[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
|
||||
[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
|
||||
[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
|
||||
[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
|
||||
```
|
||||
|
||||
## Karaoke-style movie generation (experimental)
|
||||
|
||||
The [main](examples/main) example provides support for output of karaoke-style movies, where the
|
||||
currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
|
||||
This requires to have `ffmpeg` installed.
|
||||
|
||||
Here are a few _"typical"_ examples:
|
||||
Here are a few *"typical"* examples:
|
||||
|
||||
```bash
|
||||
```java
|
||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
|
||||
source ./samples/jfk.wav.wts
|
||||
ffplay ./samples/jfk.wav.mp4
|
||||
@ -744,7 +413,7 @@ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b
|
||||
|
||||
---
|
||||
|
||||
```bash
|
||||
```java
|
||||
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
|
||||
source ./samples/mm0.wav.wts
|
||||
ffplay ./samples/mm0.wav.mp4
|
||||
@ -754,7 +423,7 @@ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-9
|
||||
|
||||
---
|
||||
|
||||
```bash
|
||||
```java
|
||||
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
|
||||
source ./samples/gb0.wav.wts
|
||||
ffplay ./samples/gb0.wav.mp4
|
||||
@ -764,19 +433,6 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a
|
||||
|
||||
---
|
||||
|
||||
## Video comparison of different models
|
||||
|
||||
Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
|
||||
|
||||
```bash
|
||||
./scripts/bench-wts.sh samples/jfk.wav
|
||||
ffplay ./samples/jfk.wav.all.mp4
|
||||
```
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
|
||||
|
||||
---
|
||||
|
||||
## Benchmarks
|
||||
|
||||
In order to have an objective comparison of the performance of the inference across different system configurations,
|
||||
@ -785,19 +441,7 @@ took to execute it. The results are summarized in the following Github issue:
|
||||
|
||||
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
||||
|
||||
Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
|
||||
|
||||
You can run it with the following command, by default it will run against any standard model in the models folder.
|
||||
|
||||
```bash
|
||||
python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
|
||||
```
|
||||
|
||||
It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
|
||||
|
||||
It outputs a csv file with the results of the benchmarking.
|
||||
|
||||
## `ggml` format
|
||||
## ggml format
|
||||
|
||||
The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
|
||||
|
||||
@ -809,55 +453,42 @@ The original models are converted to a custom binary format. This allows to pack
|
||||
You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
|
||||
or manually from here:
|
||||
|
||||
- https://huggingface.co/ggerganov/whisper.cpp
|
||||
- https://huggingface.co/datasets/ggerganov/whisper.cpp
|
||||
- https://ggml.ggerganov.com
|
||||
|
||||
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
|
||||
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
|
||||
in [models](models).
|
||||
|
||||
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
|
||||
|
||||
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
||||
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
||||
- React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
|
||||
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||
- [x] Java:
|
||||
- [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
|
||||
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
|
||||
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
||||
- [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
|
||||
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
|
||||
- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
||||
- [X] Javascript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
||||
- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
||||
- [X] .NET:
|
||||
- [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
|
||||
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
|
||||
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
|
||||
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
||||
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
||||
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
||||
- [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
|
||||
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
||||
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
||||
- [ ] Python: soon | [WIP](https://github.com/ggerganov/whisper.cpp/issues/9)
|
||||
|
||||
## Examples
|
||||
|
||||
There are various examples of using the library for different projects in the [examples](examples) folder.
|
||||
Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
|
||||
|
||||
| Example | Web | Description |
|
||||
| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
||||
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
||||
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
||||
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
||||
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
|
||||
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
|
||||
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
|
||||
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
||||
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
||||
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
||||
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
||||
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
||||
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
||||
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
||||
| [server](examples/server) | | HTTP transcription server with OAI-like API |
|
||||
| Example | Web | Description |
|
||||
| --- | --- | --- |
|
||||
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
||||
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
||||
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
||||
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
||||
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
|
||||
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
||||
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
||||
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
||||
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
||||
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
||||
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
||||
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
||||
|
||||
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
|
||||
|
||||
|
249
README_sycl.md
249
README_sycl.md
@ -1,249 +0,0 @@
|
||||
# whisper.cpp for SYCL
|
||||
|
||||
[Background](#background)
|
||||
|
||||
[OS](#os)
|
||||
|
||||
[Intel GPU](#intel-gpu)
|
||||
|
||||
[Linux](#linux)
|
||||
|
||||
[Environment Variable](#environment-variable)
|
||||
|
||||
[Known Issue](#known-issue)
|
||||
|
||||
[Todo](#todo)
|
||||
|
||||
## Background
|
||||
|
||||
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators<72>such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
||||
|
||||
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
||||
|
||||
Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs.
|
||||
|
||||
To avoid re-inventing the wheel, this code refers other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use a open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel<EFBFBD> DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) migrate to SYCL.
|
||||
|
||||
The whisper.cpp for SYCL is used to support Intel GPUs.
|
||||
|
||||
For Intel CPU, recommend to use whisper.cpp for X86 (Intel MKL build).
|
||||
|
||||
## OS
|
||||
|
||||
|OS|Status|Verified|
|
||||
|-|-|-|
|
||||
|Linux|Support|Ubuntu 22.04|
|
||||
|Windows|Ongoing| |
|
||||
|
||||
|
||||
## Intel GPU
|
||||
|
||||
|Intel GPU| Status | Verified Model|
|
||||
|-|-|-|
|
||||
|Intel Data Center Max Series| Support| Max 1550|
|
||||
|Intel Data Center Flex Series| Support| Flex 170|
|
||||
|Intel Arc Series| Support| Arc 770|
|
||||
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
||||
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
||||
|
||||
|
||||
## Linux
|
||||
|
||||
### Setup Environment
|
||||
|
||||
1. Install Intel GPU driver.
|
||||
|
||||
a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
||||
|
||||
Note: for iGPU, please install the client GPU driver.
|
||||
|
||||
b. Add user to group: video, render.
|
||||
|
||||
```
|
||||
sudo usermod -aG render username
|
||||
sudo usermod -aG video username
|
||||
```
|
||||
|
||||
Note: re-login to enable it.
|
||||
|
||||
c. Check
|
||||
|
||||
```
|
||||
sudo apt install clinfo
|
||||
sudo clinfo -l
|
||||
```
|
||||
|
||||
Output (example):
|
||||
|
||||
```
|
||||
Platform #0: Intel(R) OpenCL Graphics
|
||||
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
||||
|
||||
|
||||
Platform #0: Intel(R) OpenCL HD Graphics
|
||||
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
||||
```
|
||||
|
||||
2. Install Intel<65> oneAPI Base toolkit.
|
||||
|
||||
|
||||
a. Please follow the procedure in [Get the Intel<65> oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
||||
|
||||
Recommend to install to default folder: **/opt/intel/oneapi**.
|
||||
|
||||
Following guide use the default folder as example. If you use other folder, please modify the following guide info with your folder.
|
||||
|
||||
b. Check
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
sycl-ls
|
||||
```
|
||||
|
||||
There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
|
||||
|
||||
Output (example):
|
||||
```
|
||||
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
||||
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
||||
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
||||
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
||||
|
||||
```
|
||||
|
||||
2. Build locally:
|
||||
|
||||
```
|
||||
mkdir -p build
|
||||
cd build
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
#for FP16
|
||||
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
||||
|
||||
#for FP32
|
||||
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
#build example/main only
|
||||
#cmake --build . --config Release --target main
|
||||
|
||||
#build all binary
|
||||
cmake --build . --config Release -v
|
||||
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
./examples/sycl/build.sh
|
||||
```
|
||||
|
||||
Note:
|
||||
|
||||
- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only.
|
||||
|
||||
### Run
|
||||
|
||||
1. Put model file to folder **models**
|
||||
|
||||
2. Enable oneAPI running environment
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
```
|
||||
|
||||
3. List device ID
|
||||
|
||||
Run without parameter:
|
||||
|
||||
```
|
||||
./build/bin/ls-sycl-device
|
||||
|
||||
or
|
||||
|
||||
./build/bin/main
|
||||
```
|
||||
|
||||
Check the ID in startup log, like:
|
||||
|
||||
```
|
||||
found 4 SYCL devices:
|
||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
|
||||
```
|
||||
|
||||
|Attribute|Note|
|
||||
|-|-|
|
||||
|compute capability 1.3|Level-zero running time, recommended |
|
||||
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
||||
|
||||
4. Set device ID and execute whisper.cpp
|
||||
|
||||
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
||||
|
||||
```
|
||||
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
```
|
||||
or run by script:
|
||||
|
||||
```
|
||||
./examples/sycl/run_whisper.sh
|
||||
```
|
||||
|
||||
|
||||
|
||||
5. Check the device ID in output
|
||||
|
||||
Like:
|
||||
```
|
||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
```
|
||||
|
||||
|
||||
## Environment Variable
|
||||
|
||||
#### Build
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
||||
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path.For FP32, do not set it.|
|
||||
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
||||
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
||||
|
||||
#### Running
|
||||
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
||||
|
||||
## Known Issue
|
||||
|
||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||
|
||||
Miss to enable oneAPI running environment.
|
||||
|
||||
Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
|
||||
|
||||
|
||||
- Hang during startup
|
||||
|
||||
llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
|
||||
|
||||
Solution: add **--no-mmap**.
|
||||
|
||||
## Todo
|
||||
|
||||
- Support to build in Windows.
|
||||
|
||||
- Support multiple cards.
|
@ -1,5 +0,0 @@
|
||||
module whisper [system] {
|
||||
header "whisper.h"
|
||||
link "whisper"
|
||||
export *
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <whisper.h>
|
||||
|
@ -1,31 +1,9 @@
|
||||
ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifndef UNAME_P
|
||||
UNAME_P := $(shell uname -p)
|
||||
endif
|
||||
|
||||
ifndef UNAME_M
|
||||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
||||
BUILD_DIR := build
|
||||
MODELS_DIR := models
|
||||
EXAMPLES_DIR := $(wildcard examples/*)
|
||||
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
||||
INCLUDE_PATH := $(abspath ../..)
|
||||
LIBRARY_PATH := $(abspath ../..)
|
||||
|
||||
ifeq ($(GGML_CUDA),1)
|
||||
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
||||
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
|
||||
endif
|
||||
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
||||
endif
|
||||
|
||||
all: clean whisper examples
|
||||
|
||||
whisper: mkdir
|
||||
@ -33,13 +11,8 @@ whisper: mkdir
|
||||
@${MAKE} -C ../.. libwhisper.a
|
||||
|
||||
test: model-small whisper modtidy
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
|
||||
else
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
||||
endif
|
||||
|
||||
examples: $(EXAMPLES_DIR)
|
||||
|
||||
@ -48,11 +21,7 @@ model-small: mkdir examples/go-model-download
|
||||
|
||||
$(EXAMPLES_DIR): mkdir whisper modtidy
|
||||
@echo Build example $(notdir $@)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||
else
|
||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||
endif
|
||||
|
||||
mkdir:
|
||||
@echo Mkdir ${BUILD_DIR}
|
||||
@ -63,7 +32,7 @@ mkdir:
|
||||
modtidy:
|
||||
@go mod tidy
|
||||
|
||||
clean:
|
||||
clean:
|
||||
@echo Clean
|
||||
@rm -fr $(BUILD_DIR)
|
||||
@go clean
|
||||
|
@ -31,7 +31,7 @@ func main() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := context.Process(samples, nil, nil); err != nil {
|
||||
if err := context.Process(samples, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@ -62,12 +62,6 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
|
||||
make examples
|
||||
```
|
||||
|
||||
To build using cuda support add `GGML_CUDA=1`:
|
||||
|
||||
```bash
|
||||
GGML_CUDA=1 make examples
|
||||
```
|
||||
|
||||
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
||||
|
||||
```bash
|
||||
@ -77,7 +71,7 @@ The examples are placed in the `build` directory. Once built, you can download a
|
||||
And you can then test a model against samples with the following command:
|
||||
|
||||
```bash
|
||||
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
||||
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
||||
```
|
||||
|
||||
## Using the bindings
|
||||
|
@ -17,14 +17,14 @@ import (
|
||||
// CONSTANTS
|
||||
|
||||
const (
|
||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
srcUrl = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
)
|
||||
|
||||
var (
|
||||
// The models which will be downloaded, if no model is specified as an argument
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -68,6 +68,10 @@ func (flags *Flags) GetOut() string {
|
||||
return strings.ToLower(flags.Lookup("out").Value.String())
|
||||
}
|
||||
|
||||
func (flags *Flags) IsSpeedup() bool {
|
||||
return flags.Lookup("speedup").Value.String() == "true"
|
||||
}
|
||||
|
||||
func (flags *Flags) IsTokens() bool {
|
||||
return flags.Lookup("tokens").Value.String() == "true"
|
||||
}
|
||||
@ -107,6 +111,10 @@ func (flags *Flags) SetParams(context whisper.Context) error {
|
||||
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
||||
context.SetDuration(duration)
|
||||
}
|
||||
if flags.IsSpeedup() {
|
||||
fmt.Fprintf(flags.Output(), "Setting speedup to true\n")
|
||||
context.SetSpeedup(true)
|
||||
}
|
||||
if threads := flags.GetThreads(); threads != 0 {
|
||||
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
||||
context.SetThreads(threads)
|
||||
@ -138,6 +146,7 @@ func registerFlags(flag *Flags) {
|
||||
flag.Duration("offset", 0, "Time offset")
|
||||
flag.Duration("duration", 0, "Duration of audio to process")
|
||||
flag.Uint("threads", 0, "Number of threads to use")
|
||||
flag.Bool("speedup", false, "Enable speedup")
|
||||
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
||||
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
||||
flag.Float64("word-thold", 0, "Maximum segment score")
|
||||
|
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
||||
// Process the data
|
||||
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
||||
context.ResetTimings()
|
||||
if err := context.Process(data, cb, nil); err != nil {
|
||||
if err := context.Process(data, cb); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
module github.com/ggerganov/whisper.cpp/bindings/go
|
||||
|
||||
go 1.23
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/stretchr/testify v1.8.1
|
||||
)
|
||||
|
||||
require (
|
||||
|
@ -1,3 +1,4 @@
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||
@ -8,9 +9,15 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@ -19,10 +19,6 @@ func (p *Params) SetTranslate(v bool) {
|
||||
p.translate = toBool(v)
|
||||
}
|
||||
|
||||
func (p *Params) SetSplitOnWord(v bool) {
|
||||
p.split_on_word = toBool(v)
|
||||
}
|
||||
|
||||
func (p *Params) SetNoContext(v bool) {
|
||||
p.no_context = toBool(v)
|
||||
}
|
||||
@ -47,6 +43,10 @@ func (p *Params) SetPrintTimestamps(v bool) {
|
||||
p.print_timestamps = toBool(v)
|
||||
}
|
||||
|
||||
func (p *Params) SetSpeedup(v bool) {
|
||||
p.speed_up = toBool(v)
|
||||
}
|
||||
|
||||
// Set language id
|
||||
func (p *Params) SetLanguage(lang int) error {
|
||||
if lang == -1 {
|
||||
@ -105,47 +105,11 @@ func (p *Params) SetMaxSegmentLength(n int) {
|
||||
p.max_len = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetTokenTimestamps(b bool) {
|
||||
p.token_timestamps = toBool(b)
|
||||
}
|
||||
|
||||
// Set max tokens per segment (0 = no limit)
|
||||
func (p *Params) SetMaxTokensPerSegment(n int) {
|
||||
p.max_tokens = C.int(n)
|
||||
}
|
||||
|
||||
// Set audio encoder context
|
||||
func (p *Params) SetAudioCtx(n int) {
|
||||
p.audio_ctx = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetMaxContext(n int) {
|
||||
p.n_max_text_ctx = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetBeamSize(n int) {
|
||||
p.beam_search.beam_size = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetEntropyThold(t float32) {
|
||||
p.entropy_thold = C.float(t)
|
||||
}
|
||||
|
||||
func (p *Params) SetTemperature(t float32) {
|
||||
p.temperature = C.float(t)
|
||||
}
|
||||
|
||||
// Sets the fallback temperature incrementation
|
||||
// Pass -1.0 to disable this feature
|
||||
func (p *Params) SetTemperatureFallback(t float32) {
|
||||
p.temperature_inc = C.float(t)
|
||||
}
|
||||
|
||||
// Set initial prompt
|
||||
func (p *Params) SetInitialPrompt(prompt string) {
|
||||
p.initial_prompt = C.CString(prompt)
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// PRIVATE METHODS
|
||||
|
||||
@ -169,12 +133,6 @@ func (p *Params) String() string {
|
||||
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
||||
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
||||
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
||||
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
||||
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
||||
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
||||
str += fmt.Sprintf(" temperature=%f", p.temperature)
|
||||
str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
|
||||
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
||||
if p.translate {
|
||||
str += " translate"
|
||||
}
|
||||
@ -199,6 +157,9 @@ func (p *Params) String() string {
|
||||
if p.token_timestamps {
|
||||
str += " token_timestamps"
|
||||
}
|
||||
if p.speed_up {
|
||||
str += " speed_up"
|
||||
}
|
||||
|
||||
return str + ">"
|
||||
}
|
||||
|
@ -76,8 +76,9 @@ func (context *context) SetTranslate(v bool) {
|
||||
context.params.SetTranslate(v)
|
||||
}
|
||||
|
||||
func (context *context) SetSplitOnWord(v bool) {
|
||||
context.params.SetSplitOnWord(v)
|
||||
// Set speedup flag
|
||||
func (context *context) SetSpeedup(v bool) {
|
||||
context.params.SetSpeedup(v)
|
||||
}
|
||||
|
||||
// Set number of threads to use
|
||||
@ -92,7 +93,7 @@ func (context *context) SetOffset(v time.Duration) {
|
||||
|
||||
// Set duration of audio to process
|
||||
func (context *context) SetDuration(v time.Duration) {
|
||||
context.params.SetDuration(int(v.Milliseconds()))
|
||||
context.params.SetOffset(int(v.Milliseconds()))
|
||||
}
|
||||
|
||||
// Set timestamp token probability threshold (~0.01)
|
||||
@ -110,52 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
|
||||
context.params.SetMaxSegmentLength(int(n))
|
||||
}
|
||||
|
||||
// Set token timestamps flag
|
||||
func (context *context) SetTokenTimestamps(b bool) {
|
||||
context.params.SetTokenTimestamps(b)
|
||||
}
|
||||
|
||||
// Set max tokens per segment (0 = no limit)
|
||||
func (context *context) SetMaxTokensPerSegment(n uint) {
|
||||
context.params.SetMaxTokensPerSegment(int(n))
|
||||
}
|
||||
|
||||
// Set audio encoder context
|
||||
func (context *context) SetAudioCtx(n uint) {
|
||||
context.params.SetAudioCtx(int(n))
|
||||
}
|
||||
|
||||
// Set maximum number of text context tokens to store
|
||||
func (context *context) SetMaxContext(n int) {
|
||||
context.params.SetMaxContext(n)
|
||||
}
|
||||
|
||||
// Set Beam Size
|
||||
func (context *context) SetBeamSize(n int) {
|
||||
context.params.SetBeamSize(n)
|
||||
}
|
||||
|
||||
// Set Entropy threshold
|
||||
func (context *context) SetEntropyThold(t float32) {
|
||||
context.params.SetEntropyThold(t)
|
||||
}
|
||||
|
||||
// Set Temperature
|
||||
func (context *context) SetTemperature(t float32) {
|
||||
context.params.SetTemperature(t)
|
||||
}
|
||||
|
||||
// Set the fallback temperature incrementation
|
||||
// Pass -1.0 to disable this feature
|
||||
func (context *context) SetTemperatureFallback(t float32) {
|
||||
context.params.SetTemperatureFallback(t)
|
||||
}
|
||||
|
||||
// Set initial prompt
|
||||
func (context *context) SetInitialPrompt(prompt string) {
|
||||
context.params.SetInitialPrompt(prompt)
|
||||
}
|
||||
|
||||
// ResetTimings resets the mode timings. Should be called before processing
|
||||
func (context *context) ResetTimings() {
|
||||
context.model.ctx.Whisper_reset_timings()
|
||||
@ -187,16 +147,12 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
|
||||
}
|
||||
|
||||
// Process new sample data and return any errors
|
||||
func (context *context) Process(
|
||||
data []float32,
|
||||
callNewSegment SegmentCallback,
|
||||
callProgress ProgressCallback,
|
||||
) error {
|
||||
func (context *context) Process(data []float32, cb SegmentCallback) error {
|
||||
if context.model.ctx == nil {
|
||||
return ErrInternalAppError
|
||||
}
|
||||
// If the callback is defined then we force on single_segment mode
|
||||
if callNewSegment != nil {
|
||||
if cb != nil {
|
||||
context.params.SetSingleSegment(true)
|
||||
}
|
||||
|
||||
@ -204,28 +160,24 @@ func (context *context) Process(
|
||||
processors := 0
|
||||
if processors > 1 {
|
||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
||||
if callNewSegment != nil {
|
||||
if cb != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
cb(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
||||
if callNewSegment != nil {
|
||||
if cb != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
cb(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}, func(progress int) {
|
||||
if callProgress != nil {
|
||||
callProgress(progress)
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -328,14 +280,10 @@ func toSegment(ctx *whisper.Context, n int) Segment {
|
||||
func toTokens(ctx *whisper.Context, n int) []Token {
|
||||
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
||||
for i := 0; i < len(result); i++ {
|
||||
data := ctx.Whisper_full_get_token_data(n, i)
|
||||
|
||||
result[i] = Token{
|
||||
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
||||
Text: ctx.Whisper_full_get_token_text(n, i),
|
||||
P: ctx.Whisper_full_get_token_p(n, i),
|
||||
Start: time.Duration(data.T0()) * time.Millisecond * 10,
|
||||
End: time.Duration(data.T1()) * time.Millisecond * 10,
|
||||
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
||||
Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
|
||||
P: ctx.Whisper_full_get_token_p(n, i),
|
||||
}
|
||||
}
|
||||
return result
|
||||
|
@ -4,90 +4,52 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-audio/wav"
|
||||
// Packages
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
assert "github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSetLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
const (
|
||||
ModelPath = "../../models/ggml-tiny.bin"
|
||||
SamplePath = "../../samples/jfk.wav"
|
||||
)
|
||||
|
||||
func Test_Whisper_000(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, model not found:", ModelPath)
|
||||
}
|
||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||
}
|
||||
|
||||
// Load model
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
assert.NoError(model.Close())
|
||||
|
||||
t.Log("languages=", model.Languages())
|
||||
}
|
||||
|
||||
func Test_Whisper_001(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, model not found:", ModelPath)
|
||||
}
|
||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||
}
|
||||
|
||||
// Load model
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
// Get context for decoding
|
||||
ctx, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
assert.NotNil(ctx)
|
||||
|
||||
// This returns an error since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
err = context.SetLanguage("en")
|
||||
assert.Error(err)
|
||||
}
|
||||
|
||||
func TestContextModelIsMultilingual(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
isMultilingual := context.IsMultilingual()
|
||||
|
||||
// This returns false since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
assert.False(isMultilingual)
|
||||
}
|
||||
|
||||
func TestLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
// This always returns en since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
expectedLanguage := "en"
|
||||
actualLanguage := context.Language()
|
||||
assert.Equal(expectedLanguage, actualLanguage)
|
||||
}
|
||||
|
||||
func TestProcess(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
fh, err := os.Open(SamplePath)
|
||||
assert.NoError(err)
|
||||
defer fh.Close()
|
||||
|
||||
// Decode the WAV file - load the full buffer
|
||||
dec := wav.NewDecoder(fh)
|
||||
buf, err := dec.FullPCMBuffer()
|
||||
assert.NoError(err)
|
||||
assert.Equal(uint16(1), dec.NumChans)
|
||||
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
err = context.Process(data, nil, nil)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
@ -12,10 +12,6 @@ import (
|
||||
// time. It is called during the Process function
|
||||
type SegmentCallback func(Segment)
|
||||
|
||||
// ProgressCallback is the callback function for reporting progress during
|
||||
// processing. It is called during the Process function
|
||||
type ProgressCallback func(int)
|
||||
|
||||
// Model is the interface to a whisper model. Create a new model with the
|
||||
// function whisper.New(string)
|
||||
type Model interface {
|
||||
@ -38,27 +34,19 @@ type Context interface {
|
||||
IsMultilingual() bool // Return true if the model is multilingual.
|
||||
Language() string // Get language
|
||||
|
||||
SetOffset(time.Duration) // Set offset
|
||||
SetDuration(time.Duration) // Set duration
|
||||
SetThreads(uint) // Set number of threads to use
|
||||
SetSplitOnWord(bool) // Set split on word flag
|
||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
||||
SetTokenTimestamps(bool) // Set token timestamps flag
|
||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||
SetAudioCtx(uint) // Set audio encoder context
|
||||
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
||||
SetBeamSize(n int) // Set Beam Size
|
||||
SetEntropyThold(t float32) // Set Entropy threshold
|
||||
SetInitialPrompt(prompt string) // Set initial prompt
|
||||
SetTemperature(t float32) // Set temperature
|
||||
SetTemperatureFallback(t float32) // Set temperature incrementation
|
||||
SetOffset(time.Duration) // Set offset
|
||||
SetDuration(time.Duration) // Set duration
|
||||
SetThreads(uint) // Set number of threads to use
|
||||
SetSpeedup(bool) // Set speedup flag
|
||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||
|
||||
// Process mono audio data and return any errors.
|
||||
// If defined, newly generated segments are passed to the
|
||||
// callback function during processing.
|
||||
Process([]float32, SegmentCallback, ProgressCallback) error
|
||||
Process([]float32, SegmentCallback) error
|
||||
|
||||
// After process is called, return segments until the end of the stream
|
||||
// is reached, when io.EOF is returned.
|
||||
@ -97,8 +85,7 @@ type Segment struct {
|
||||
|
||||
// Token is a text or special token
|
||||
type Token struct {
|
||||
Id int
|
||||
Text string
|
||||
P float32
|
||||
Start, End time.Duration
|
||||
Id int
|
||||
Text string
|
||||
P float32
|
||||
}
|
||||
|
@ -94,7 +94,6 @@ func (model *model) NewContext() (Context, error) {
|
||||
params.SetPrintRealtime(false)
|
||||
params.SetPrintTimestamps(false)
|
||||
params.SetThreads(runtime.NumCPU())
|
||||
params.SetNoContext(true)
|
||||
|
||||
// Return new context
|
||||
return newContext(model, params)
|
||||
|
@ -1,91 +0,0 @@
|
||||
package whisper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
assert "github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
t.Run("valid model path", func(t *testing.T) {
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
})
|
||||
|
||||
t.Run("invalid model path", func(t *testing.T) {
|
||||
invalidModelPath := "invalid-model-path.bin"
|
||||
model, err := whisper.New(invalidModelPath)
|
||||
assert.Error(err)
|
||||
assert.Nil(model)
|
||||
})
|
||||
}
|
||||
|
||||
func TestClose(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
|
||||
err = model.Close()
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestNewContext(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
assert.NotNil(context)
|
||||
}
|
||||
|
||||
func TestIsMultilingual(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
isMultilingual := model.IsMultilingual()
|
||||
|
||||
// This returns false since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
assert.False(isMultilingual)
|
||||
}
|
||||
|
||||
func TestLanguages(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
expectedLanguages := []string{
|
||||
"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
|
||||
"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
|
||||
"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
|
||||
"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
|
||||
"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
|
||||
"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
|
||||
"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
|
||||
"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
|
||||
"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
|
||||
}
|
||||
|
||||
actualLanguages := model.Languages()
|
||||
|
||||
assert.Equal(expectedLanguages, actualLanguages)
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
package whisper_test
|
||||
|
||||
const (
|
||||
ModelPath = "../../models/ggml-small.en.bin"
|
||||
SamplePath = "../../samples/jfk.wav"
|
||||
)
|
@ -9,37 +9,27 @@ import (
|
||||
// CGO
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
|
||||
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
||||
#cgo LDFLAGS: -lwhisper -lm -lstdc++
|
||||
#cgo darwin LDFLAGS: -framework Accelerate
|
||||
#include <whisper.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
extern void callNewSegment(void* user_data, int new);
|
||||
extern void callProgress(void* user_data, int progress);
|
||||
extern bool callEncoderBegin(void* user_data);
|
||||
|
||||
// Text segment callback
|
||||
// Called on every newly generated text segment
|
||||
// Use the whisper_full_...() functions to obtain the text segments
|
||||
static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
|
||||
static void whisper_new_segment_cb(struct whisper_context* ctx, int n_new, void* user_data) {
|
||||
if(user_data != NULL && ctx != NULL) {
|
||||
callNewSegment(user_data, n_new);
|
||||
}
|
||||
}
|
||||
|
||||
// Progress callback
|
||||
// Called on every newly generated text segment
|
||||
// Use the whisper_full_...() functions to obtain the text segments
|
||||
static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
|
||||
if(user_data != NULL && ctx != NULL) {
|
||||
callProgress(user_data, progress);
|
||||
}
|
||||
}
|
||||
|
||||
// Encoder begin callback
|
||||
// If not NULL, called before the encoder starts
|
||||
// If it returns false, the computation is aborted
|
||||
static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
|
||||
static bool whisper_encoder_begin_cb(struct whisper_context* ctx, void* user_data) {
|
||||
if(user_data != NULL && ctx != NULL) {
|
||||
return callEncoderBegin(user_data);
|
||||
}
|
||||
@ -53,8 +43,6 @@ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_
|
||||
params.new_segment_callback_user_data = (void*)(ctx);
|
||||
params.encoder_begin_callback = whisper_encoder_begin_cb;
|
||||
params.encoder_begin_callback_user_data = (void*)(ctx);
|
||||
params.progress_callback = whisper_progress_cb;
|
||||
params.progress_callback_user_data = (void*)(ctx);
|
||||
return params;
|
||||
}
|
||||
*/
|
||||
@ -83,6 +71,7 @@ const (
|
||||
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
||||
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
||||
NumFFT = C.WHISPER_N_FFT
|
||||
NumMEL = C.WHISPER_N_MEL
|
||||
HopLength = C.WHISPER_HOP_LENGTH
|
||||
ChunkSize = C.WHISPER_CHUNK_SIZE
|
||||
)
|
||||
@ -102,7 +91,7 @@ var (
|
||||
func Whisper_init(path string) *Context {
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
|
||||
if ctx := C.whisper_init_from_file(cPath); ctx != nil {
|
||||
return (*Context)(ctx)
|
||||
} else {
|
||||
return nil
|
||||
@ -269,13 +258,13 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
|
||||
}
|
||||
|
||||
// Task tokens
|
||||
func (ctx *Context) Whisper_token_translate() Token {
|
||||
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
|
||||
func Whisper_token_translate() Token {
|
||||
return Token(C.whisper_token_translate())
|
||||
}
|
||||
|
||||
// Task tokens
|
||||
func (ctx *Context) Whisper_token_transcribe() Token {
|
||||
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
|
||||
func Whisper_token_transcribe() Token {
|
||||
return Token(C.whisper_token_transcribe())
|
||||
}
|
||||
|
||||
// Performance information
|
||||
@ -301,19 +290,11 @@ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Param
|
||||
|
||||
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
// Uses the specified decoding strategy to obtain the text.
|
||||
func (ctx *Context) Whisper_full(
|
||||
params Params,
|
||||
samples []float32,
|
||||
encoderBeginCallback func() bool,
|
||||
newSegmentCallback func(int),
|
||||
progressCallback func(int),
|
||||
) error {
|
||||
func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
|
||||
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
||||
registerNewSegmentCallback(ctx, newSegmentCallback)
|
||||
registerProgressCallback(ctx, progressCallback)
|
||||
defer registerEncoderBeginCallback(ctx, nil)
|
||||
defer registerNewSegmentCallback(ctx, nil)
|
||||
defer registerProgressCallback(ctx, nil)
|
||||
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
||||
return nil
|
||||
} else {
|
||||
@ -337,18 +318,6 @@ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, proc
|
||||
}
|
||||
}
|
||||
|
||||
// Return the id of the autodetected language, returns -1 if not found
|
||||
// Added to whisper.cpp in
|
||||
// https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// "de" -> 2
|
||||
// "german" -> 2
|
||||
func (ctx *Context) Whisper_full_lang_id() int {
|
||||
return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
|
||||
}
|
||||
|
||||
// Number of generated text segments.
|
||||
// A segment can be a few words, a sentence, or even a paragraph.
|
||||
func (ctx *Context) Whisper_full_n_segments() int {
|
||||
@ -387,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
|
||||
|
||||
// Get token data for the specified token in the specified segment.
|
||||
// This contains probabilities, timestamps, etc.
|
||||
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
|
||||
func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
|
||||
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
||||
}
|
||||
|
||||
@ -401,7 +370,6 @@ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
|
||||
|
||||
var (
|
||||
cbNewSegment = make(map[unsafe.Pointer]func(int))
|
||||
cbProgress = make(map[unsafe.Pointer]func(int))
|
||||
cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
|
||||
)
|
||||
|
||||
@ -413,14 +381,6 @@ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
|
||||
}
|
||||
}
|
||||
|
||||
func registerProgressCallback(ctx *Context, fn func(int)) {
|
||||
if fn == nil {
|
||||
delete(cbProgress, unsafe.Pointer(ctx))
|
||||
} else {
|
||||
cbProgress[unsafe.Pointer(ctx)] = fn
|
||||
}
|
||||
}
|
||||
|
||||
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
||||
if fn == nil {
|
||||
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
||||
@ -436,13 +396,6 @@ func callNewSegment(user_data unsafe.Pointer, new C.int) {
|
||||
}
|
||||
}
|
||||
|
||||
//export callProgress
|
||||
func callProgress(user_data unsafe.Pointer, progress C.int) {
|
||||
if fn, ok := cbProgress[user_data]; ok {
|
||||
fn(int(progress))
|
||||
}
|
||||
}
|
||||
|
||||
//export callEncoderBegin
|
||||
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
||||
if fn, ok := cbEncoderBegin[user_data]; ok {
|
||||
@ -454,15 +407,3 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (t TokenData) T0() int64 {
|
||||
return int64(t.t0)
|
||||
}
|
||||
|
||||
func (t TokenData) T1() int64 {
|
||||
return int64(t.t1)
|
||||
}
|
||||
|
||||
func (t TokenData) Id() Token {
|
||||
return Token(t.id)
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ func Test_Whisper_001(t *testing.T) {
|
||||
defer ctx.Whisper_free()
|
||||
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
err = ctx.Whisper_full(params, data, nil, nil, nil)
|
||||
err = ctx.Whisper_full(params, data, nil, nil)
|
||||
assert.NoError(err)
|
||||
|
||||
// Print out tokens
|
||||
|
1
bindings/ios
Submodule
1
bindings/ios
Submodule
Submodule bindings/ios added at d5c6d5c8a3
124
bindings/java/.idea/uiDesigner.xml
generated
124
bindings/java/.idea/uiDesigner.xml
generated
@ -1,124 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Palette2">
|
||||
<group name="Swing">
|
||||
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
||||
</item>
|
||||
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
||||
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
||||
<initial-values>
|
||||
<property name="text" value="Button" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="RadioButton" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="CheckBox" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
||||
<initial-values>
|
||||
<property name="text" value="Label" />
|
||||
</initial-values>
|
||||
</item>
|
||||
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||
<preferred-size width="150" height="-1" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||
<preferred-size width="150" height="50" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||
<preferred-size width="200" height="200" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
||||
</item>
|
||||
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
||||
<preferred-size width="-1" height="20" />
|
||||
</default-constraints>
|
||||
</item>
|
||||
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
||||
</item>
|
||||
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
||||
</item>
|
||||
</group>
|
||||
</component>
|
||||
</project>
|
@ -1,71 +0,0 @@
|
||||
# Java JNI bindings for Whisper
|
||||
|
||||
This package provides Java JNI bindings for whisper.cpp. They have been tested on:
|
||||
|
||||
* <strike>Darwin (OS X) 12.6 on x64_64</strike>
|
||||
* Ubuntu on x86_64
|
||||
* Windows on x86_64
|
||||
|
||||
The "low level" bindings are in `WhisperCppJnaLibrary`. The most simple usage is as follows:
|
||||
|
||||
JNA will attempt to load the `whispercpp` shared library from:
|
||||
|
||||
- jna.library.path
|
||||
- jna.platform.library
|
||||
- ~/Library/Frameworks
|
||||
- /Library/Frameworks
|
||||
- /System/Library/Frameworks
|
||||
- classpath
|
||||
|
||||
```java
|
||||
import io.github.ggerganov.whispercpp.WhisperCpp;
|
||||
|
||||
public class Example {
|
||||
|
||||
public static void main(String[] args) {
|
||||
WhisperCpp whisper = new WhisperCpp();
|
||||
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
||||
// or you can provide the absolute path to the model file.
|
||||
long context = whisper.initContext("base.en");
|
||||
try {
|
||||
var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
// custom configuration if required
|
||||
whisperParams.temperature_inc = 0f;
|
||||
|
||||
var samples = readAudio(); // divide each value by 32767.0f
|
||||
whisper.fullTranscribe(whisperParams, samples);
|
||||
|
||||
int segmentCount = whisper.getTextSegmentCount(context);
|
||||
for (int i = 0; i < segmentCount; i++) {
|
||||
String text = whisper.getTextSegment(context, i);
|
||||
System.out.println(segment.getText());
|
||||
}
|
||||
} finally {
|
||||
whisper.freeContext(context);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Building & Testing
|
||||
|
||||
In order to build, you need to have the JDK 8 or higher installed. Run the tests with:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp/bindings/java
|
||||
|
||||
./gradlew build
|
||||
```
|
||||
|
||||
You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
|
||||
|
||||
```bash
|
||||
copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
|
||||
```
|
||||
|
||||
|
||||
## License
|
||||
|
||||
The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
|
||||
|
@ -1,133 +0,0 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id 'java-library'
|
||||
id 'maven-publish'
|
||||
id 'signing'
|
||||
}
|
||||
|
||||
archivesBaseName = 'whispercpp'
|
||||
group = 'io.github.ggerganov'
|
||||
version = '1.4.0'
|
||||
|
||||
|
||||
sourceCompatibility = 1.8
|
||||
targetCompatibility = 1.8
|
||||
|
||||
sourceSets {
|
||||
main {
|
||||
resources {
|
||||
srcDirs = ['src/main/resources', 'build/generated/resources/main']
|
||||
}
|
||||
}
|
||||
test {
|
||||
runtimeClasspath += files('build/generated/resources/main')
|
||||
}
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperDynlib', Copy) {
|
||||
from '../../build'
|
||||
include 'libwhisper.dynlib'
|
||||
into 'build/generated/resources/main/darwin'
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperSo', Copy) {
|
||||
from '../../build'
|
||||
include 'libwhisper.so'
|
||||
into 'build/generated/resources/main/linux-x86-64'
|
||||
}
|
||||
|
||||
tasks.register('copyWhisperDll', Copy) {
|
||||
from '../../build/Release'
|
||||
include 'whisper.dll'
|
||||
into 'build/generated/resources/main/windows-x86-64'
|
||||
}
|
||||
|
||||
tasks.register('copyLibs') {
|
||||
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
|
||||
}
|
||||
|
||||
test {
|
||||
systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
|
||||
}
|
||||
|
||||
java {
|
||||
withSourcesJar()
|
||||
withJavadocJar()
|
||||
}
|
||||
|
||||
jar {
|
||||
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
||||
}
|
||||
|
||||
javadoc {
|
||||
options.addStringOption('Xdoclint:none', '-quiet')
|
||||
}
|
||||
|
||||
tasks.withType(Test) {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation "net.java.dev.jna:jna:5.13.0"
|
||||
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
||||
testImplementation "org.assertj:assertj-core:3.24.2"
|
||||
}
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
publishing {
|
||||
publications {
|
||||
mavenJava(MavenPublication) {
|
||||
artifactId = 'whispercpp'
|
||||
from components.java
|
||||
pom {
|
||||
name = 'whispercpp'
|
||||
description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
|
||||
url = 'https://github.com/ggerganov/whisper.cpp'
|
||||
licenses {
|
||||
license {
|
||||
name = 'MIT licence'
|
||||
url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
|
||||
}
|
||||
}
|
||||
developers {
|
||||
developer {
|
||||
id = 'ggerganov'
|
||||
name = 'Georgi Gerganov'
|
||||
email = 'ggerganov@gmail.com'
|
||||
}
|
||||
developer {
|
||||
id = 'nalbion'
|
||||
name = 'Nicholas Albion'
|
||||
email = 'nalbion@yahoo.com'
|
||||
}
|
||||
}
|
||||
scm {
|
||||
connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
|
||||
url = 'https://github.com/ggerganov/whisper.cpp'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
repositories {
|
||||
maven {
|
||||
def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
|
||||
def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
|
||||
url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
|
||||
credentials {
|
||||
username = System.getenv("MAVEN_USERNAME")
|
||||
password = System.getenv("MAVEN_PASSWORD")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
signing {
|
||||
def signingKey = System.getenv("PGP_SECRET")
|
||||
def signingPassword = System.getenv("PGP_PASSPHRASE")
|
||||
useInMemoryPgpKeys(signingKey, signingPassword)
|
||||
sign publishing.publications.mavenJava
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
org.gradle.jvmargs=-Xms256m -Xmx1024m
|
||||
system.include.dir=/usr/include
|
||||
#system.local.include.dir=../../include
|
||||
system.local.include.dir=./build/generated/sources/headers/java/main
|
||||
jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
|
||||
jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
|
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
@ -1,6 +0,0 @@
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
|
||||
networkTimeout=10000
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
244
bindings/java/gradlew
vendored
244
bindings/java/gradlew
vendored
@ -1,244 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright © 2015-2021 the original authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
# Gradle start up script for POSIX generated by Gradle.
|
||||
#
|
||||
# Important for running:
|
||||
#
|
||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||
# noncompliant, but you have some other compliant shell such as ksh or
|
||||
# bash, then to run this script, type that shell name before the whole
|
||||
# command line, like:
|
||||
#
|
||||
# ksh Gradle
|
||||
#
|
||||
# Busybox and similar reduced shells will NOT work, because this script
|
||||
# requires all of these POSIX shell features:
|
||||
# * functions;
|
||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||
# * compound commands having a testable exit status, especially «case»;
|
||||
# * various built-in commands including «command», «set», and «ulimit».
|
||||
#
|
||||
# Important for patching:
|
||||
#
|
||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||
#
|
||||
# The "traditional" practice of packing multiple parameters into a
|
||||
# space-separated string is a well documented source of bugs and security
|
||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||
# options in "$@", and eventually passing that to Java.
|
||||
#
|
||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||
# see the in-line comments for details.
|
||||
#
|
||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
|
||||
# Resolve links: $0 may be a link
|
||||
app_path=$0
|
||||
|
||||
# Need this for daisy-chained symlinks.
|
||||
while
|
||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||
[ -h "$app_path" ]
|
||||
do
|
||||
ls=$( ls -ld "$app_path" )
|
||||
link=${ls#*' -> '}
|
||||
case $link in #(
|
||||
/*) app_path=$link ;; #(
|
||||
*) app_path=$APP_HOME$link ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# This is normally unused
|
||||
# shellcheck disable=SC2034
|
||||
APP_BASE_NAME=${0##*/}
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
} >&2
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
} >&2
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "$( uname )" in #(
|
||||
CYGWIN* ) cygwin=true ;; #(
|
||||
Darwin* ) darwin=true ;; #(
|
||||
MSYS* | MINGW* ) msys=true ;; #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||
else
|
||||
JAVACMD=$JAVA_HOME/bin/java
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC3045
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC3045
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command, stacking in reverse order:
|
||||
# * args from the command line
|
||||
# * the main class name
|
||||
# * -classpath
|
||||
# * -D...appname settings
|
||||
# * --module-path (only if needed)
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if "$cygwin" || "$msys" ; then
|
||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||
|
||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
for arg do
|
||||
if
|
||||
case $arg in #(
|
||||
-*) false ;; # don't mess with options #(
|
||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||
[ -e "$t" ] ;; #(
|
||||
*) false ;;
|
||||
esac
|
||||
then
|
||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||
fi
|
||||
# Roll the args list around exactly as many times as the number of
|
||||
# args, so each arg winds up back in the position where it started, but
|
||||
# possibly modified.
|
||||
#
|
||||
# NB: a `for` loop captures its iteration list before it begins, so
|
||||
# changing the positional parameters here affects neither the number of
|
||||
# iterations, nor the values presented in `arg`.
|
||||
shift # remove old arg
|
||||
set -- "$@" "$arg" # push replacement arg
|
||||
done
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command;
|
||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||
# shell script including quotes and variable substitutions, so put them in
|
||||
# double quotes to make sure that they get re-expanded; and
|
||||
# * put everything else in single quotes, so that it's not re-expanded.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
"$@"
|
||||
|
||||
# Stop when "xargs" is not available.
|
||||
if ! command -v xargs >/dev/null 2>&1
|
||||
then
|
||||
die "xargs is not available"
|
||||
fi
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
#
|
||||
# In Bash we could simply go:
|
||||
#
|
||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||
# set -- "${ARGS[@]}" "$@"
|
||||
#
|
||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||
# character that might be a shell metacharacter, then use eval to reverse
|
||||
# that process (while maintaining the separation between arguments), and wrap
|
||||
# the whole thing up as a single "set" statement.
|
||||
#
|
||||
# This will of course break if any of these variables contains a newline or
|
||||
# an unmatched quote.
|
||||
#
|
||||
|
||||
eval "set -- $(
|
||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||
xargs -n1 |
|
||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||
tr '\n' ' '
|
||||
)" '"$@"'
|
||||
|
||||
exec "$JAVACMD" "$@"
|
92
bindings/java/gradlew.bat
vendored
92
bindings/java/gradlew.bat
vendored
@ -1,92 +0,0 @@
|
||||
@rem
|
||||
@rem Copyright 2015 the original author or authors.
|
||||
@rem
|
||||
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@rem you may not use this file except in compliance with the License.
|
||||
@rem You may obtain a copy of the License at
|
||||
@rem
|
||||
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||
@rem
|
||||
@rem Unless required by applicable law or agreed to in writing, software
|
||||
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@rem See the License for the specific language governing permissions and
|
||||
@rem limitations under the License.
|
||||
@rem
|
||||
|
||||
@if "%DEBUG%"=="" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%"=="" set DIRNAME=.
|
||||
@rem This is normally unused
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if %ERRORLEVEL% equ 0 goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
set EXIT_CODE=%ERRORLEVEL%
|
||||
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||
exit /b %EXIT_CODE%
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
@ -1 +0,0 @@
|
||||
rootProject.name = "whispercpp"
|
@ -1,41 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.Structure;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
||||
import io.github.ggerganov.whispercpp.WhisperModel;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperContext extends Structure {
|
||||
int t_load_us = 0;
|
||||
int t_start_us = 0;
|
||||
|
||||
/** weight type (FP32 / FP16 / QX) */
|
||||
GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||
/** intermediate type (FP32 or FP16) */
|
||||
GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||
|
||||
// WhisperModel model;
|
||||
public PointerByReference model;
|
||||
// whisper_vocab vocab;
|
||||
// whisper_state * state = nullptr;
|
||||
public PointerByReference vocab;
|
||||
public PointerByReference state;
|
||||
|
||||
/** populated by whisper_init_from_file_with_params() */
|
||||
String path_model;
|
||||
WhisperContextParams params;
|
||||
|
||||
// public static class ByReference extends WhisperContext implements Structure.ByReference {
|
||||
// }
|
||||
//
|
||||
// public static class ByValue extends WhisperContext implements Structure.ByValue {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected List<String> getFieldOrder() {
|
||||
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
|
||||
// }
|
||||
}
|
@ -1,207 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.Native;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
|
||||
*/
|
||||
public class WhisperCpp implements AutoCloseable {
|
||||
private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
|
||||
private Pointer ctx = null;
|
||||
private Pointer paramsPointer = null;
|
||||
private Pointer greedyParamsPointer = null;
|
||||
private Pointer beamParamsPointer = null;
|
||||
|
||||
public File modelDir() {
|
||||
String modelDirPath = System.getenv("XDG_CACHE_HOME");
|
||||
if (modelDirPath == null) {
|
||||
modelDirPath = System.getProperty("user.home") + "/.cache";
|
||||
}
|
||||
|
||||
return new File(modelDirPath, "whisper");
|
||||
}
|
||||
|
||||
/**
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
*/
|
||||
public void initContext(String modelPath) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, getContextDefaultParams());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
* @param params - params to use when initialising the context
|
||||
*/
|
||||
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, params);
|
||||
}
|
||||
|
||||
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
if (ctx != null) {
|
||||
lib.whisper_free(ctx);
|
||||
}
|
||||
|
||||
if (!modelPath.contains("/") && !modelPath.contains("\\")) {
|
||||
if (!modelPath.endsWith(".bin")) {
|
||||
modelPath = "ggml-" + modelPath.replace("-", ".") + ".bin";
|
||||
}
|
||||
|
||||
modelPath = new File(modelDir(), modelPath).getAbsolutePath();
|
||||
}
|
||||
|
||||
ctx = lib.whisper_init_from_file_with_params(modelPath, params);
|
||||
|
||||
if (ctx == null) {
|
||||
throw new FileNotFoundException(modelPath);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*/
|
||||
public WhisperContextParams getContextDefaultParams() {
|
||||
paramsPointer = lib.whisper_context_default_params_by_ref();
|
||||
WhisperContextParams params = new WhisperContextParams(paramsPointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_full()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*
|
||||
* @param strategy - GREEDY
|
||||
*/
|
||||
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||
Pointer pointer;
|
||||
|
||||
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
||||
if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
|
||||
if (greedyParamsPointer == null) {
|
||||
greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
}
|
||||
pointer = greedyParamsPointer;
|
||||
} else {
|
||||
if (beamParamsPointer == null) {
|
||||
beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||
}
|
||||
pointer = beamParamsPointer;
|
||||
}
|
||||
|
||||
WhisperFullParams params = new WhisperFullParams(pointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
freeContext();
|
||||
freeParams();
|
||||
System.out.println("Whisper closed");
|
||||
}
|
||||
|
||||
private void freeContext() {
|
||||
if (ctx != null) {
|
||||
lib.whisper_free(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
private void freeParams() {
|
||||
if (paramsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(paramsPointer));
|
||||
paramsPointer = null;
|
||||
}
|
||||
if (greedyParamsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(greedyParamsPointer));
|
||||
greedyParamsPointer = null;
|
||||
}
|
||||
if (beamParamsPointer != null) {
|
||||
Native.free(Pointer.nativeValue(beamParamsPointer));
|
||||
beamParamsPointer = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
|
||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||
throw new IOException("Failed to process audio");
|
||||
}
|
||||
|
||||
int nSegments = lib.whisper_full_n_segments(ctx);
|
||||
|
||||
StringBuilder str = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < nSegments; i++) {
|
||||
String text = lib.whisper_full_get_segment_text(ctx, i);
|
||||
System.out.println("Segment:" + text);
|
||||
str.append(text);
|
||||
}
|
||||
|
||||
return str.toString().trim();
|
||||
}
|
||||
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
|
||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||
throw new IOException("Failed to process audio");
|
||||
}
|
||||
|
||||
int nSegments = lib.whisper_full_n_segments(ctx);
|
||||
List<WhisperSegment> segments= new ArrayList<>(nSegments);
|
||||
|
||||
|
||||
for (int i = 0; i < nSegments; i++) {
|
||||
long t0 = lib.whisper_full_get_segment_t0(ctx, i);
|
||||
String text = lib.whisper_full_get_segment_text(ctx, i);
|
||||
long t1 = lib.whisper_full_get_segment_t1(ctx, i);
|
||||
segments.add(new WhisperSegment(t0,t1,text));
|
||||
}
|
||||
|
||||
return segments;
|
||||
}
|
||||
|
||||
// public int getTextSegmentCount(Pointer ctx) {
|
||||
// return lib.whisper_full_n_segments(ctx);
|
||||
// }
|
||||
// public String getTextSegment(Pointer ctx, int index) {
|
||||
// return lib.whisper_full_get_segment_text(ctx, index);
|
||||
// }
|
||||
|
||||
public String getSystemInfo() {
|
||||
return lib.whisper_print_system_info();
|
||||
}
|
||||
|
||||
public int benchMemcpy(int nthread) {
|
||||
return lib.whisper_bench_memcpy(nthread);
|
||||
}
|
||||
|
||||
public int benchGgmlMulMat(int nthread) {
|
||||
return lib.whisper_bench_ggml_mul_mat(nthread);
|
||||
}
|
||||
}
|
@ -1,388 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.Library;
|
||||
import com.sun.jna.Native;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
|
||||
public interface WhisperCppJnaLibrary extends Library {
|
||||
WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
|
||||
|
||||
String whisper_print_system_info();
|
||||
|
||||
/**
|
||||
* DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
|
||||
*
|
||||
* @param path_model Path to the model file
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file(String path_model);
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*/
|
||||
Pointer whisper_context_default_params_by_ref();
|
||||
|
||||
void whisper_free_context_params(Pointer params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a file.
|
||||
*
|
||||
* @param path_model Path to the model file
|
||||
* @param params Pointer to whisper_context_params
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
||||
*
|
||||
* @param buffer Model buffer
|
||||
* @param buffer_size Size of the model buffer
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_buffer(Pointer buffer, int buffer_size);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model using a model loader.
|
||||
*
|
||||
* @param loader Model loader
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init(WhisperModelLoader loader);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a file without allocating the state.
|
||||
*
|
||||
* @param path_model Path to the model file
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file_no_state(String path_model);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a buffer without allocating the state.
|
||||
*
|
||||
* @param buffer Model buffer
|
||||
* @param buffer_size Size of the model buffer
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_buffer_no_state(Pointer buffer, int buffer_size);
|
||||
|
||||
// Pointer whisper_init_from_buffer_no_state(Pointer buffer, long buffer_size);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model using a model loader without allocating the state.
|
||||
*
|
||||
* @param loader Model loader
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_no_state(WhisperModelLoader loader);
|
||||
|
||||
/**
|
||||
* Allocate memory for the Whisper state.
|
||||
*
|
||||
* @param ctx Whisper context
|
||||
* @return Whisper state on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_state(Pointer ctx);
|
||||
|
||||
/**
|
||||
* Free all allocated memory associated with the Whisper context.
|
||||
*
|
||||
* @param ctx Whisper context
|
||||
*/
|
||||
void whisper_free(Pointer ctx);
|
||||
|
||||
/**
|
||||
* Free all allocated memory associated with the Whisper state.
|
||||
*
|
||||
* @param state Whisper state
|
||||
*/
|
||||
void whisper_free_state(Pointer state);
|
||||
|
||||
|
||||
/**
|
||||
* Convert RAW PCM audio to log mel spectrogram.
|
||||
* The resulting spectrogram is stored inside the default state of the provided whisper context.
|
||||
*
|
||||
* @param ctx - Pointer to a WhisperContext
|
||||
* @return 0 on success
|
||||
*/
|
||||
int whisper_pcm_to_mel(Pointer ctx, final float[] samples, int n_samples, int n_threads);
|
||||
|
||||
/**
|
||||
* @param ctx Pointer to a WhisperContext
|
||||
* @param state Pointer to WhisperState
|
||||
* @param n_samples
|
||||
* @param n_threads
|
||||
* @return 0 on success
|
||||
*/
|
||||
int whisper_pcm_to_mel_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);
|
||||
|
||||
/**
|
||||
* This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
||||
* Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
||||
* n_mel must be 80
|
||||
* @return 0 on success
|
||||
*/
|
||||
int whisper_set_mel(Pointer ctx, final float[] data, int n_len, int n_mel);
|
||||
int whisper_set_mel_with_state(Pointer ctx, Pointer state, final float[] data, int n_len, int n_mel);
|
||||
|
||||
/**
|
||||
* Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
|
||||
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
||||
* Offset can be used to specify the offset of the first frame in the spectrogram.
|
||||
* @return 0 on success
|
||||
*/
|
||||
int whisper_encode(Pointer ctx, int offset, int n_threads);
|
||||
|
||||
int whisper_encode_with_state(Pointer ctx, Pointer state, int offset, int n_threads);
|
||||
|
||||
/**
|
||||
* Run the Whisper decoder to obtain the logits and probabilities for the next token.
|
||||
* Make sure to call whisper_encode() first.
|
||||
* tokens + n_tokens is the provided context for the decoder.
|
||||
* n_past is the number of tokens to use from previous decoder calls.
|
||||
* Returns 0 on success
|
||||
* TODO: add support for multiple decoders
|
||||
*/
|
||||
int whisper_decode(Pointer ctx, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
||||
|
||||
/**
|
||||
* @param ctx
|
||||
* @param state
|
||||
* @param tokens Pointer to int tokens
|
||||
* @param n_tokens
|
||||
* @param n_past
|
||||
* @param n_threads
|
||||
* @return
|
||||
*/
|
||||
int whisper_decode_with_state(Pointer ctx, Pointer state, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
||||
|
||||
/**
|
||||
* Convert the provided text into tokens.
|
||||
* The tokens pointer must be large enough to hold the resulting tokens.
|
||||
* Returns the number of tokens on success, no more than n_max_tokens
|
||||
* Returns -1 on failure
|
||||
* TODO: not sure if correct
|
||||
*/
|
||||
int whisper_tokenize(Pointer ctx, String text, Pointer tokens, int n_max_tokens);
|
||||
|
||||
/** Largest language id (i.e. number of available languages - 1) */
|
||||
int whisper_lang_max_id();
|
||||
|
||||
/**
|
||||
* @return the id of the specified language, returns -1 if not found.
|
||||
* Examples:
|
||||
* "de" -> 2
|
||||
* "german" -> 2
|
||||
*/
|
||||
int whisper_lang_id(String lang);
|
||||
|
||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
||||
String whisper_lang_str(int id);
|
||||
|
||||
/**
|
||||
* Use mel data at offset_ms to try and auto-detect the spoken language.
|
||||
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
|
||||
* Returns the top language id or negative on failure
|
||||
* If not null, fills the lang_probs array with the probabilities of all languages
|
||||
* The array must be whisper_lang_max_id() + 1 in size
|
||||
*
|
||||
* ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
|
||||
*/
|
||||
int whisper_lang_auto_detect(Pointer ctx, int offset_ms, int n_threads, float[] lang_probs);
|
||||
|
||||
int whisper_lang_auto_detect_with_state(Pointer ctx, Pointer state, int offset_ms, int n_threads, float[] lang_probs);
|
||||
|
||||
int whisper_n_len (Pointer ctx); // mel length
|
||||
int whisper_n_len_from_state(Pointer state); // mel length
|
||||
int whisper_n_vocab (Pointer ctx);
|
||||
int whisper_n_text_ctx (Pointer ctx);
|
||||
int whisper_n_audio_ctx (Pointer ctx);
|
||||
int whisper_is_multilingual (Pointer ctx);
|
||||
|
||||
int whisper_model_n_vocab (Pointer ctx);
|
||||
int whisper_model_n_audio_ctx (Pointer ctx);
|
||||
int whisper_model_n_audio_state(Pointer ctx);
|
||||
int whisper_model_n_audio_head (Pointer ctx);
|
||||
int whisper_model_n_audio_layer(Pointer ctx);
|
||||
int whisper_model_n_text_ctx (Pointer ctx);
|
||||
int whisper_model_n_text_state (Pointer ctx);
|
||||
int whisper_model_n_text_head (Pointer ctx);
|
||||
int whisper_model_n_text_layer (Pointer ctx);
|
||||
int whisper_model_n_mels (Pointer ctx);
|
||||
int whisper_model_ftype (Pointer ctx);
|
||||
int whisper_model_type (Pointer ctx);
|
||||
|
||||
/**
|
||||
* Token logits obtained from the last call to whisper_decode().
|
||||
* The logits for the last token are stored in the last row
|
||||
* Rows: n_tokens
|
||||
* Cols: n_vocab
|
||||
*/
|
||||
float[] whisper_get_logits (Pointer ctx);
|
||||
float[] whisper_get_logits_from_state(Pointer state);
|
||||
|
||||
// Token Id -> String. Uses the vocabulary in the provided context
|
||||
String whisper_token_to_str(Pointer ctx, int token);
|
||||
String whisper_model_type_readable(Pointer ctx);
|
||||
|
||||
// Special tokens
|
||||
int whisper_token_eot (Pointer ctx);
|
||||
int whisper_token_sot (Pointer ctx);
|
||||
int whisper_token_prev(Pointer ctx);
|
||||
int whisper_token_solm(Pointer ctx);
|
||||
int whisper_token_not (Pointer ctx);
|
||||
int whisper_token_beg (Pointer ctx);
|
||||
int whisper_token_lang(Pointer ctx, int lang_id);
|
||||
|
||||
// Task tokens
|
||||
int whisper_token_translate (Pointer ctx);
|
||||
int whisper_token_transcribe(Pointer ctx);
|
||||
|
||||
// Performance information from the default state.
|
||||
void whisper_print_timings(Pointer ctx);
|
||||
void whisper_reset_timings(Pointer ctx);
|
||||
|
||||
// Note: Even if `whisper_full_params is stripped back to just 4 ints, JNA throws "Invalid memory access"
|
||||
// when `whisper_full_default_params()` tries to return a struct.
|
||||
// WhisperFullParams whisper_full_default_params(int strategy);
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_full()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*
|
||||
* @param strategy - WhisperSamplingStrategy.value
|
||||
*/
|
||||
Pointer whisper_full_default_params_by_ref(int strategy);
|
||||
|
||||
void whisper_free_params(Pointer params);
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
|
||||
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
|
||||
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||
// Result is stored in the default state of the context
|
||||
// Not thread safe if executed in parallel on the same context.
|
||||
// It seems this approach can offer some speedup in some cases.
|
||||
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
|
||||
|
||||
/**
|
||||
* Number of generated text segments.
|
||||
* A segment can be a few words, a sentence, or even a paragraph.
|
||||
* @param ctx Pointer to WhisperContext
|
||||
*/
|
||||
int whisper_full_n_segments (Pointer ctx);
|
||||
|
||||
/**
|
||||
* @param state Pointer to WhisperState
|
||||
*/
|
||||
int whisper_full_n_segments_from_state(Pointer state);
|
||||
|
||||
/**
|
||||
* Language id associated with the context's default state.
|
||||
* @param ctx Pointer to WhisperContext
|
||||
*/
|
||||
int whisper_full_lang_id(Pointer ctx);
|
||||
|
||||
/** Language id associated with the provided state */
|
||||
int whisper_full_lang_id_from_state(Pointer state);
|
||||
|
||||
|
||||
/** Get the start time of the specified segment. */
|
||||
long whisper_full_get_segment_t0(Pointer ctx, int i_segment);
|
||||
|
||||
/** Get the start time of the specified segment from the state. */
|
||||
long whisper_full_get_segment_t0_from_state(Pointer state, int i_segment);
|
||||
|
||||
/** Get the end time of the specified segment. */
|
||||
long whisper_full_get_segment_t1(Pointer ctx, int i_segment);
|
||||
|
||||
/** Get the end time of the specified segment from the state. */
|
||||
long whisper_full_get_segment_t1_from_state(Pointer state, int i_segment);
|
||||
|
||||
/** Get the text of the specified segment. */
|
||||
String whisper_full_get_segment_text(Pointer ctx, int i_segment);
|
||||
|
||||
/** Get the text of the specified segment from the state. */
|
||||
String whisper_full_get_segment_text_from_state(Pointer state, int i_segment);
|
||||
|
||||
/** Get the number of tokens in the specified segment. */
|
||||
int whisper_full_n_tokens(Pointer ctx, int i_segment);
|
||||
|
||||
/** Get the number of tokens in the specified segment from the state. */
|
||||
int whisper_full_n_tokens_from_state(Pointer state, int i_segment);
|
||||
|
||||
/** Get the token text of the specified token in the specified segment. */
|
||||
String whisper_full_get_token_text(Pointer ctx, int i_segment, int i_token);
|
||||
|
||||
|
||||
/** Get the token text of the specified token in the specified segment from the state. */
|
||||
String whisper_full_get_token_text_from_state(Pointer ctx, Pointer state, int i_segment, int i_token);
|
||||
|
||||
/** Get the token ID of the specified token in the specified segment. */
|
||||
int whisper_full_get_token_id(Pointer ctx, int i_segment, int i_token);
|
||||
|
||||
/** Get the token ID of the specified token in the specified segment from the state. */
|
||||
int whisper_full_get_token_id_from_state(Pointer state, int i_segment, int i_token);
|
||||
|
||||
/** Get token data for the specified token in the specified segment. */
|
||||
WhisperTokenData whisper_full_get_token_data(Pointer ctx, int i_segment, int i_token);
|
||||
|
||||
/** Get token data for the specified token in the specified segment from the state. */
|
||||
WhisperTokenData whisper_full_get_token_data_from_state(Pointer state, int i_segment, int i_token);
|
||||
|
||||
/** Get the probability of the specified token in the specified segment. */
|
||||
float whisper_full_get_token_p(Pointer ctx, int i_segment, int i_token);
|
||||
|
||||
/** Get the probability of the specified token in the specified segment from the state. */
|
||||
float whisper_full_get_token_p_from_state(Pointer state, int i_segment, int i_token);
|
||||
|
||||
/**
|
||||
* Benchmark function for memcpy.
|
||||
*
|
||||
* @param nThreads Number of threads to use for the benchmark.
|
||||
* @return The result of the benchmark.
|
||||
*/
|
||||
int whisper_bench_memcpy(int nThreads);
|
||||
|
||||
/**
|
||||
* Benchmark function for memcpy as a string.
|
||||
*
|
||||
* @param nThreads Number of threads to use for the benchmark.
|
||||
* @return The result of the benchmark as a string.
|
||||
*/
|
||||
String whisper_bench_memcpy_str(int nThreads);
|
||||
|
||||
/**
|
||||
* Benchmark function for ggml_mul_mat.
|
||||
*
|
||||
* @param nThreads Number of threads to use for the benchmark.
|
||||
* @return The result of the benchmark.
|
||||
*/
|
||||
int whisper_bench_ggml_mul_mat(int nThreads);
|
||||
|
||||
/**
|
||||
* Benchmark function for ggml_mul_mat as a string.
|
||||
*
|
||||
* @param nThreads Number of threads to use for the benchmark.
|
||||
* @return The result of the benchmark as a string.
|
||||
*/
|
||||
String whisper_bench_ggml_mul_mat_str(int nThreads);
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.bean;
|
||||
|
||||
/**
|
||||
* Created by litonglinux@qq.com on 10/21/2023_7:48 AM
|
||||
*/
|
||||
public class WhisperSegment {
|
||||
private long start, end;
|
||||
private String sentence;
|
||||
|
||||
public WhisperSegment() {
|
||||
}
|
||||
|
||||
public WhisperSegment(long start, long end, String sentence) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
public long getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public long getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public String getSentence() {
|
||||
return sentence;
|
||||
}
|
||||
|
||||
public void setStart(long start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public void setEnd(long end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
public void setSentence(String sentence) {
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + start + " --> " + end + "]:" + sentence;
|
||||
}
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||
|
||||
/**
|
||||
* Callback before the encoder starts.
|
||||
* If not null, called before the encoder starts.
|
||||
* If it returns false, the computation is aborted.
|
||||
*/
|
||||
public interface WhisperEncoderBeginCallback extends Callback {
|
||||
|
||||
/**
|
||||
* Callback method before the encoder starts.
|
||||
*
|
||||
* @param ctx The whisper context.
|
||||
* @param state The whisper state.
|
||||
* @param user_data User data.
|
||||
* @return True if the computation should proceed, false otherwise.
|
||||
*/
|
||||
boolean callback(Pointer ctx, Pointer state, Pointer user_data);
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
||||
|
||||
/**
|
||||
* Callback to filter logits.
|
||||
* Can be used to modify the logits before sampling.
|
||||
* If not null, called after applying temperature to logits.
|
||||
*/
|
||||
public interface WhisperLogitsFilterCallback extends Callback {
|
||||
|
||||
/**
|
||||
* Callback method to filter logits.
|
||||
*
|
||||
* @param ctx The whisper context.
|
||||
* @param state The whisper state.
|
||||
* @param tokens The array of whisper_token_data.
|
||||
* @param n_tokens The number of tokens.
|
||||
* @param logits The array of logits.
|
||||
* @param user_data User data.
|
||||
*/
|
||||
void callback(Pointer ctx, Pointer state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data);
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||
|
||||
/**
|
||||
* Callback for the text segment.
|
||||
* Called on every newly generated text segment.
|
||||
* Use the whisper_full_...() functions to obtain the text segments.
|
||||
*/
|
||||
public interface WhisperNewSegmentCallback extends Callback {
|
||||
|
||||
/**
|
||||
* Callback method for the text segment.
|
||||
*
|
||||
* @param ctx The whisper context.
|
||||
* @param state The whisper state.
|
||||
* @param n_new The number of newly generated text segments.
|
||||
* @param user_data User data.
|
||||
*/
|
||||
void callback(Pointer ctx, Pointer state, int n_new, Pointer user_data);
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||
|
||||
/**
|
||||
* Callback for progress updates.
|
||||
*/
|
||||
public interface WhisperProgressCallback extends Callback {
|
||||
|
||||
/**
|
||||
* Callback method for progress updates.
|
||||
*
|
||||
* @param ctx The whisper context.
|
||||
* @param state The whisper state.
|
||||
* @param progress The progress value.
|
||||
* @param user_data User data.
|
||||
*/
|
||||
void callback(Pointer ctx, Pointer state, int progress, Pointer user_data);
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.ggml;
|
||||
|
||||
public class GgmlTensor {
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.ggml;
|
||||
|
||||
public enum GgmlType {
|
||||
GGML_TYPE_F32,
|
||||
GGML_TYPE_F16,
|
||||
GGML_TYPE_Q4_0,
|
||||
GGML_TYPE_Q4_1,
|
||||
REMOVED_GGML_TYPE_Q4_2, // support has been removed
|
||||
REMOVED_GGML_TYPE_Q4_3, // support has been removed
|
||||
GGML_TYPE_Q5_0,
|
||||
GGML_TYPE_Q5_1,
|
||||
GGML_TYPE_Q8_0,
|
||||
GGML_TYPE_Q8_1,
|
||||
GGML_TYPE_I8,
|
||||
GGML_TYPE_I16,
|
||||
GGML_TYPE_I32,
|
||||
GGML_TYPE_COUNT,
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.model;
|
||||
|
||||
public enum EModel {
|
||||
MODEL_UNKNOWN,
|
||||
MODEL_TINY,
|
||||
MODEL_BASE,
|
||||
MODEL_SMALL,
|
||||
MODEL_MEDIUM,
|
||||
MODEL_LARGE,
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import io.github.ggerganov.whispercpp.ggml.GgmlTensor;
|
||||
import io.github.ggerganov.whispercpp.model.EModel;
|
||||
|
||||
public class WhisperModel {
|
||||
// EModel type = EModel.MODEL_UNKNOWN;
|
||||
//
|
||||
// WhisperHParams hparams;
|
||||
// WhisperFilters filters;
|
||||
//
|
||||
// // encoder.positional_embedding
|
||||
// GgmlTensor e_pe;
|
||||
//
|
||||
// // encoder.conv1
|
||||
// GgmlTensor e_conv_1_w;
|
||||
// GgmlTensor e_conv_1_b;
|
||||
//
|
||||
// // encoder.conv2
|
||||
// GgmlTensor e_conv_2_w;
|
||||
// GgmlTensor e_conv_2_b;
|
||||
//
|
||||
// // encoder.ln_post
|
||||
// GgmlTensor e_ln_w;
|
||||
// GgmlTensor e_ln_b;
|
||||
//
|
||||
// // decoder.positional_embedding
|
||||
// GgmlTensor d_pe;
|
||||
//
|
||||
// // decoder.token_embedding
|
||||
// GgmlTensor d_te;
|
||||
//
|
||||
// // decoder.ln
|
||||
// GgmlTensor d_ln_w;
|
||||
// GgmlTensor d_ln_b;
|
||||
//
|
||||
// std::vector<whisper_layer_encoder> layers_encoder;
|
||||
// std::vector<whisper_layer_decoder> layers_decoder;
|
||||
//
|
||||
// // context
|
||||
// struct ggml_context * ctx;
|
||||
//
|
||||
// // the model memory buffer is read-only and can be shared between processors
|
||||
// std::vector<uint8_t> * buf;
|
||||
//
|
||||
// // tensors
|
||||
// int n_loaded;
|
||||
// Map<String, GgmlTensor> tensors;
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.model;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
import com.sun.jna.Pointer;
|
||||
import com.sun.jna.Structure;
|
||||
|
||||
|
||||
public class WhisperModelLoader extends Structure {
|
||||
public Pointer context;
|
||||
public ReadFunction read;
|
||||
public EOFFunction eof;
|
||||
public CloseFunction close;
|
||||
|
||||
public static class ReadFunction implements Callback {
|
||||
public Pointer invoke(Pointer ctx, Pointer output, int readSize) {
|
||||
// TODO
|
||||
return ctx;
|
||||
}
|
||||
}
|
||||
|
||||
public static class EOFFunction implements Callback {
|
||||
public boolean invoke(Pointer ctx) {
|
||||
// TODO
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static class CloseFunction implements Callback {
|
||||
public void invoke(Pointer ctx) {
|
||||
// TODO
|
||||
}
|
||||
}
|
||||
|
||||
// public WhisperModelLoader(Pointer p) {
|
||||
// super(p);
|
||||
// read = new ReadFunction();
|
||||
// eof = new EOFFunction();
|
||||
// close = new CloseFunction();
|
||||
// read.setCallback(this);
|
||||
// eof.setCallback(this);
|
||||
// close.setCallback(this);
|
||||
// read.write();
|
||||
// eof.write();
|
||||
// close.write();
|
||||
// }
|
||||
|
||||
public WhisperModelLoader() {
|
||||
super();
|
||||
}
|
||||
|
||||
public interface ReadCallback extends Callback {
|
||||
Pointer invoke(Pointer ctx, Pointer output, int readSize);
|
||||
}
|
||||
|
||||
public interface EOFCallback extends Callback {
|
||||
boolean invoke(Pointer ctx);
|
||||
}
|
||||
|
||||
public interface CloseCallback extends Callback {
|
||||
void invoke(Pointer ctx);
|
||||
}
|
||||
}
|
@ -1,4 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.model;
|
||||
|
||||
public class WhisperState {
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.model;
|
||||
|
||||
import com.sun.jna.Structure;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Structure representing token data.
|
||||
*/
|
||||
public class WhisperTokenData extends Structure {
|
||||
|
||||
/** Token ID. */
|
||||
public int id;
|
||||
|
||||
/** Forced timestamp token ID. */
|
||||
public int tid;
|
||||
|
||||
/** Probability of the token. */
|
||||
public float p;
|
||||
|
||||
/** Log probability of the token. */
|
||||
public float plog;
|
||||
|
||||
/** Probability of the timestamp token. */
|
||||
public float pt;
|
||||
|
||||
/** Sum of probabilities of all timestamp tokens. */
|
||||
public float ptsum;
|
||||
|
||||
/**
|
||||
* Start time of the token (token-level timestamp data).
|
||||
* Do not use if you haven't computed token-level timestamps.
|
||||
*/
|
||||
public long t0;
|
||||
|
||||
/**
|
||||
* End time of the token (token-level timestamp data).
|
||||
* Do not use if you haven't computed token-level timestamps.
|
||||
*/
|
||||
public long t1;
|
||||
|
||||
/** Voice length of the token. */
|
||||
public float vlen;
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("id", "tid", "p", "plog", "pt", "ptsum", "t0", "t1", "vlen");
|
||||
}
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.Structure;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class BeamSearchParams extends Structure {
|
||||
/** ref: <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265">...</a> */
|
||||
public int beam_size;
|
||||
|
||||
/** ref: <a href="https://arxiv.org/pdf/2204.05424.pdf">...</a> */
|
||||
public float patience;
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("beam_size", "patience");
|
||||
}
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.IntegerType;
|
||||
|
||||
import java.util.function.BooleanSupplier;
|
||||
|
||||
public class CBool extends IntegerType implements BooleanSupplier {
|
||||
public static final int SIZE = 1;
|
||||
public static final CBool FALSE = new CBool(0);
|
||||
public static final CBool TRUE = new CBool(1);
|
||||
|
||||
|
||||
public CBool() {
|
||||
this(0);
|
||||
}
|
||||
|
||||
public CBool(long value) {
|
||||
super(SIZE, value, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getAsBoolean() {
|
||||
return intValue() == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return intValue() == 1 ? "true" : "false";
|
||||
}
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.Structure;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class GreedyParams extends Structure {
|
||||
/** <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264">...</a> */
|
||||
public int best_of;
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Collections.singletonList("best_of");
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Parameters for the whisper_init_from_file_with_params() function.
|
||||
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
||||
* whisper_context_default_params()
|
||||
*/
|
||||
public class WhisperContextParams extends Structure {
|
||||
|
||||
public WhisperContextParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public CBool use_gpu;
|
||||
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public void useGpu(boolean enable) {
|
||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("use_gpu");
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperFilters {
|
||||
int n_mel;
|
||||
int n_fft;
|
||||
|
||||
List<Float> data;
|
||||
}
|
@ -1,326 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Parameters for the whisper_full() function.
|
||||
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
||||
* whisper_full_default_params()
|
||||
*/
|
||||
public class WhisperFullParams extends Structure {
|
||||
|
||||
public WhisperFullParams(Pointer p) {
|
||||
super(p);
|
||||
// super(p, ALIGN_MSVC);
|
||||
// super(p, ALIGN_GNUC);
|
||||
}
|
||||
|
||||
/** Sampling strategy for whisper_full() function. */
|
||||
public int strategy;
|
||||
|
||||
/** Number of threads. (default = 4) */
|
||||
public int n_threads;
|
||||
|
||||
/** Maximum tokens to use from past text as a prompt for the decoder. (default = 16384) */
|
||||
public int n_max_text_ctx;
|
||||
|
||||
/** Start offset in milliseconds. (default = 0) */
|
||||
public int offset_ms;
|
||||
|
||||
/** Audio duration to process in milliseconds. (default = 0) */
|
||||
public int duration_ms;
|
||||
|
||||
/** Translate flag. (default = false) */
|
||||
public CBool translate;
|
||||
|
||||
/** The compliment of translateMode() */
|
||||
public void transcribeMode() {
|
||||
translate = CBool.FALSE;
|
||||
}
|
||||
|
||||
/** The compliment of transcribeMode() */
|
||||
public void translateMode() {
|
||||
translate = CBool.TRUE;
|
||||
}
|
||||
|
||||
/** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. (default = true) */
|
||||
public CBool no_context;
|
||||
|
||||
/** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. (default = true) */
|
||||
public void enableContext(boolean enable) {
|
||||
no_context = enable ? CBool.FALSE : CBool.TRUE;
|
||||
}
|
||||
|
||||
/** Generate timestamps or not? */
|
||||
public CBool no_timestamps;
|
||||
|
||||
/** Flag to force single segment output (useful for streaming). (default = false) */
|
||||
public CBool single_segment;
|
||||
|
||||
/** Flag to force single segment output (useful for streaming). (default = false) */
|
||||
public void singleSegment(boolean single) {
|
||||
single_segment = single ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public CBool print_special;
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public void printSpecial(boolean enable) {
|
||||
print_special = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print progress information. (default = true) */
|
||||
public CBool print_progress;
|
||||
|
||||
/** Flag to print progress information. (default = true) */
|
||||
public void printProgress(boolean enable) {
|
||||
print_progress = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
||||
public CBool print_realtime;
|
||||
|
||||
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
||||
public void printRealtime(boolean enable) {
|
||||
print_realtime = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
||||
public CBool print_timestamps;
|
||||
|
||||
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
||||
public void printTimestamps(boolean enable) {
|
||||
print_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
||||
public CBool token_timestamps;
|
||||
|
||||
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
||||
public void tokenTimestamps(boolean enable) {
|
||||
token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** [EXPERIMENTAL] Timestamp token probability threshold (~0.01). (default = 0.01) */
|
||||
public float thold_pt;
|
||||
|
||||
/** [EXPERIMENTAL] Timestamp token sum probability threshold (~0.01). */
|
||||
public float thold_ptsum;
|
||||
|
||||
/** Maximum segment length in characters. (default = 0) */
|
||||
public int max_len;
|
||||
|
||||
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
||||
public CBool split_on_word;
|
||||
|
||||
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
||||
public void splitOnWord(boolean enable) {
|
||||
split_on_word = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Maximum tokens per segment (0, default = no limit) */
|
||||
public int max_tokens;
|
||||
|
||||
/** Overwrite the audio context size (0 = use default). */
|
||||
public int audio_ctx;
|
||||
|
||||
/** Enable tinydiarize (default = false) */
|
||||
public CBool tdrz_enable;
|
||||
|
||||
/** Enable tinydiarize (default = false) */
|
||||
public void tdrzEnable(boolean enable) {
|
||||
tdrz_enable = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Regular expression matching tokens to suppress. */
|
||||
public String suppress_regex;
|
||||
|
||||
/** Tokens to provide to the whisper decoder as an initial prompt.
|
||||
* These are prepended to any existing text context from a previous call. */
|
||||
public String initial_prompt;
|
||||
|
||||
/** Prompt tokens. (int*) */
|
||||
public Pointer prompt_tokens;
|
||||
|
||||
public void setPromptTokens(int[] tokens) {
|
||||
Memory mem = new Memory(tokens.length * 4L);
|
||||
mem.write(0, tokens, 0, tokens.length);
|
||||
prompt_tokens = mem;
|
||||
}
|
||||
|
||||
/** Number of prompt tokens. */
|
||||
public int prompt_n_tokens;
|
||||
|
||||
/** Language for auto-detection.
|
||||
* For auto-detection, set to `null`, `""`, or "auto". */
|
||||
public String language;
|
||||
|
||||
/** Flag to indicate whether to detect language automatically. */
|
||||
public CBool detect_language;
|
||||
|
||||
/** Flag to indicate whether to detect language automatically. */
|
||||
public void detectLanguage(boolean enable) {
|
||||
detect_language = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
// Common decoding parameters.
|
||||
|
||||
/** Flag to suppress blank tokens. */
|
||||
public CBool suppress_blank;
|
||||
|
||||
public void suppressBlanks(boolean enable) {
|
||||
suppress_blank = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to suppress non-speech tokens. */
|
||||
public CBool suppress_non_speech_tokens;
|
||||
|
||||
/** Flag to suppress non-speech tokens. */
|
||||
public void suppressNonSpeechTokens(boolean enable) {
|
||||
suppress_non_speech_tokens = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Initial decoding temperature. */
|
||||
public float temperature;
|
||||
|
||||
/** Maximum initial timestamp. */
|
||||
public float max_initial_ts;
|
||||
|
||||
/** Length penalty. */
|
||||
public float length_penalty;
|
||||
|
||||
// Fallback parameters.
|
||||
|
||||
/** Temperature increment. */
|
||||
public float temperature_inc;
|
||||
|
||||
/** Entropy threshold (similar to OpenAI's "compression_ratio_threshold"). */
|
||||
public float entropy_thold;
|
||||
|
||||
/** Log probability threshold. */
|
||||
public float logprob_thold;
|
||||
|
||||
/** No speech threshold. */
|
||||
public float no_speech_thold;
|
||||
|
||||
/** Greedy decoding parameters. */
|
||||
public GreedyParams greedy;
|
||||
|
||||
/**
|
||||
* Beam search decoding parameters.
|
||||
*/
|
||||
public BeamSearchParams beam_search;
|
||||
|
||||
public void setBestOf(int bestOf) {
|
||||
if (greedy == null) {
|
||||
greedy = new GreedyParams();
|
||||
}
|
||||
greedy.best_of = bestOf;
|
||||
}
|
||||
|
||||
public void setBeamSize(int beamSize) {
|
||||
if (beam_search == null) {
|
||||
beam_search = new BeamSearchParams();
|
||||
}
|
||||
beam_search.beam_size = beamSize;
|
||||
}
|
||||
|
||||
public void setBeamSizeAndPatience(int beamSize, float patience) {
|
||||
if (beam_search == null) {
|
||||
beam_search = new BeamSearchParams();
|
||||
}
|
||||
beam_search.beam_size = beamSize;
|
||||
beam_search.patience = patience;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback for every newly generated text segment.
|
||||
* WhisperNewSegmentCallback
|
||||
*/
|
||||
public Pointer new_segment_callback;
|
||||
|
||||
/**
|
||||
* User data for the new_segment_callback.
|
||||
*/
|
||||
public Pointer new_segment_callback_user_data;
|
||||
|
||||
/**
|
||||
* Callback on each progress update.
|
||||
* WhisperProgressCallback
|
||||
*/
|
||||
public Pointer progress_callback;
|
||||
|
||||
/**
|
||||
* User data for the progress_callback.
|
||||
*/
|
||||
public Pointer progress_callback_user_data;
|
||||
|
||||
/**
|
||||
* Callback each time before the encoder starts.
|
||||
* WhisperEncoderBeginCallback
|
||||
*/
|
||||
public Pointer encoder_begin_callback;
|
||||
|
||||
/**
|
||||
* User data for the encoder_begin_callback.
|
||||
*/
|
||||
public Pointer encoder_begin_callback_user_data;
|
||||
|
||||
/**
|
||||
* Callback by each decoder to filter obtained logits.
|
||||
* WhisperLogitsFilterCallback
|
||||
*/
|
||||
public Pointer logits_filter_callback;
|
||||
|
||||
/**
|
||||
* User data for the logits_filter_callback.
|
||||
*/
|
||||
public Pointer logits_filter_callback_user_data;
|
||||
|
||||
|
||||
public void setNewSegmentCallback(WhisperNewSegmentCallback callback) {
|
||||
new_segment_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
public void setProgressCallback(WhisperProgressCallback callback) {
|
||||
progress_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
public void setEncoderBeginCallbackeginCallbackCallback(WhisperEncoderBeginCallback callback) {
|
||||
encoder_begin_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
public void setLogitsFilterCallback(WhisperLogitsFilterCallback callback) {
|
||||
logits_filter_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
/** Grammar stuff */
|
||||
public Pointer grammar_rules;
|
||||
public long n_grammar_rules;
|
||||
public long i_start_rule;
|
||||
public float grammar_penalty;
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
|
||||
"no_context", "single_segment", "no_timestamps",
|
||||
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
|
||||
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
|
||||
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||
"suppress_blank", "suppress_non_speech_tokens", "temperature", "max_initial_ts", "length_penalty",
|
||||
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
|
||||
"new_segment_callback", "new_segment_callback_user_data",
|
||||
"progress_callback", "progress_callback_user_data",
|
||||
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
||||
"logits_filter_callback", "logits_filter_callback_user_data",
|
||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
||||
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
public class WhisperHParams {
|
||||
int n_vocab = 51864;
|
||||
int n_audio_ctx = 1500;
|
||||
int n_audio_state = 384;
|
||||
int n_audio_head = 6;
|
||||
int n_audio_layer = 4;
|
||||
int n_text_ctx = 448;
|
||||
int n_text_state = 384;
|
||||
int n_text_head = 6;
|
||||
int n_text_layer = 4;
|
||||
int n_mels = 80;
|
||||
int ftype = 1;
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
/** Available sampling strategies */
|
||||
public enum WhisperSamplingStrategy {
|
||||
/** similar to OpenAI's GreedyDecoder */
|
||||
WHISPER_SAMPLING_GREEDY,
|
||||
|
||||
/** similar to OpenAI's BeamSearchDecoder */
|
||||
WHISPER_SAMPLING_BEAM_SEARCH
|
||||
}
|
@ -1,144 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
||||
import io.github.ggerganov.whispercpp.params.CBool;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import javax.sound.sampled.AudioInputStream;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
|
||||
class WhisperCppTest {
|
||||
private static WhisperCpp whisper = new WhisperCpp();
|
||||
private static boolean modelInitialised = false;
|
||||
|
||||
@BeforeAll
|
||||
static void init() throws FileNotFoundException {
|
||||
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
||||
// or you can provide the absolute path to the model file.
|
||||
//String modelName = "../../models/ggml-tiny.bin";
|
||||
String modelName = "../../models/ggml-tiny.en.bin";
|
||||
try {
|
||||
whisper.initContext(modelName);
|
||||
//whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
//whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
modelInitialised = true;
|
||||
} catch (FileNotFoundException ex) {
|
||||
System.out.println("Model " + modelName + " not found");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGetDefaultFullParams_BeamSearch() {
|
||||
// When
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
|
||||
// Then
|
||||
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal(), params.strategy);
|
||||
assertNotEquals(0, params.n_threads);
|
||||
assertEquals(16384, params.n_max_text_ctx);
|
||||
assertFalse(params.translate);
|
||||
assertEquals(0.01f, params.thold_pt);
|
||||
assertEquals(5, params.beam_search.beam_size);
|
||||
assertEquals(-1.0f, params.beam_search.patience);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGetDefaultFullParams_Greedy() {
|
||||
// When
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
// Then
|
||||
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
|
||||
assertNotEquals(0, params.n_threads);
|
||||
assertEquals(16384, params.n_max_text_ctx);
|
||||
assertEquals(5, params.greedy.best_of);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testFullTranscribe() throws Exception {
|
||||
if (!modelInitialised) {
|
||||
System.out.println("Model not initialised, skipping test");
|
||||
return;
|
||||
}
|
||||
|
||||
// Given
|
||||
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
||||
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
||||
|
||||
byte[] b = new byte[audioInputStream.available()];
|
||||
float[] floats = new float[b.length / 2];
|
||||
|
||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||
params.print_progress = CBool.FALSE;
|
||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||
|
||||
|
||||
try {
|
||||
audioInputStream.read(b);
|
||||
|
||||
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
||||
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
||||
floats[j] = intSample / 32767.0f;
|
||||
}
|
||||
|
||||
// When
|
||||
String result = whisper.fullTranscribe(params, floats);
|
||||
|
||||
// Then
|
||||
System.err.println(result);
|
||||
assertEquals("And so my fellow Americans ask not what your country can do for you " +
|
||||
"ask what you can do for your country.",
|
||||
result.replace(",", ""));
|
||||
} finally {
|
||||
audioInputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testFullTranscribeWithTime() throws Exception {
|
||||
if (!modelInitialised) {
|
||||
System.out.println("Model not initialised, skipping test");
|
||||
return;
|
||||
}
|
||||
|
||||
// Given
|
||||
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
||||
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
||||
|
||||
byte[] b = new byte[audioInputStream.available()];
|
||||
float[] floats = new float[b.length / 2];
|
||||
|
||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||
params.print_progress = CBool.FALSE;
|
||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||
|
||||
try {
|
||||
audioInputStream.read(b);
|
||||
|
||||
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
||||
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
||||
floats[j] = intSample / 32767.0f;
|
||||
}
|
||||
|
||||
List<WhisperSegment> segments = whisper.fullTranscribeWithTime(params, floats);
|
||||
assertTrue(segments.size() > 0, "The size of segments should be greater than 0");
|
||||
for (WhisperSegment segment : segments) {
|
||||
System.out.println(segment);
|
||||
}
|
||||
} finally {
|
||||
audioInputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class WhisperJnaLibraryTest {
|
||||
|
||||
@Test
|
||||
void testWhisperPrint_system_info() {
|
||||
String systemInfo = WhisperCppJnaLibrary.instance.whisper_print_system_info();
|
||||
// eg: "AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0
|
||||
// | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | "
|
||||
System.out.println("System info: " + systemInfo);
|
||||
assertTrue(systemInfo.length() > 10);
|
||||
}
|
||||
}
|
@ -41,7 +41,7 @@ make publish-npm
|
||||
|
||||
## Sample run
|
||||
|
||||
```text
|
||||
```java
|
||||
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
||||
|
||||
whisper_model_load: loading model from 'whisper.bin'
|
||||
@ -63,7 +63,7 @@ whisper_model_load: ggml ctx size = 140.60 MB
|
||||
whisper_model_load: memory size = 22.83 MB
|
||||
whisper_model_load: model size = 140.54 MB
|
||||
|
||||
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
||||
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
||||
|
||||
operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
|
||||
|
||||
|
@ -20,7 +20,7 @@ struct whisper_context * g_context;
|
||||
EMSCRIPTEN_BINDINGS(whisper) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
if (g_context == nullptr) {
|
||||
g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
g_context = whisper_init_from_file(path_model.c_str());
|
||||
if (g_context != nullptr) {
|
||||
return true;
|
||||
} else {
|
||||
|
@ -1 +1 @@
|
||||
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}}self.onmessage=handleMessage;
|
||||
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "whisper.cpp",
|
||||
"version": "1.7.2",
|
||||
"version": "1.2.0",
|
||||
"description": "Whisper speech recognition",
|
||||
"main": "whisper.js",
|
||||
"scripts": {
|
||||
|
File diff suppressed because one or more lines are too long
3
bindings/ruby/.gitignore
vendored
3
bindings/ruby/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
LICENSE
|
||||
pkg/
|
||||
lib/whisper.*
|
@ -1,187 +0,0 @@
|
||||
whispercpp
|
||||
==========
|
||||
|
||||

|
||||
|
||||
Ruby bindings for [whisper.cpp][], an interface of automatic speech recognition model.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
Install the gem and add to the application's Gemfile by executing:
|
||||
|
||||
$ bundle add whispercpp
|
||||
|
||||
If bundler is not being used to manage dependencies, install the gem by executing:
|
||||
|
||||
$ gem install whispercpp
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
```ruby
|
||||
require "whisper"
|
||||
|
||||
whisper = Whisper::Context.new("path/to/model.bin")
|
||||
|
||||
params = Whisper::Params.new
|
||||
params.language = "en"
|
||||
params.offset = 10_000
|
||||
params.duration = 60_000
|
||||
params.max_text_tokens = 300
|
||||
params.translate = true
|
||||
params.print_timestamps = false
|
||||
params.initial_prompt = "Initial prompt here."
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
||||
puts whole_text
|
||||
end
|
||||
|
||||
```
|
||||
|
||||
### Preparing model ###
|
||||
|
||||
Use script to download model file(s):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp
|
||||
sh ./models/download-ggml-model.sh base.en
|
||||
```
|
||||
|
||||
There are some types of models. See [models][] page for details.
|
||||
|
||||
### Preparing audio file ###
|
||||
|
||||
Currently, whisper.cpp accepts only 16-bit WAV files.
|
||||
|
||||
### API ###
|
||||
|
||||
Once `Whisper::Context#transcribe` called, you can retrieve segments by `#each_segment`:
|
||||
|
||||
```ruby
|
||||
def format_time(time_ms)
|
||||
sec, decimal_part = time_ms.divmod(1000)
|
||||
min, sec = sec.divmod(60)
|
||||
hour, min = min.divmod(60)
|
||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
||||
end
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params)
|
||||
|
||||
whisper.each_segment.with_index do |segment, index|
|
||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
||||
nth: index + 1,
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||
puts line
|
||||
end
|
||||
|
||||
```
|
||||
|
||||
You can also add hook to params called on new segment:
|
||||
|
||||
```ruby
|
||||
def format_time(time_ms)
|
||||
sec, decimal_part = time_ms.divmod(1000)
|
||||
min, sec = sec.divmod(60)
|
||||
hour, min = min.divmod(60)
|
||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
||||
end
|
||||
|
||||
# Add hook before calling #transcribe
|
||||
params.on_new_segment do |segment|
|
||||
line = "[%{st} --> %{ed}] %{text}" % {
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||
puts line
|
||||
end
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params)
|
||||
|
||||
```
|
||||
|
||||
You can see model information:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("path/to/model.bin")
|
||||
model = whisper.model
|
||||
|
||||
model.n_vocab # => 51864
|
||||
model.n_audio_ctx # => 1500
|
||||
model.n_audio_state # => 512
|
||||
model.n_audio_head # => 8
|
||||
model.n_audio_layer # => 6
|
||||
model.n_text_ctx # => 448
|
||||
model.n_text_state # => 512
|
||||
model.n_text_head # => 8
|
||||
model.n_text_layer # => 6
|
||||
model.n_mels # => 80
|
||||
model.ftype # => 1
|
||||
model.type # => "base"
|
||||
|
||||
```
|
||||
|
||||
You can set log callback:
|
||||
|
||||
```ruby
|
||||
prefix = "[MyApp] "
|
||||
log_callback = ->(level, buffer, user_data) {
|
||||
case level
|
||||
when Whisper::LOG_LEVEL_NONE
|
||||
puts "#{user_data}none: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_INFO
|
||||
puts "#{user_data}info: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_WARN
|
||||
puts "#{user_data}warn: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_ERROR
|
||||
puts "#{user_data}error: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_DEBUG
|
||||
puts "#{user_data}debug: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_CONT
|
||||
puts "#{user_data}same to previous: #{buffer}"
|
||||
end
|
||||
}
|
||||
Whisper.log_set log_callback, prefix
|
||||
```
|
||||
|
||||
Using this feature, you are also able to suppress log:
|
||||
|
||||
```ruby
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
Whisper::Context.new(MODEL)
|
||||
```
|
||||
|
||||
You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
|
||||
|
||||
```ruby
|
||||
require "whisper"
|
||||
require "wavefile"
|
||||
|
||||
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
|
||||
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
|
||||
|
||||
whisper = Whisper::Context.new("path/to/model.bin")
|
||||
whisper.full(Whisper::Params.new, samples)
|
||||
whisper.each_segment do |segment|
|
||||
puts segment.text
|
||||
end
|
||||
```
|
||||
|
||||
The second argument `samples` may be an array, an object with `length` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
The same to [whisper.cpp][].
|
||||
|
||||
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
||||
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
@ -1,80 +0,0 @@
|
||||
require 'rake/clean'
|
||||
require "bundler/gem_tasks"
|
||||
require "rake/testtask"
|
||||
require_relative "extsources"
|
||||
|
||||
SOURCES = FileList[]
|
||||
|
||||
EXTSOURCES.each do |src|
|
||||
basename = src.pathmap("%f")
|
||||
dest = basename == "LICENSE" ? basename : src.pathmap("%{../..,ext}p")
|
||||
dir = dest.pathmap("%d")
|
||||
file src
|
||||
directory dir
|
||||
file dest => [src, dir] do |t|
|
||||
cp t.source, t.name
|
||||
end
|
||||
SOURCES.include dest
|
||||
end
|
||||
|
||||
CLEAN.include SOURCES
|
||||
CLEAN.include FileList[
|
||||
"ext/*.o",
|
||||
"ext/*.metal",
|
||||
"ext/whisper.{so,bundle,dll}",
|
||||
"ext/depend"
|
||||
]
|
||||
|
||||
task build: FileList[
|
||||
"ext/Makefile",
|
||||
"ext/ruby_whisper.h",
|
||||
"ext/ruby_whisper.cpp",
|
||||
"whispercpp.gemspec",
|
||||
]
|
||||
|
||||
directory "pkg"
|
||||
CLOBBER.include "pkg"
|
||||
|
||||
TEST_MODEL = "../../models/ggml-base.en.bin"
|
||||
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
||||
SO_FILE = File.join("ext", LIB_NAME)
|
||||
LIB_FILE = File.join("lib", LIB_NAME)
|
||||
|
||||
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
|
||||
Dir.chdir "ext" do
|
||||
ruby "extconf.rb"
|
||||
end
|
||||
end
|
||||
|
||||
file SO_FILE => "ext/Makefile" do |t|
|
||||
Dir.chdir "ext" do
|
||||
sh "make"
|
||||
end
|
||||
end
|
||||
CLEAN.include LIB_FILE
|
||||
|
||||
directory "lib"
|
||||
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
||||
copy t.source, t.name
|
||||
end
|
||||
|
||||
Rake::TestTask.new do |t|
|
||||
t.test_files = FileList["tests/test_*.rb"]
|
||||
end
|
||||
task test: [TEST_MODEL, LIB_FILE]
|
||||
|
||||
file TEST_MODEL do
|
||||
Dir.chdir "../.." do
|
||||
sh "./models/download-ggml-model.sh base.en"
|
||||
end
|
||||
end
|
||||
|
||||
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
|
||||
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
|
||||
Dir.chdir "tests/jfk_reader" do
|
||||
ruby "extconf.rb"
|
||||
sh "make"
|
||||
end
|
||||
end
|
||||
CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
|
||||
task test: TEST_MEMORY_VIEW
|
14
bindings/ruby/ext/.gitignore
vendored
14
bindings/ruby/ext/.gitignore
vendored
@ -1,14 +0,0 @@
|
||||
Makefile
|
||||
whisper.so
|
||||
whisper.bundle
|
||||
whisper.dll
|
||||
depend
|
||||
scripts/get-flags.mk
|
||||
*.o
|
||||
*.c
|
||||
*.cpp
|
||||
*.h
|
||||
*.m
|
||||
*.metal
|
||||
!ruby_whisper.cpp
|
||||
!ruby_whisper.h
|
@ -1,9 +0,0 @@
|
||||
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
||||
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
||||
ggml/include/ggml-backend.h \
|
||||
ggml/include/ggml.h \
|
||||
ggml/include/ggml-alloc.h \
|
||||
ggml/src/ggml-backend-impl.h \
|
||||
ggml/include/ggml-cpu.h \
|
||||
ggml/src/ggml-impl.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
@ -1,203 +0,0 @@
|
||||
require 'mkmf'
|
||||
|
||||
# need to use c++ compiler flags
|
||||
$CXXFLAGS << ' -std=c++11'
|
||||
|
||||
$LDFLAGS << ' -lstdc++'
|
||||
|
||||
# Set to true when building binary gems
|
||||
if enable_config('static-stdlib', false)
|
||||
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
||||
end
|
||||
|
||||
if enable_config('march-tune-native', false)
|
||||
$CFLAGS << ' -march=native -mtune=native'
|
||||
$CXXFLAGS << ' -march=native -mtune=native'
|
||||
end
|
||||
|
||||
if ENV['WHISPER_METAL']
|
||||
$GGML_METAL ||= true
|
||||
$DEPRECATE_WARNING ||= true
|
||||
end
|
||||
|
||||
$UNAME_S = `uname -s`.chomp
|
||||
$UNAME_P = `uname -p`.chomp
|
||||
$UNAME_M = `uname -m`.chomp
|
||||
|
||||
if $UNAME_S == 'Darwin'
|
||||
unless ENV['GGML_NO_METAL']
|
||||
$GGML_METAL ||= true
|
||||
end
|
||||
$GGML_NO_OPENMP ||= true
|
||||
end
|
||||
|
||||
if $GGML_METAL
|
||||
$GGML_METAL_EMBED_LIBRARY = true
|
||||
end
|
||||
|
||||
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iinclude -Isrc -Iexamples'
|
||||
$MK_CFLAGS = '-std=c11 -fPIC'
|
||||
$MK_CXXFLAGS = '-std=c++11 -fPIC'
|
||||
$MK_NVCCFLAGS = '-std=c++11'
|
||||
$MK_LDFLAGS = ''
|
||||
|
||||
$OBJ_GGML = []
|
||||
$OBJ_WHISPER = []
|
||||
$OBJ_COMMON = []
|
||||
$OBJ_SDL = []
|
||||
|
||||
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
||||
|
||||
if $UNAME_S == 'Linux'
|
||||
$MK_CPPFLAGS << ' -D_GNU_SOURCE'
|
||||
end
|
||||
|
||||
if $UNAME_S == 'Darwin'
|
||||
$MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
|
||||
end
|
||||
|
||||
if ENV['WHISPER_DEBUG']
|
||||
$MK_CFLAGS << ' -O0 -g'
|
||||
$MK_CXXFLAGS << ' -O0 -g'
|
||||
$MK_LDFLAGS << ' -g'
|
||||
$MK_NVCCFLAGS << ' -O0 -g'
|
||||
else
|
||||
$MK_CPPFLAGS << ' -DNDEBUG'
|
||||
$MK_CFLAGS << ' -O3'
|
||||
$MK_CXXFLAGS << ' -O3'
|
||||
$MK_NVCCFLAGS << ' -O3'
|
||||
end
|
||||
|
||||
$WARN_FLAGS =
|
||||
' -Wall' <<
|
||||
' -Wextra' <<
|
||||
' -Wpedantic' <<
|
||||
' -Wcast-qual' <<
|
||||
' -Wno-unused-function'
|
||||
|
||||
$MK_CFLAGS <<
|
||||
$WARN_FLAGS <<
|
||||
' -Wshadow' <<
|
||||
' -Wstrict-prototypes' <<
|
||||
' -Wpointer-arith' <<
|
||||
' -Wmissing-prototypes' <<
|
||||
' -Werror=implicit-int' <<
|
||||
' -Werror=implicit-function-declaration'
|
||||
|
||||
$MK_CXXFLAGS <<
|
||||
$WARN_FLAGS <<
|
||||
' -Wmissing-declarations' <<
|
||||
' -Wmissing-noreturn'
|
||||
|
||||
unless `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
|
||||
$MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
|
||||
end
|
||||
|
||||
if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
|
||||
$MK_CFLAGS << ' -pthread'
|
||||
$MK_CXXFLAGS << ' -pthread'
|
||||
end
|
||||
|
||||
unless $_WIN32
|
||||
$DSO_EXT = '.so'
|
||||
else
|
||||
$DSO_EXT = '.dll'
|
||||
end
|
||||
|
||||
unless ENV['RISCV']
|
||||
if %w[x86_64 i686 amd64].include? $UNAME_M
|
||||
$HOST_CXXFLAGS ||= ''
|
||||
|
||||
$MK_CFLAGS << ' -march=native -mtune=native'
|
||||
$HOST_CXXFLAGS << ' -march=native -mtune=native'
|
||||
end
|
||||
|
||||
if $UNAME_M.match? /aarch64.*/
|
||||
$MK_CFLAGS << ' -mcpu=native'
|
||||
$MK_CXXFLAGS << ' -mcpu=native'
|
||||
end
|
||||
else
|
||||
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
||||
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
||||
end
|
||||
|
||||
unless ENV['GGML_NO_ACCELERATE']
|
||||
if $UNAME_S == 'Darwin'
|
||||
$MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE'
|
||||
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
||||
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
||||
$MK_LDFLAGS << ' -framework Accelerate'
|
||||
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
|
||||
end
|
||||
end
|
||||
|
||||
if ENV['GGML_OPENBLAS']
|
||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
|
||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
|
||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas`}"
|
||||
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
|
||||
end
|
||||
|
||||
if ENV['GGML_OPENBLAS64']
|
||||
$MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
|
||||
$MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
|
||||
$MK_LDFLAGS << " #{`pkg-config --libs openblas64`}"
|
||||
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
|
||||
end
|
||||
|
||||
if $GGML_METAL
|
||||
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
||||
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
||||
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal.o'
|
||||
|
||||
if ENV['GGML_METAL_NDEBUG']
|
||||
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
||||
end
|
||||
|
||||
if $GGML_METAL_EMBED_LIBRARY
|
||||
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
||||
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal-embed.o'
|
||||
end
|
||||
end
|
||||
|
||||
$OBJ_GGML <<
|
||||
'ggml/src/ggml.o' <<
|
||||
'ggml/src/ggml-aarch64.o' <<
|
||||
'ggml/src/ggml-alloc.o' <<
|
||||
'ggml/src/ggml-backend.o' <<
|
||||
'ggml/src/ggml-backend-reg.o' <<
|
||||
'ggml/src/ggml-opt.o' <<
|
||||
'ggml/src/ggml-quants.o' <<
|
||||
'ggml/src/ggml-threading.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
|
||||
'ggml/src/ggml-cpu/ggml-cpu-quants.o'
|
||||
|
||||
$OBJ_WHISPER <<
|
||||
'src/whisper.o'
|
||||
|
||||
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
||||
$objs << "ruby_whisper.o"
|
||||
|
||||
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
||||
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
||||
$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
|
||||
$CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
|
||||
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
||||
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
||||
|
||||
create_makefile('whisper')
|
||||
|
||||
File.open 'Makefile', 'a' do |file|
|
||||
file.puts 'include scripts/get-flags.mk'
|
||||
file.puts 'include cpu.mk'
|
||||
|
||||
if $GGML_METAL
|
||||
file.puts 'include metal.mk'
|
||||
|
||||
if $GGML_METAL_EMBED_LIBRARY
|
||||
file.puts 'include metal-embed.mk'
|
||||
end
|
||||
end
|
||||
end
|
@ -1,17 +0,0 @@
|
||||
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
||||
ggml/src/ggml-metal/ggml-metal.metal \
|
||||
ggml/src/ggml-metal/ggml-metal-impl.h \
|
||||
ggml/src/ggml-common.h
|
||||
@echo "Embedding Metal library"
|
||||
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
||||
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
||||
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
||||
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
||||
@rmdir ${TEMP_ASSEMBLY}
|
@ -1,6 +0,0 @@
|
||||
ggml/src/ggml-metal/ggml-metal.o: \
|
||||
ggml/src/ggml-metal/ggml-metal.m \
|
||||
ggml/src/ggml-metal/ggml-metal-impl.h \
|
||||
ggml/include/ggml-metal.h \
|
||||
ggml/include/ggml.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
File diff suppressed because it is too large
Load Diff
@ -1,25 +0,0 @@
|
||||
#ifndef __RUBY_WHISPER_H
|
||||
#define __RUBY_WHISPER_H
|
||||
|
||||
#include "whisper.h"
|
||||
|
||||
typedef struct {
|
||||
VALUE *context;
|
||||
VALUE user_data;
|
||||
VALUE callback;
|
||||
VALUE callbacks;
|
||||
} ruby_whisper_callback_container;
|
||||
|
||||
typedef struct {
|
||||
struct whisper_context *context;
|
||||
} ruby_whisper;
|
||||
|
||||
typedef struct {
|
||||
struct whisper_full_params params;
|
||||
bool diarize;
|
||||
ruby_whisper_callback_container *new_segment_callback_container;
|
||||
ruby_whisper_callback_container *progress_callback_container;
|
||||
ruby_whisper_callback_container *abort_callback_container;
|
||||
} ruby_whisper_params;
|
||||
|
||||
#endif
|
@ -1,6 +0,0 @@
|
||||
require "yaml"
|
||||
|
||||
sources = `git ls-files -z ../..`.split("\x0")
|
||||
paths = YAML.load_file("../../.github/workflows/bindings-ruby.yml")[true]["push"]["paths"]
|
||||
paths.delete "bindings/ruby/**"
|
||||
EXTSOURCES = (Dir.glob(paths, base: "../..").collect {|path| "../../#{path}"} << "../../LICENSE") & sources
|
@ -1,8 +0,0 @@
|
||||
require "test/unit"
|
||||
require "whisper"
|
||||
require_relative "jfk_reader/jfk_reader"
|
||||
|
||||
class TestBase < Test::Unit::TestCase
|
||||
MODEL = File.join(__dir__, "..", "..", "..", "models", "ggml-base.en.bin")
|
||||
AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
|
||||
end
|
5
bindings/ruby/tests/jfk_reader/.gitignore
vendored
5
bindings/ruby/tests/jfk_reader/.gitignore
vendored
@ -1,5 +0,0 @@
|
||||
Makefile
|
||||
jfk_reader.o
|
||||
jfk_reader.so
|
||||
jfk_reader.bundle
|
||||
jfk_reader.dll
|
@ -1,3 +0,0 @@
|
||||
require "mkmf"
|
||||
|
||||
create_makefile("jfk_reader")
|
@ -1,108 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include <ruby/encoding.h>
|
||||
|
||||
static VALUE
|
||||
jfk_reader_initialize(VALUE self, VALUE audio_path)
|
||||
{
|
||||
rb_iv_set(self, "audio_path", audio_path);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_get_memory_view(const VALUE obj, rb_memory_view_t *view, int flags)
|
||||
{
|
||||
VALUE audio_path = rb_iv_get(obj, "audio_path");
|
||||
const char *audio_path_str = StringValueCStr(audio_path);
|
||||
const int n_samples = 176000;
|
||||
float *data = (float *)malloc(n_samples * sizeof(float));
|
||||
short *samples = (short *)malloc(n_samples * sizeof(short));
|
||||
FILE *file = fopen(audio_path_str, "rb");
|
||||
|
||||
fseek(file, 78, SEEK_SET);
|
||||
fread(samples, sizeof(short), n_samples, file);
|
||||
fclose(file);
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
data[i] = samples[i]/32768.0;
|
||||
}
|
||||
|
||||
view->obj = obj;
|
||||
view->data = (void *)data;
|
||||
view->byte_size = sizeof(float) * n_samples;
|
||||
view->readonly = true;
|
||||
view->format = "f";
|
||||
view->item_size = sizeof(float);
|
||||
view->item_desc.components = NULL;
|
||||
view->item_desc.length = 0;
|
||||
view->ndim = 1;
|
||||
view->shape = NULL;
|
||||
view->sub_offsets = NULL;
|
||||
view->private_data = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_release_memory_view(const VALUE obj, rb_memory_view_t *view)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_memory_view_available_p(const VALUE obj)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static const rb_memory_view_entry_t jfk_reader_view_entry = {
|
||||
jfk_reader_get_memory_view,
|
||||
jfk_reader_release_memory_view,
|
||||
jfk_reader_memory_view_available_p
|
||||
};
|
||||
|
||||
static VALUE
|
||||
read_jfk(int argc, VALUE *argv, VALUE obj)
|
||||
{
|
||||
const char *audio_path_str = StringValueCStr(argv[0]);
|
||||
const int n_samples = 176000;
|
||||
|
||||
short samples[n_samples];
|
||||
FILE *file = fopen(audio_path_str, "rb");
|
||||
|
||||
fseek(file, 78, SEEK_SET);
|
||||
fread(samples, sizeof(short), n_samples, file);
|
||||
fclose(file);
|
||||
|
||||
VALUE rb_samples = rb_ary_new2(n_samples);
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
rb_ary_push(rb_samples, INT2FIX(samples[i]));
|
||||
}
|
||||
|
||||
VALUE rb_data = rb_ary_new2(n_samples);
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
rb_ary_push(rb_data, DBL2NUM(samples[i]/32768.0));
|
||||
}
|
||||
|
||||
float data[n_samples];
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
data[i] = samples[i]/32768.0;
|
||||
}
|
||||
void *c_data = (void *)data;
|
||||
VALUE rb_void = rb_enc_str_new((const char *)c_data, sizeof(data), rb_ascii8bit_encoding());
|
||||
|
||||
VALUE rb_result = rb_ary_new3(3, rb_samples, rb_data, rb_void);
|
||||
return rb_result;
|
||||
}
|
||||
|
||||
void Init_jfk_reader(void)
|
||||
{
|
||||
VALUE cJFKReader = rb_define_class("JFKReader", rb_cObject);
|
||||
rb_memory_view_register(cJFKReader, &jfk_reader_view_entry);
|
||||
rb_define_method(cJFKReader, "initialize", jfk_reader_initialize, 1);
|
||||
|
||||
|
||||
rb_define_global_function("read_jfk", read_jfk, -1);
|
||||
|
||||
|
||||
|
||||
}
|
@ -1,163 +0,0 @@
|
||||
require "test/unit"
|
||||
require "whisper"
|
||||
|
||||
class TestCallback < Test::Unit::TestCase
|
||||
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
||||
|
||||
def setup
|
||||
GC.start
|
||||
@params = Whisper::Params.new
|
||||
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
|
||||
@audio = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
|
||||
end
|
||||
|
||||
def test_new_segment_callback
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_kind_of Integer, n_new
|
||||
assert n_new > 0
|
||||
assert_same @whisper, context
|
||||
|
||||
n_segments = context.full_n_segments
|
||||
n_new.times do |i|
|
||||
i_segment = n_segments - 1 + i
|
||||
start_time = context.full_get_segment_t0(i_segment) * 10
|
||||
end_time = context.full_get_segment_t1(i_segment) * 10
|
||||
text = context.full_get_segment_text(i_segment)
|
||||
|
||||
assert_kind_of Integer, start_time
|
||||
assert start_time >= 0
|
||||
assert_kind_of Integer, end_time
|
||||
assert end_time > 0
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text if i_segment == 0
|
||||
end
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_new_segment_callback_closure
|
||||
search_word = "what"
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
n_segments = context.full_n_segments
|
||||
n_new.times do |i|
|
||||
i_segment = n_segments - 1 + i
|
||||
text = context.full_get_segment_text(i_segment)
|
||||
if text.include?(search_word)
|
||||
t0 = context.full_get_segment_t0(i_segment)
|
||||
t1 = context.full_get_segment_t1(i_segment)
|
||||
raise "search word '#{search_word}' found at between #{t0} and #{t1}"
|
||||
end
|
||||
end
|
||||
}
|
||||
|
||||
assert_raise RuntimeError do
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
end
|
||||
|
||||
def test_new_segment_callback_user_data
|
||||
udata = Object.new
|
||||
@params.new_segment_callback_user_data = udata
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_same udata, user_data
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_new_segment_callback_user_data_gc
|
||||
@params.new_segment_callback_user_data = "My user data"
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_equal "My user data", user_data
|
||||
}
|
||||
GC.start
|
||||
|
||||
assert_same @whisper, @whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_progress_callback
|
||||
first = nil
|
||||
last = nil
|
||||
@params.progress_callback = ->(context, state, progress, user_data) {
|
||||
assert_kind_of Integer, progress
|
||||
assert 0 <= progress && progress <= 100
|
||||
assert_same @whisper, context
|
||||
first = progress if first.nil?
|
||||
last = progress
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 0, first
|
||||
assert_equal 100, last
|
||||
end
|
||||
|
||||
def test_progress_callback_user_data
|
||||
udata = Object.new
|
||||
@params.progress_callback_user_data = udata
|
||||
@params.progress_callback = ->(context, state, n_new, user_data) {
|
||||
assert_same udata, user_data
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_on_progress
|
||||
first = nil
|
||||
last = nil
|
||||
@params.on_progress do |progress|
|
||||
assert_kind_of Integer, progress
|
||||
assert 0 <= progress && progress <= 100
|
||||
first = progress if first.nil?
|
||||
last = progress
|
||||
end
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 0, first
|
||||
assert_equal 100, last
|
||||
end
|
||||
|
||||
def test_abort_callback
|
||||
i = 0
|
||||
@params.abort_callback = ->(user_data) {
|
||||
assert_nil user_data
|
||||
i += 1
|
||||
return false
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
|
||||
def test_abort_callback_abort
|
||||
i = 0
|
||||
@params.abort_callback = ->(user_data) {
|
||||
i += 1
|
||||
return i == 3
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 3, i
|
||||
end
|
||||
|
||||
def test_abort_callback_user_data
|
||||
udata = Object.new
|
||||
@params.abort_callback_user_data = udata
|
||||
yielded = nil
|
||||
@params.abort_callback = ->(user_data) {
|
||||
yielded = user_data
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_same udata, yielded
|
||||
end
|
||||
|
||||
def test_abort_on
|
||||
do_abort = false
|
||||
aborted_from_callback = false
|
||||
@params.on_new_segment do |segment|
|
||||
do_abort = true if segment.text.match? /ask/
|
||||
end
|
||||
i = 0
|
||||
@params.abort_on do
|
||||
i += 1
|
||||
do_abort
|
||||
end
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
end
|
@ -1,20 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestError < TestBase
|
||||
def test_error
|
||||
error = Whisper::Error.new(-2)
|
||||
assert_equal "failed to compute log mel spectrogram", error.message
|
||||
assert_equal -2, error.code
|
||||
end
|
||||
|
||||
def test_unknown_error
|
||||
error = Whisper::Error.new(-20)
|
||||
assert_equal "unknown error", error.message
|
||||
end
|
||||
|
||||
def test_non_int_code
|
||||
assert_raise TypeError do
|
||||
error = Whisper::Error.new("non int")
|
||||
end
|
||||
end
|
||||
end
|
@ -1,44 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestModel < TestBase
|
||||
def test_model
|
||||
whisper = Whisper::Context.new(MODEL)
|
||||
assert_instance_of Whisper::Model, whisper.model
|
||||
end
|
||||
|
||||
def test_attributes
|
||||
whisper = Whisper::Context.new(MODEL)
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_gc
|
||||
model = Whisper::Context.new(MODEL).model
|
||||
GC.start
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
end
|
@ -1,31 +0,0 @@
|
||||
require_relative "helper"
|
||||
require 'tempfile'
|
||||
require 'tmpdir'
|
||||
require 'shellwords'
|
||||
|
||||
class TestPackage < TestBase
|
||||
def test_build
|
||||
Tempfile.create do |file|
|
||||
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
||||
assert file.size > 0
|
||||
assert_path_exist file.to_path
|
||||
end
|
||||
end
|
||||
|
||||
sub_test_case "Building binary on installation" do
|
||||
def setup
|
||||
system "rake", "build", exception: true
|
||||
end
|
||||
|
||||
def test_install
|
||||
match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
|
||||
filename = match_data[1]
|
||||
version = match_data[2]
|
||||
basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
|
||||
Dir.mktmpdir do |dir|
|
||||
system "gem", "install", "--install-dir", dir.shellescape, "pkg/#{filename.shellescape}", exception: true
|
||||
assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
@ -1,154 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestParams < TestBase
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
||||
def test_language
|
||||
@params.language = "en"
|
||||
assert_equal @params.language, "en"
|
||||
@params.language = "auto"
|
||||
assert_equal @params.language, "auto"
|
||||
end
|
||||
|
||||
def test_offset
|
||||
@params.offset = 10_000
|
||||
assert_equal @params.offset, 10_000
|
||||
@params.offset = 0
|
||||
assert_equal @params.offset, 0
|
||||
end
|
||||
|
||||
def test_duration
|
||||
@params.duration = 60_000
|
||||
assert_equal @params.duration, 60_000
|
||||
@params.duration = 0
|
||||
assert_equal @params.duration, 0
|
||||
end
|
||||
|
||||
def test_max_text_tokens
|
||||
@params.max_text_tokens = 300
|
||||
assert_equal @params.max_text_tokens, 300
|
||||
@params.max_text_tokens = 0
|
||||
assert_equal @params.max_text_tokens, 0
|
||||
end
|
||||
|
||||
def test_translate
|
||||
@params.translate = true
|
||||
assert @params.translate
|
||||
@params.translate = false
|
||||
assert !@params.translate
|
||||
end
|
||||
|
||||
def test_no_context
|
||||
@params.no_context = true
|
||||
assert @params.no_context
|
||||
@params.no_context = false
|
||||
assert !@params.no_context
|
||||
end
|
||||
|
||||
def test_single_segment
|
||||
@params.single_segment = true
|
||||
assert @params.single_segment
|
||||
@params.single_segment = false
|
||||
assert !@params.single_segment
|
||||
end
|
||||
|
||||
def test_print_special
|
||||
@params.print_special = true
|
||||
assert @params.print_special
|
||||
@params.print_special = false
|
||||
assert !@params.print_special
|
||||
end
|
||||
|
||||
def test_print_progress
|
||||
@params.print_progress = true
|
||||
assert @params.print_progress
|
||||
@params.print_progress = false
|
||||
assert !@params.print_progress
|
||||
end
|
||||
|
||||
def test_print_realtime
|
||||
@params.print_realtime = true
|
||||
assert @params.print_realtime
|
||||
@params.print_realtime = false
|
||||
assert !@params.print_realtime
|
||||
end
|
||||
|
||||
def test_print_timestamps
|
||||
@params.print_timestamps = true
|
||||
assert @params.print_timestamps
|
||||
@params.print_timestamps = false
|
||||
assert !@params.print_timestamps
|
||||
end
|
||||
|
||||
def test_suppress_blank
|
||||
@params.suppress_blank = true
|
||||
assert @params.suppress_blank
|
||||
@params.suppress_blank = false
|
||||
assert !@params.suppress_blank
|
||||
end
|
||||
|
||||
def test_suppress_non_speech_tokens
|
||||
@params.suppress_non_speech_tokens = true
|
||||
assert @params.suppress_non_speech_tokens
|
||||
@params.suppress_non_speech_tokens = false
|
||||
assert !@params.suppress_non_speech_tokens
|
||||
end
|
||||
|
||||
def test_token_timestamps
|
||||
@params.token_timestamps = true
|
||||
assert @params.token_timestamps
|
||||
@params.token_timestamps = false
|
||||
assert !@params.token_timestamps
|
||||
end
|
||||
|
||||
def test_split_on_word
|
||||
@params.split_on_word = true
|
||||
assert @params.split_on_word
|
||||
@params.split_on_word = false
|
||||
assert !@params.split_on_word
|
||||
end
|
||||
|
||||
def test_initial_prompt
|
||||
assert_nil @params.initial_prompt
|
||||
@params.initial_prompt = "You are a polite person."
|
||||
assert_equal "You are a polite person.", @params.initial_prompt
|
||||
end
|
||||
|
||||
def test_temperature
|
||||
assert_equal 0.0, @params.temperature
|
||||
@params.temperature = 0.5
|
||||
assert_equal 0.5, @params.temperature
|
||||
end
|
||||
|
||||
def test_max_initial_ts
|
||||
assert_equal 1.0, @params.max_initial_ts
|
||||
@params.max_initial_ts = 600.0
|
||||
assert_equal 600.0, @params.max_initial_ts
|
||||
end
|
||||
|
||||
def test_length_penalty
|
||||
assert_equal -1.0, @params.length_penalty
|
||||
@params.length_penalty = 0.5
|
||||
assert_equal 0.5, @params.length_penalty
|
||||
end
|
||||
|
||||
def test_temperature_inc
|
||||
assert_in_delta 0.2, @params.temperature_inc
|
||||
@params.temperature_inc = 0.5
|
||||
assert_in_delta 0.5, @params.temperature_inc
|
||||
end
|
||||
|
||||
def test_entropy_thold
|
||||
assert_in_delta 2.4, @params.entropy_thold
|
||||
@params.entropy_thold = 3.0
|
||||
assert_in_delta 3.0, @params.entropy_thold
|
||||
end
|
||||
|
||||
def test_logprob_thold
|
||||
assert_in_delta -1.0, @params.logprob_thold
|
||||
@params.logprob_thold = -0.5
|
||||
assert_in_delta -0.5, @params.logprob_thold
|
||||
end
|
||||
end
|
@ -1,83 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestSegment < TestBase
|
||||
class << self
|
||||
attr_reader :whisper
|
||||
|
||||
def startup
|
||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
@whisper.transcribe(TestBase::AUDIO, params)
|
||||
end
|
||||
end
|
||||
|
||||
def test_iteration
|
||||
whisper.each_segment do |segment|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
end
|
||||
end
|
||||
|
||||
def test_enumerator
|
||||
enum = whisper.each_segment
|
||||
assert_instance_of Enumerator, enum
|
||||
enum.to_a.each_with_index do |segment, index|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
assert_kind_of Integer, index
|
||||
end
|
||||
end
|
||||
|
||||
def test_start_time
|
||||
i = 0
|
||||
whisper.each_segment do |segment|
|
||||
assert_equal 0, segment.start_time if i == 0
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
|
||||
def test_end_time
|
||||
i = 0
|
||||
whisper.each_segment do |segment|
|
||||
assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
|
||||
def test_on_new_segment
|
||||
params = Whisper::Params.new
|
||||
seg = nil
|
||||
index = 0
|
||||
params.on_new_segment do |segment|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
if index == 0
|
||||
seg = segment
|
||||
assert_equal 0, segment.start_time
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, segment.text
|
||||
end
|
||||
index += 1
|
||||
end
|
||||
whisper.transcribe(AUDIO, params)
|
||||
assert_equal 0, seg.start_time
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
|
||||
end
|
||||
|
||||
def test_on_new_segment_twice
|
||||
params = Whisper::Params.new
|
||||
seg = nil
|
||||
params.on_new_segment do |segment|
|
||||
seg = segment
|
||||
return
|
||||
end
|
||||
params.on_new_segment do |segment|
|
||||
assert_same seg, segment
|
||||
return
|
||||
end
|
||||
whisper.transcribe(AUDIO, params)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def whisper
|
||||
self.class.whisper
|
||||
end
|
||||
end
|
@ -1,226 +0,0 @@
|
||||
require_relative "helper"
|
||||
require "stringio"
|
||||
require "etc"
|
||||
|
||||
# Exists to detect memory-related bug
|
||||
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
||||
|
||||
class TestWhisper < TestBase
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
||||
def test_whisper
|
||||
@whisper = Whisper::Context.new(MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
|
||||
@whisper.transcribe(AUDIO, params) {|text|
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
|
||||
}
|
||||
end
|
||||
|
||||
sub_test_case "After transcription" do
|
||||
class << self
|
||||
attr_reader :whisper
|
||||
|
||||
def startup
|
||||
@whisper = Whisper::Context.new(TestBase::MODEL)
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
@whisper.transcribe(TestBase::AUDIO, params)
|
||||
end
|
||||
end
|
||||
|
||||
def whisper
|
||||
self.class.whisper
|
||||
end
|
||||
|
||||
def test_full_n_segments
|
||||
assert_equal 1, whisper.full_n_segments
|
||||
end
|
||||
|
||||
def test_full_lang_id
|
||||
assert_equal 0, whisper.full_lang_id
|
||||
end
|
||||
|
||||
def test_full_get_segment_t0
|
||||
assert_equal 0, whisper.full_get_segment_t0(0)
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t0(whisper.full_n_segments)
|
||||
end
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t0(-1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_get_segment_t1
|
||||
t1 = whisper.full_get_segment_t1(0)
|
||||
assert_kind_of Integer, t1
|
||||
assert t1 > 0
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t1(whisper.full_n_segments)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_get_segment_speaker_turn_next
|
||||
assert_false whisper.full_get_segment_speaker_turn_next(0)
|
||||
end
|
||||
|
||||
def test_full_get_segment_text
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0)
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_max_id
|
||||
assert_kind_of Integer, Whisper.lang_max_id
|
||||
end
|
||||
|
||||
def test_lang_id
|
||||
assert_equal 0, Whisper.lang_id("en")
|
||||
assert_raise ArgumentError do
|
||||
Whisper.lang_id("non existing language")
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_str
|
||||
assert_equal "en", Whisper.lang_str(0)
|
||||
assert_raise IndexError do
|
||||
Whisper.lang_str(Whisper.lang_max_id + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_str_full
|
||||
assert_equal "english", Whisper.lang_str_full(0)
|
||||
assert_raise IndexError do
|
||||
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_log_set
|
||||
user_data = Object.new
|
||||
logs = []
|
||||
log_callback = ->(level, buffer, udata) {
|
||||
logs << [level, buffer, udata]
|
||||
}
|
||||
Whisper.log_set log_callback, user_data
|
||||
Whisper::Context.new(MODEL)
|
||||
|
||||
assert logs.length > 30
|
||||
logs.each do |log|
|
||||
assert_include [Whisper::LOG_LEVEL_DEBUG, Whisper::LOG_LEVEL_INFO, Whisper::LOG_LEVEL_WARN], log[0]
|
||||
assert_same user_data, log[2]
|
||||
end
|
||||
end
|
||||
|
||||
def test_log_suppress
|
||||
stderr = $stderr
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
dev = StringIO.new("")
|
||||
$stderr = dev
|
||||
Whisper::Context.new(MODEL)
|
||||
assert_empty dev.string
|
||||
ensure
|
||||
$stderr = stderr
|
||||
end
|
||||
|
||||
sub_test_case "full" do
|
||||
def setup
|
||||
super
|
||||
@whisper = Whisper::Context.new(MODEL)
|
||||
@samples = File.read(AUDIO, nil, 78).unpack("s<*").collect {|i| i.to_f / 2**15}
|
||||
end
|
||||
|
||||
def test_full
|
||||
@whisper.full(@params, @samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
end
|
||||
|
||||
def test_full_without_length
|
||||
@whisper.full(@params, @samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
end
|
||||
|
||||
def test_full_enumerator
|
||||
samples = @samples.each
|
||||
@whisper.full(@params, samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
end
|
||||
|
||||
def test_full_enumerator_without_length
|
||||
samples = @samples.each
|
||||
assert_raise ArgumentError do
|
||||
@whisper.full(@params, samples)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_enumerator_with_too_large_length
|
||||
samples = @samples.each.take(10).to_enum
|
||||
assert_raise StopIteration do
|
||||
@whisper.full(@params, samples, 11)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_with_memory_view
|
||||
samples = JFKReader.new(AUDIO)
|
||||
@whisper.full(@params, samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match /ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text
|
||||
end
|
||||
|
||||
def test_full_parallel
|
||||
@whisper.full_parallel(@params, @samples, @samples.length, Etc.nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
end
|
||||
|
||||
def test_full_parallel_with_memory_view
|
||||
samples = JFKReader.new(AUDIO)
|
||||
@whisper.full_parallel(@params, samples, nil, Etc.nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length_and_n_processors
|
||||
@whisper.full_parallel(@params, @samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length
|
||||
@whisper.full_parallel(@params, @samples, nil, Etc.nprocessors)
|
||||
|
||||
assert_equal Etc.nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
end
|
||||
|
||||
def test_full_parallel_without_n_processors
|
||||
@whisper.full_parallel(@params, @samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match /ask what you can do/i, text
|
||||
assert_match /for your country/i, text
|
||||
end
|
||||
end
|
||||
end
|
@ -1,36 +0,0 @@
|
||||
require_relative "extsources"
|
||||
|
||||
Gem::Specification.new do |s|
|
||||
s.name = "whispercpp"
|
||||
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
||||
s.version = '1.3.0'
|
||||
s.date = '2024-05-14'
|
||||
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
|
||||
s.email = 'todd.fisher@gmail.com'
|
||||
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
||||
|
||||
s.files = `git ls-files . -z`.split("\x0") +
|
||||
EXTSOURCES.collect {|file|
|
||||
basename = File.basename(file)
|
||||
if s.extra_rdoc_files.include?(basename)
|
||||
basename
|
||||
else
|
||||
file.sub("../..", "ext")
|
||||
end
|
||||
}
|
||||
|
||||
s.summary = %q{Ruby whisper.cpp bindings}
|
||||
s.test_files = s.files.select {|file| file.start_with? "tests/"}
|
||||
|
||||
s.extensions << 'ext/extconf.rb'
|
||||
|
||||
|
||||
#### Documentation and testing.
|
||||
s.homepage = 'https://github.com/ggerganov/whisper.cpp'
|
||||
s.rdoc_options = ['--main', 'README.md']
|
||||
|
||||
|
||||
s.platform = Gem::Platform::RUBY
|
||||
|
||||
s.licenses = ['MIT']
|
||||
end
|
54
cmake/BuildTypes.cmake
Normal file
54
cmake/BuildTypes.cmake
Normal file
@ -0,0 +1,54 @@
|
||||
# Add new build types
|
||||
|
||||
# ReleaseGG - Release with enabled asserts
|
||||
|
||||
SET(CMAKE_CXX_FLAGS_RELEASEGG
|
||||
"-O3"
|
||||
CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_C_FLAGS_RELEASEGG
|
||||
"-O3"
|
||||
CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
||||
""
|
||||
CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
|
||||
""
|
||||
CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
|
||||
FORCE )
|
||||
MARK_AS_ADVANCED(
|
||||
CMAKE_CXX_FLAGS_RELEASEGG
|
||||
CMAKE_C_FLAGS_RELEASEGG
|
||||
CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
||||
CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )
|
||||
|
||||
# RelWithDebInfoGG - RelWithDebInfo with enabled asserts
|
||||
|
||||
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
||||
"-O2 -g"
|
||||
CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
||||
"-O2 -g"
|
||||
CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
""
|
||||
CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
""
|
||||
CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
|
||||
FORCE )
|
||||
MARK_AS_ADVANCED(
|
||||
CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
||||
CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
|
||||
endif()
|
@ -13,5 +13,5 @@ set_target_properties(${TARGET}
|
||||
PROPERTIES
|
||||
EXPORT_COMPILE_COMMANDS ON
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
|
||||
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib"
|
||||
INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib"
|
||||
)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user