mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-25 09:31:44 +00:00
Compare commits
1 Commits
master
...
fa-decoder
Author | SHA1 | Date | |
---|---|---|---|
e2aa556a99 |
@ -1,28 +0,0 @@
|
|||||||
ARG UBUNTU_VERSION=22.04
|
|
||||||
|
|
||||||
# This needs to generally match the container host's environment.
|
|
||||||
ARG CUDA_VERSION=11.7.1
|
|
||||||
|
|
||||||
# Target the CUDA build image
|
|
||||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
|
||||||
|
|
||||||
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
|
||||||
|
|
||||||
# Unless otherwise specified, we make a fat build.
|
|
||||||
ARG CUDA_DOCKER_ARCH=all
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y build-essential git cmake libsdl2-dev wget git
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Set nvcc architecture
|
|
||||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
|
||||||
# Enable cuBLAS
|
|
||||||
ENV GGML_CUDA=1
|
|
||||||
|
|
||||||
RUN make base.en
|
|
||||||
|
|
||||||
ENTRYPOINT ["/app/main"]
|
|
@ -1,40 +0,0 @@
|
|||||||
ARG UBUNTU_VERSION=22.04
|
|
||||||
# This needs to generally match the container host's environment.
|
|
||||||
ARG CUDA_VERSION=12.3.1
|
|
||||||
# Target the CUDA build image
|
|
||||||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
|
||||||
# Target the CUDA runtime image
|
|
||||||
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
|
||||||
|
|
||||||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Unless otherwise specified, we make a fat build.
|
|
||||||
ARG CUDA_DOCKER_ARCH=all
|
|
||||||
# Set nvcc architecture
|
|
||||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y build-essential libsdl2-dev wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
# Ref: https://stackoverflow.com/a/53464012
|
|
||||||
ENV CUDA_MAIN_VERSION=12.3
|
|
||||||
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
COPY .. .
|
|
||||||
# Enable cuBLAS
|
|
||||||
RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
|
|
||||||
|
|
||||||
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
|
|
||||||
ENV CUDA_MAIN_VERSION=12.3
|
|
||||||
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y curl ffmpeg wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
COPY --from=build /app /app
|
|
||||||
ENV PATH=/app/build/bin:$PATH
|
|
||||||
ENTRYPOINT [ "bash", "-c" ]
|
|
@ -1,28 +0,0 @@
|
|||||||
ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04
|
|
||||||
|
|
||||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y build-essential libsdl2-dev wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
COPY .. .
|
|
||||||
# Enable SYCL
|
|
||||||
ARG GGML_SYCL_F16=OFF
|
|
||||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
|
||||||
echo "GGML_SYCL_F16 is set" \
|
|
||||||
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
|
||||||
fi && \
|
|
||||||
make base.en CMAKE_ARGS="-DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16}"
|
|
||||||
|
|
||||||
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
COPY --from=build /app /app
|
|
||||||
ENV PATH=/app/build/bin:$PATH
|
|
||||||
ENTRYPOINT [ "bash", "-c" ]
|
|
@ -1,39 +0,0 @@
|
|||||||
ARG UBUNTU_VERSION=22.04
|
|
||||||
# This needs to generally match the container host's environment.
|
|
||||||
ARG MUSA_VERSION=rc4.0.1
|
|
||||||
# Target the MUSA build image
|
|
||||||
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
|
|
||||||
# Target the MUSA runtime image
|
|
||||||
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
|
|
||||||
|
|
||||||
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y build-essential libsdl2-dev wget cmake git && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*
|
|
||||||
|
|
||||||
COPY .. .
|
|
||||||
# Enable muBLAS
|
|
||||||
RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
|
|
||||||
|
|
||||||
RUN find /app/build -name "*.o" -delete && \
|
|
||||||
find /app/build -name "*.a" -delete && \
|
|
||||||
rm -rf /app/build/CMakeFiles && \
|
|
||||||
rm -rf /app/build/cmake_install.cmake && \
|
|
||||||
rm -rf /app/build/_deps
|
|
||||||
|
|
||||||
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y curl ffmpeg wget cmake git && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*
|
|
||||||
|
|
||||||
COPY --from=build /app /app
|
|
||||||
RUN du -sh /app/*
|
|
||||||
RUN find /app -type f -size +100M
|
|
||||||
ENV PATH=/app/build/bin:$PATH
|
|
||||||
ENTRYPOINT [ "bash", "-c" ]
|
|
@ -1,20 +0,0 @@
|
|||||||
FROM ubuntu:22.04 AS build
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y build-essential wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
COPY .. .
|
|
||||||
RUN make base.en
|
|
||||||
|
|
||||||
FROM ubuntu:22.04 AS runtime
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
|
||||||
|
|
||||||
COPY --from=build /app /app
|
|
||||||
ENV PATH=/app/build/bin:$PATH
|
|
||||||
ENTRYPOINT [ "bash", "-c" ]
|
|
@ -1,3 +0,0 @@
|
|||||||
build*/
|
|
||||||
.github/
|
|
||||||
.devops/
|
|
22
.github/workflows/bindings-go.yml
vendored
22
.github/workflows/bindings-go.yml
vendored
@ -1,22 +0,0 @@
|
|||||||
name: Bindings Tests (Go)
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
paths:
|
|
||||||
- bindings/go/**
|
|
||||||
- whisper.h
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- bindings/go/**
|
|
||||||
- whisper.h
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
ubuntu-22:
|
|
||||||
runs-on: ubuntu-22.04
|
|
||||||
steps:
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '^1.23'
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- run: |
|
|
||||||
cd bindings/go
|
|
||||||
make test
|
|
21
.github/workflows/bindings-ruby.yml
vendored
21
.github/workflows/bindings-ruby.yml
vendored
@ -1,21 +0,0 @@
|
|||||||
name: Bindings Tests (Ruby)
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
pull_request:
|
|
||||||
types: [opened, synchronize, reopened]
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
ubuntu-22:
|
|
||||||
runs-on: ubuntu-22.04
|
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
working-directory: bindings/ruby
|
|
||||||
steps:
|
|
||||||
- uses: ruby/setup-ruby@v1
|
|
||||||
with:
|
|
||||||
ruby-version: '3.2'
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- run: rake test
|
|
1602
.github/workflows/build.yml
vendored
1602
.github/workflows/build.yml
vendored
File diff suppressed because it is too large
Load Diff
63
.github/workflows/docker.yml
vendored
63
.github/workflows/docker.yml
vendored
@ -1,63 +0,0 @@
|
|||||||
name: Publish Docker image
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
push_to_registry:
|
|
||||||
name: Push Docker image to Docker Hub
|
|
||||||
if: github.event.pull_request.draft == false
|
|
||||||
|
|
||||||
runs-on: ubuntu-22.04
|
|
||||||
env:
|
|
||||||
COMMIT_SHA: ${{ github.sha }}
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
config:
|
|
||||||
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
|
|
||||||
- { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
|
|
||||||
- { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
|
|
||||||
#TODO: the cuda image keeps failing - disable for now
|
|
||||||
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
|
|
||||||
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Check out the repo
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
|
|
||||||
- name: Set up QEMU
|
|
||||||
uses: docker/setup-qemu-action@v3
|
|
||||||
with:
|
|
||||||
image: tonistiigi/binfmt:qemu-v7.0.0-28
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: ghcr.io
|
|
||||||
username: ${{ github.repository_owner }}
|
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Build and push Docker image (versioned)
|
|
||||||
if: github.event_name == 'push'
|
|
||||||
uses: docker/build-push-action@v5
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
push: true
|
|
||||||
platforms: ${{ matrix.config.platform }}
|
|
||||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
|
||||||
file: ${{ matrix.config.dockerfile }}
|
|
||||||
|
|
||||||
- name: Build and push Docker image (tagged)
|
|
||||||
uses: docker/build-push-action@v4
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
push: ${{ github.event_name == 'push' }}
|
|
||||||
platforms: ${{ matrix.config.platform }}
|
|
||||||
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
|
|
||||||
file: ${{ matrix.config.dockerfile }}
|
|
91
.github/workflows/examples-wasm.yml
vendored
91
.github/workflows/examples-wasm.yml
vendored
@ -1,91 +0,0 @@
|
|||||||
name: Examples WASM
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: ["master"]
|
|
||||||
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pages: write
|
|
||||||
id-token: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: "pages"
|
|
||||||
cancel-in-progress: false
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
deploy-wasm-github-pages:
|
|
||||||
environment:
|
|
||||||
name: github-pages
|
|
||||||
url: ${{ steps.deployment.outputs.page_url }}
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Setup Pages
|
|
||||||
uses: actions/configure-pages@v4
|
|
||||||
|
|
||||||
- name: Setup emsdk
|
|
||||||
uses: mymindstorm/setup-emsdk@v14
|
|
||||||
|
|
||||||
- name: Build WASM Examples
|
|
||||||
# Enable for real build later in whisper.cpp
|
|
||||||
run: |
|
|
||||||
mkdir -p build-em && cd build-em
|
|
||||||
emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
|
|
||||||
make -j
|
|
||||||
|
|
||||||
- name: Create staging directory
|
|
||||||
run: mkdir -p staging
|
|
||||||
|
|
||||||
- name: Create .nojekyll file in staging directory
|
|
||||||
run: touch staging/.nojekyll
|
|
||||||
|
|
||||||
- name: Copy application files
|
|
||||||
run: |
|
|
||||||
build_dir=build-em/bin
|
|
||||||
|
|
||||||
ls ${build_dir}
|
|
||||||
|
|
||||||
# command.wasm
|
|
||||||
target_dir=staging/command.wasm
|
|
||||||
mkdir -p ${target_dir}
|
|
||||||
cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
|
|
||||||
cp ${build_dir}/libcommand.js ${target_dir}
|
|
||||||
|
|
||||||
# bench.wasm
|
|
||||||
target_dir=staging/bench.wasm
|
|
||||||
mkdir -p ${target_dir}
|
|
||||||
cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
|
|
||||||
cp ${build_dir}/libbench.js ${target_dir}
|
|
||||||
|
|
||||||
# stream.wasm
|
|
||||||
target_dir=staging/stream.wasm
|
|
||||||
mkdir -p ${target_dir}
|
|
||||||
cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
|
|
||||||
cp ${build_dir}/libstream.js ${target_dir}
|
|
||||||
|
|
||||||
# whisper.wasm (this will be the main example page)
|
|
||||||
target_dir=staging
|
|
||||||
mkdir -p ${target_dir}
|
|
||||||
cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
|
|
||||||
cp ${build_dir}/libmain.js ${target_dir}
|
|
||||||
|
|
||||||
# Copy Cross-Origin Isolation service worker
|
|
||||||
cp -v examples/coi-serviceworker.js staging/
|
|
||||||
|
|
||||||
- name: List files in staging directory (for debugging)
|
|
||||||
run: |
|
|
||||||
echo "Files in staging directory:"
|
|
||||||
find staging -type f | sort
|
|
||||||
|
|
||||||
- name: Upload artifact
|
|
||||||
uses: actions/upload-pages-artifact@v3
|
|
||||||
with:
|
|
||||||
path: ./staging
|
|
||||||
|
|
||||||
- name: Deploy to GitHub Pages
|
|
||||||
id: deployment
|
|
||||||
uses: actions/deploy-pages@v4
|
|
48
.github/workflows/examples.yml
vendored
48
.github/workflows/examples.yml
vendored
@ -1,48 +0,0 @@
|
|||||||
name: Examples Tests
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
paths:
|
|
||||||
- examples/addon.node/**
|
|
||||||
- whisper.h
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- examples/addon.node/**
|
|
||||||
- whisper.h
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
addon_node-ubuntu-22:
|
|
||||||
runs-on: ubuntu-22.04
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
node-version: [ 16.x, 18.x ]
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v1
|
|
||||||
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential git
|
|
||||||
sudo apt-get install cmake
|
|
||||||
sudo apt-get install libsdl2-dev
|
|
||||||
|
|
||||||
- name: Use Node.js ${{ matrix.node-version }}
|
|
||||||
uses: actions/setup-node@v1
|
|
||||||
with:
|
|
||||||
node-version: ${{ matrix.node-version }}
|
|
||||||
cache: 'npm'
|
|
||||||
|
|
||||||
- name: Install package.json dependencies
|
|
||||||
working-directory: ./examples/addon.node
|
|
||||||
run: npm install
|
|
||||||
|
|
||||||
- name: Compile addon.node
|
|
||||||
run: npx cmake-js compile -T addon.node -B Release
|
|
||||||
|
|
||||||
- name: Download test model
|
|
||||||
run: |
|
|
||||||
bash ./models/download-ggml-model.sh base.en
|
|
||||||
- name: Test
|
|
||||||
run: |
|
|
||||||
cd examples/addon.node
|
|
||||||
npm run test
|
|
46
.gitignore
vendored
46
.gitignore
vendored
@ -1,42 +1,23 @@
|
|||||||
*.o
|
*.o
|
||||||
*.a
|
|
||||||
*.d
|
|
||||||
.cache/
|
.cache/
|
||||||
.coreml/
|
|
||||||
.test/
|
|
||||||
.venv/
|
|
||||||
.vs/
|
.vs/
|
||||||
.vscode/
|
.vscode/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.vimspector.json
|
|
||||||
/CMakeSettings.json
|
|
||||||
/talk-llama.dSYM/
|
|
||||||
|
|
||||||
build/
|
build/
|
||||||
build-*/
|
build-em/
|
||||||
build_*/
|
build-debug/
|
||||||
|
build-release/
|
||||||
# SPM
|
build-sanitize-addr/
|
||||||
.build/
|
build-sanitize-thread/
|
||||||
.swiftpm
|
|
||||||
*.metallib
|
|
||||||
|
|
||||||
ggml-metal-embed.metal
|
|
||||||
ggml-metal-embed.metal.tmp
|
|
||||||
|
|
||||||
/main
|
/main
|
||||||
/stream
|
/stream
|
||||||
/command
|
/command
|
||||||
/talk
|
/talk
|
||||||
/talk-llama
|
|
||||||
/bench
|
/bench
|
||||||
/quantize
|
|
||||||
/server
|
|
||||||
/lsp
|
|
||||||
|
|
||||||
arm_neon.h
|
|
||||||
sync.sh
|
sync.sh
|
||||||
libwhisper.a
|
|
||||||
libwhisper.so
|
libwhisper.so
|
||||||
compile_commands.json
|
compile_commands.json
|
||||||
|
|
||||||
@ -46,20 +27,3 @@ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
|
|||||||
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
||||||
|
|
||||||
extra/bench-gg.txt
|
extra/bench-gg.txt
|
||||||
|
|
||||||
models/*.mlmodel
|
|
||||||
models/*.mlmodelc
|
|
||||||
models/*.mlpackage
|
|
||||||
models/*-encoder-openvino.xml
|
|
||||||
models/*-encoder-openvino-cache/
|
|
||||||
bindings/java/.gradle/
|
|
||||||
bindings/java/.idea/
|
|
||||||
.idea/
|
|
||||||
|
|
||||||
benchmark_results.csv
|
|
||||||
cmake-build-debug/
|
|
||||||
.cxx/
|
|
||||||
.gradle/
|
|
||||||
local.properties
|
|
||||||
.log
|
|
||||||
.exe
|
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
[submodule "bindings/ios"]
|
||||||
|
path = bindings/ios
|
||||||
|
url = https://github.com/ggerganov/whisper.spm
|
510
AUTHORS
510
AUTHORS
@ -1,510 +0,0 @@
|
|||||||
# date: Tue Feb 4 13:03:35 EET 2025
|
|
||||||
# this file is auto-generated by scripts/gen-authors.sh
|
|
||||||
|
|
||||||
0/0 <zero@imaskeleton.me>
|
|
||||||
0cc4m <picard12@live.de>
|
|
||||||
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
|
||||||
65a <10104049+65a@users.noreply.github.com>
|
|
||||||
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
|
|
||||||
AT <manyoso@users.noreply.github.com>
|
|
||||||
Aarni Koskela <akx@iki.fi>
|
|
||||||
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
|
||||||
Aaron Taylor <aaron@exphat.com>
|
|
||||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
|
||||||
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
|
||||||
Adam Jones <domdomegg+git@gmail.com>
|
|
||||||
Adrien Gallouët <adrien@gallouet.fr>
|
|
||||||
Adrien Gallouët <angt@huggingface.co>
|
|
||||||
AfryMask <AfryMask@163.com>
|
|
||||||
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
|
||||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
|
||||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
|
||||||
AidanBeltonS <aidan.belton@codeplay.com>
|
|
||||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
|
||||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
|
||||||
Akash Mahajan <akash7190@gmail.com>
|
|
||||||
Akash Mahajan <akashmjn@stanford.edu>
|
|
||||||
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
|
||||||
Alan <unknown>
|
|
||||||
Albert Jin <albert.jin@gmail.com>
|
|
||||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
|
||||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
|
||||||
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
|
||||||
Alex Azarov <alex@azarov.by>
|
|
||||||
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
|
||||||
Alex Evgrashin <aevgrashin@yandex.ru>
|
|
||||||
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
|
||||||
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
|
||||||
Alexandru Mariuti <alex@mariuti.com>
|
|
||||||
Alexey Kharlamov <alexey@kharlamov.biz>
|
|
||||||
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
|
||||||
Ali Alameh <ali.alameh@isae.edu.lb>
|
|
||||||
Alter <0x7c48@gmail.com>
|
|
||||||
Ananta Bastola <anantarajbastola@gmail.com>
|
|
||||||
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
|
||||||
Andreas Lubbe <git@lubbe.org>
|
|
||||||
Andreu Huguet <andreuhuguet@gmail.com>
|
|
||||||
Andrew Huynh <a5thuynh@gmail.com>
|
|
||||||
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
|
||||||
Andrew S <andrews54757@gmail.com>
|
|
||||||
Andy Maloney <asmaloney@gmail.com>
|
|
||||||
Anton Kostin <masguit42@users.noreply.github.com>
|
|
||||||
Artyom Mezin <psycho.fading@gmail.com>
|
|
||||||
Asad Memon <asad.lionpk@gmail.com>
|
|
||||||
Ashraful Islam <ashraful.meche@gmail.com>
|
|
||||||
AsukaMinato <asukaminato@nyan.eu.org>
|
|
||||||
AustinMroz <austinmroz@utexas.edu>
|
|
||||||
Avik Sengupta <avik@sengupta.net>
|
|
||||||
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
|
||||||
Baffin Lee <baffinlee@gmail.com>
|
|
||||||
Ben Ashbaugh <ben.ashbaugh@intel.com>
|
|
||||||
Ben Nortier <bjnortier@gmail.com>
|
|
||||||
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
|
||||||
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
|
||||||
Binozo <70137898+Binozo@users.noreply.github.com>
|
|
||||||
Bo-Yi Wu <appleboy.tw@gmail.com>
|
|
||||||
Boris Bliznioukov <blib@mail.com>
|
|
||||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
|
||||||
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
|
||||||
Brian Murray <brian@bmurray.ca>
|
|
||||||
CRD716 <crd716@gmail.com>
|
|
||||||
Canis Lupus <Canis-UK@users.noreply.github.com>
|
|
||||||
Carlos Zoido <mrgalleta@gmail.com>
|
|
||||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
|
||||||
CarterLi999 <664681047@qq.com>
|
|
||||||
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
|
||||||
Changyeon Kim <cyzero.kim@samsung.com>
|
|
||||||
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
|
||||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
|
||||||
Charles Xu <charles.xu@arm.com>
|
|
||||||
Chen Xi <xi2.chen@intel.com>
|
|
||||||
Chen Xi <xixichen08@foxmail.com>
|
|
||||||
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
|
||||||
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
|
||||||
Chidi Williams <williamschidi1@gmail.com>
|
|
||||||
Chris Elrod <elrodc@gmail.com>
|
|
||||||
Christian <12550267+iceychris@users.noreply.github.com>
|
|
||||||
Christian Kastner <ckk@kvr.at>
|
|
||||||
Clifford Heath <clifford.heath@gmail.com>
|
|
||||||
Clint Herron <hanclinto@gmail.com>
|
|
||||||
Colin <github@whoisc.cc>
|
|
||||||
Conrad Kramer <conrad@conradkramer.com>
|
|
||||||
Corey Earwood <iamcgn+github@gmail.com>
|
|
||||||
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
|
||||||
DAN™ <dranger003@gmail.com>
|
|
||||||
DGdev91 <DGdev91@users.noreply.github.com>
|
|
||||||
Damian Czaja <trojan295@protonmail.com>
|
|
||||||
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
|
||||||
Dan Johansson <dan.johansson@arm.com>
|
|
||||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
|
||||||
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
|
|
||||||
Daniel Ziegenberg <daniel@ziegenberg.at>
|
|
||||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
|
||||||
Dave <dave-fl@users.noreply.github.com>
|
|
||||||
Dave Airlie <airlied@gmail.com>
|
|
||||||
Dave Airlie <airlied@redhat.com>
|
|
||||||
Daven Sanassy <daven@vochlea.co.uk>
|
|
||||||
David <dnhkng@gmail.com>
|
|
||||||
David Thorpe <djt@mutablelogic.com>
|
|
||||||
DavidKorczynski <david@adalogics.com>
|
|
||||||
Davidson Francis <davidsondfgl@gmail.com>
|
|
||||||
Dener Stassun <denerstassun@gmail.com>
|
|
||||||
Dibakar Gope <dibakar.gope@arm.com>
|
|
||||||
Didzis Gosko <didzis@users.noreply.github.com>
|
|
||||||
Diego Devesa <slarengh@gmail.com>
|
|
||||||
Digipom <admin@digipom.com>
|
|
||||||
Dimo <dimo@ieee.org>
|
|
||||||
Djip007 <3705339+Djip007@users.noreply.github.com>
|
|
||||||
Djip007 <djip.perois@free.fr>
|
|
||||||
Dody Suria Wijaya <dodysw@gmail.com>
|
|
||||||
Dou Xinpeng <15529241576@163.com>
|
|
||||||
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
|
||||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
|
||||||
Duncan McConnell <ddmcconnell4@gmail.com>
|
|
||||||
Egor Egorov <me@egorfine.com>
|
|
||||||
Elkana Bardugo <ttv200@gmail.com>
|
|
||||||
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
|
||||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
|
||||||
Eric Curtin <ericcurtin17@gmail.com>
|
|
||||||
Eric Swanson <eswanson@alloscomp.com>
|
|
||||||
Eric Tendian <erictendian@gmail.com>
|
|
||||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
|
||||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
|
||||||
Evan Jones <evan.q.jones@gmail.com>
|
|
||||||
Evan Martin <evan.martin@gmail.com>
|
|
||||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
|
||||||
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
|
||||||
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
|
||||||
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
|
||||||
Fangjun Kuang <csukuangfj@gmail.com>
|
|
||||||
Felix <stenbackfelix@gmail.com>
|
|
||||||
Finn Voorhees <finnvoorhees@gmail.com>
|
|
||||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
|
||||||
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
|
||||||
Frankie Robertson <frankier@users.noreply.github.com>
|
|
||||||
Gang Chen <goncha@gmail.com>
|
|
||||||
Gavin Cai <gavin1818@hotmail.com>
|
|
||||||
George Hindle <george@georgehindle.com>
|
|
||||||
Georgi Gerganov <ggerganov@gmail.com>
|
|
||||||
Gilad S <7817232+giladgd@users.noreply.github.com>
|
|
||||||
Gilad S <giladgd@users.noreply.github.com>
|
|
||||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
|
||||||
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
|
||||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
|
||||||
Gleicon Moraes <gleicon@gmail.com>
|
|
||||||
Gregor Jasny <gjasny@googlemail.com>
|
|
||||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
|
||||||
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
|
||||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
|
||||||
Hang <bebound@gmail.com>
|
|
||||||
Haus1 <haus.xda@gmail.com>
|
|
||||||
Herman Semenov <GermanAizek@yandex.ru>
|
|
||||||
HimariO <dsfhe49854@gmail.com>
|
|
||||||
Hong Bo PENG <penghb@cn.ibm.com>
|
|
||||||
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
|
||||||
Hugo <hugo@whynothugo.nl>
|
|
||||||
Ian Bicking <ian@ianbicking.org>
|
|
||||||
Ian Bull <irbull@eclipsesource.com>
|
|
||||||
Ihar Hrachyshka <ihrachys@redhat.com>
|
|
||||||
Ikko Ashimine <eltociear@gmail.com>
|
|
||||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
|
||||||
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
|
||||||
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
|
||||||
Ivan <nekotekina@gmail.com>
|
|
||||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
|
||||||
Ivan Gorin <ivangorin21@gmail.com>
|
|
||||||
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
|
|
||||||
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
|
||||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
|
||||||
JacobLinCool <jacoblincool@gmail.com>
|
|
||||||
Jakub Ráček <blizzcz@gmail.com>
|
|
||||||
Jared Van Bortel <jared@nomic.ai>
|
|
||||||
Jay Binks <jaybinks@gmail.com>
|
|
||||||
Jayant <jayantyadav202@gmail.com>
|
|
||||||
Jeff Bolz <jbolz@nvidia.com>
|
|
||||||
Jeroen Mostert <jeroen.mostert@cm.com>
|
|
||||||
Jhen-Jie Hong <developer@jhen.me>
|
|
||||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
|
||||||
JidongZhang-THU <1119708529@qq.com>
|
|
||||||
Jo Liss <joliss42@gmail.com>
|
|
||||||
Joe Todd <joe.todd@codeplay.com>
|
|
||||||
Johan <jr.raffin@gmail.com>
|
|
||||||
Johannes Gäßler <johannesg@5d6.de>
|
|
||||||
John Balis <phobossystems@gmail.com>
|
|
||||||
JohnnyB <jboero@users.noreply.github.com>
|
|
||||||
Jonathan Soo <jcsoo@agora.com>
|
|
||||||
Jonno <1160532+razodactyl@users.noreply.github.com>
|
|
||||||
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
|
||||||
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
|
||||||
Josh Bleecher Snyder <josharian@gmail.com>
|
|
||||||
Josscii <jossciiweiyi@gmail.com>
|
|
||||||
Judd <foldl@users.noreply.github.com>
|
|
||||||
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
|
||||||
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
|
||||||
Junil Kim <logyourself@gmail.com>
|
|
||||||
Justina Cho <justcho5@gmail.com>
|
|
||||||
Justine Tunney <jtunney@gmail.com>
|
|
||||||
Justine Tunney <jtunney@mozilla.com>
|
|
||||||
KITAITI Makoto <KitaitiMakoto@gmail.com>
|
|
||||||
KP Kaiser <kirk@zothcorp.com>
|
|
||||||
Kamilake <exjang0@gmail.com>
|
|
||||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
|
||||||
Karthick <j.karthic2004@gmail.com>
|
|
||||||
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
|
||||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
|
||||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
|
||||||
Kendrick Taylor <kendrick@circuitsix.com>
|
|
||||||
Kevin Brothaler <admin@digipom.com>
|
|
||||||
Kevin Gibbons <bakkot@gmail.com>
|
|
||||||
Konosuke Sakai <konosuke@konosuke.work>
|
|
||||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
|
||||||
Kreijstal <rainb@tfwno.gf>
|
|
||||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
|
||||||
LBlue <153975653+lbluep@users.noreply.github.com>
|
|
||||||
Larry Battle <larry.battle.tech@gmail.com>
|
|
||||||
Laytan Laats <laytanlaats@hotmail.com>
|
|
||||||
Leo Moll <leo.moll@yeasoft.com>
|
|
||||||
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
|
||||||
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
|
||||||
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
|
||||||
Luis Herrera <herrera-luis@users.noreply.github.com>
|
|
||||||
Lukas Rist <glaslos@gmail.com>
|
|
||||||
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
|
||||||
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
|
||||||
Ma Mingfei <mingfei.ma@intel.com>
|
|
||||||
Maciek <maciek.mab122@gmail.com>
|
|
||||||
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
|
||||||
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
|
||||||
Mark Karpelès <MagicalTux@users.noreply.github.com>
|
|
||||||
Mark Zhuang <zhuangqiubin@gmail.com>
|
|
||||||
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
|
||||||
Martin Delille <martin@delille.org>
|
|
||||||
Martin Warnaar <martinwarnaar@gmail.com>
|
|
||||||
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
|
||||||
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
|
||||||
Mathieu Baudier <mbaudier@argeo.org>
|
|
||||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
|
||||||
Matija Pevec <mightymatth@users.noreply.github.com>
|
|
||||||
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
|
||||||
Max Krasnyansky <max.krasnyansky@gmail.com>
|
|
||||||
Max Krasnyansky <quic_maxk@quicinc.com>
|
|
||||||
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
|
||||||
Meng, Hengyu <hengyu.meng@intel.com>
|
|
||||||
Mengqing Cao <cmq0113@163.com>
|
|
||||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
|
||||||
Michael Rienstra <mrienstra@gmail.com>
|
|
||||||
Mikhail Grigorev <sleuthhound@gmail.com>
|
|
||||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
|
||||||
Mohit Agarwal <mohit@sdf.org>
|
|
||||||
Molly Sophia <mollysophia379@gmail.com>
|
|
||||||
Murilo Santana <mvrilo@gmail.com>
|
|
||||||
NETZkultur GmbH <mulholland@netzkultur.de>
|
|
||||||
Natsu <chino@hotococoa.moe>
|
|
||||||
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
|
||||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
|
||||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
|
||||||
Neuman Vong <neuman.vong@gmail.com>
|
|
||||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
|
||||||
Nicholas Albion <nalbion@yahoo.com>
|
|
||||||
Nico Bosshard <nico@bosshome.ch>
|
|
||||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
|
||||||
Niels Mayer <Niels.Mayer@gmail.com>
|
|
||||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
|
||||||
Nikolaj Olsson <nikse.dk@gmail.com>
|
|
||||||
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
|
||||||
Oleg Sidorov <me@whitebox.io>
|
|
||||||
Oleg Sidorov <oleg@sidorov.nl>
|
|
||||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
|
||||||
Ondrej Kokes <ondrej.kokes@gmail.com>
|
|
||||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
|
||||||
PAB <pierreantoine.bannier@gmail.com>
|
|
||||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
|
||||||
Pedro Probst <pprobst@insiberia.net>
|
|
||||||
Peng <hzp1024@qq.com>
|
|
||||||
Peter <peter277@users.noreply.github.com>
|
|
||||||
Philipp Zabel <philipp.zabel@gmail.com>
|
|
||||||
Philippe Normand <phil@base-art.net>
|
|
||||||
Philippe Normand <philn@igalia.com>
|
|
||||||
Plamen Minev <pacominev@gmail.com>
|
|
||||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
|
||||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
|
||||||
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
|
||||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
|
||||||
R0CKSTAR <yeahdongcn@gmail.com>
|
|
||||||
Radoslav Gerganov <rgerganov@gmail.com>
|
|
||||||
Radosław Gryta <radek.gryta@gmail.com>
|
|
||||||
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
|
|
||||||
Raiya Araki <83504221+rai62@users.noreply.github.com>
|
|
||||||
Reinforce-II <fate@eastal.com>
|
|
||||||
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
|
||||||
RelatedTitle <r3latedtitle@gmail.com>
|
|
||||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
|
||||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
|
||||||
Rich Jones <miserlou@gmail.com>
|
|
||||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
|
||||||
Robin <robin.xw@hotmail.com>
|
|
||||||
Roddur Dasgupta <roddurd@gmail.com>
|
|
||||||
Roland Rabien <figbug@gmail.com>
|
|
||||||
Romain Biessy <romain.biessy@codeplay.com>
|
|
||||||
Ronsor <ronsor@ronsor.pw>
|
|
||||||
Rotem Dan <rotemdan@gmail.com>
|
|
||||||
Ryan Hitchman <hitchmanr@gmail.com>
|
|
||||||
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
|
||||||
RyanChang <ftes90015@gmail.com>
|
|
||||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
|
||||||
SXX <sxx1136965276@gmail.com>
|
|
||||||
Sacha Arbonel <sacha.arbonel@hotmail.fr>
|
|
||||||
Salman Faroz <stsfaroz@gmail.com>
|
|
||||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
|
||||||
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
|
||||||
Sam Pullara <spullara@gmail.com>
|
|
||||||
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
|
|
||||||
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
|
||||||
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
|
|
||||||
Sergio López <slp@redhat.com>
|
|
||||||
Sergio López <slp@sinrega.org>
|
|
||||||
Shanshan Shen <467638484@qq.com>
|
|
||||||
Shijie <821898965@qq.com>
|
|
||||||
Shupei Fan <dymarkfan@outlook.com>
|
|
||||||
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
|
||||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
|
||||||
Simon Moisselin <simon.moisstoll@gmail.com>
|
|
||||||
Sindre Sorhus <sindresorhus@gmail.com>
|
|
||||||
Slava Primenko <primenko.s@gmail.com>
|
|
||||||
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
|
||||||
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
|
|
||||||
Stefan Sydow <s.sydow@heinlein-video.de>
|
|
||||||
Stefan Sydow <stefan@sydow.email>
|
|
||||||
Syahmi Azhar <prsyahmi@gmail.com>
|
|
||||||
Syed Jafri <syedjafri97@gmail.com>
|
|
||||||
Sơn Phan Trung <phantrungson17@gmail.com>
|
|
||||||
Taisei Mima <bhbstar.me@gmail.com>
|
|
||||||
Takeshi Inoue <inoue.takeshi@gmail.com>
|
|
||||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
|
||||||
Taras Glek <taras@thegp.com>
|
|
||||||
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
|
||||||
Thamster <Thamster@users.noreply.github.com>
|
|
||||||
Thijs Raymakers <thijs@raymakers.nl>
|
|
||||||
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
|
||||||
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
|
||||||
Tienshiao Ma <tienshiao@tienshiao.org>
|
|
||||||
Tim Miller <drasticactions@users.noreply.github.com>
|
|
||||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
|
||||||
Tobrun <tobrun.van.nuland@gmail.com>
|
|
||||||
Todd <taf2@users.noreply.github.com>
|
|
||||||
Toliver <teejae@gmail.com>
|
|
||||||
Tong Li <31761981+litongjava@users.noreply.github.com>
|
|
||||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
|
||||||
Topping1 <78745143+Topping1@users.noreply.github.com>
|
|
||||||
Travis Cline <travis.cline@gmail.com>
|
|
||||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
|
||||||
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
|
|
||||||
Vadim Peretokin <vperetokin@hey.com>
|
|
||||||
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
|
||||||
Vin Misra <vinith@alum.mit.edu>
|
|
||||||
Vulcan <93451215+trholding@users.noreply.github.com>
|
|
||||||
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
|
||||||
William Tambellini <william.tambellini@gmail.com>
|
|
||||||
William Tambellini <wtambellini@sdl.com>
|
|
||||||
Wilson Silva <wilson.dsigns@gmail.com>
|
|
||||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
|
||||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
|
||||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
|
||||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
|
||||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
|
||||||
Xuan Son Nguyen <thichthat@gmail.com>
|
|
||||||
Yajing Tang <phillis@google.com>
|
|
||||||
Yang Shen <aplshenyang@gmail.com>
|
|
||||||
Yunès <jean.baptiste.yunes@free.fr>
|
|
||||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
|
||||||
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
|
|
||||||
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
|
||||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
|
||||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
|
||||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
|
||||||
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
|
||||||
Zollner <24618122+Zolliner@users.noreply.github.com>
|
|
||||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
|
||||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
|
||||||
agray3 <agray3@users.noreply.github.com>
|
|
||||||
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
|
||||||
aldorof <aldorof@users.noreply.github.com>
|
|
||||||
alonfaraj <alonfaraj@gmail.com>
|
|
||||||
amd-dwang <dong.wang@amd.com>
|
|
||||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
|
||||||
andypayne <apayne@gmail.com>
|
|
||||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
|
||||||
arizhih <40765267+arizhih@users.noreply.github.com>
|
|
||||||
automaticcat <daogiatuank54@gmail.com>
|
|
||||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
|
||||||
be-next <jerome.ramette@gmail.com>
|
|
||||||
bert hubert <bert@hubertnet.nl>
|
|
||||||
billyct <billy_allen@126.com>
|
|
||||||
bmwl <brian.marshall@tolko.com>
|
|
||||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
|
||||||
bocytko <bocytko+github@gmail.com>
|
|
||||||
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
|
||||||
boolemancer <boolemancer@gmail.com>
|
|
||||||
bradmit <151883577+bradmit@users.noreply.github.com>
|
|
||||||
brunofaustino <b.fa.amorim@gmail.com>
|
|
||||||
bssrdf <merlintiger@hotmail.com>
|
|
||||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
|
||||||
cdosoftei <ciprian.dosoftei@gmail.com>
|
|
||||||
clach04 <Chris.Clark@actian.com>
|
|
||||||
compilade <113953597+compilade@users.noreply.github.com>
|
|
||||||
compilade <git@compilade.net>
|
|
||||||
conradg <conradjgodfrey@gmail.com>
|
|
||||||
crummyh <elijah@crums.us>
|
|
||||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
|
||||||
denersc <denerstassun@gmail.com>
|
|
||||||
dscripka <dscripka@users.noreply.github.com>
|
|
||||||
duthils <duthils@duthils.net>
|
|
||||||
ecneladis <ecneladis@users.noreply.github.com>
|
|
||||||
faker <nspyia2002@gmail.com>
|
|
||||||
fitzsim <fitzsim@fitzsim.org>
|
|
||||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
|
||||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
|
||||||
genevera (she/her) <genevera@users.noreply.github.com>
|
|
||||||
geniusnut <geniusnut@gmail.com>
|
|
||||||
gilbertgong <gilbert.gong@gmail.com>
|
|
||||||
gn64 <yukikaze.jp@gmail.com>
|
|
||||||
goldwaving <77494627+goldwaving@users.noreply.github.com>
|
|
||||||
greeshmay <greeshmay@gmail.com>
|
|
||||||
haopeng <657407891@qq.com>
|
|
||||||
hipudding <huafengchun@gmail.com>
|
|
||||||
hsinhoyeh <yhh92u@gmail.com>
|
|
||||||
hydai <z54981220@gmail.com>
|
|
||||||
iamthad <thadeus.j.fleming@gmail.com>
|
|
||||||
issixx <46835150+issixx@users.noreply.github.com>
|
|
||||||
james wolf <contractorwolf@hotmail.com>
|
|
||||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
|
||||||
jettoblack <jettoblack@gmail.com>
|
|
||||||
jiez <373447296@qq.com>
|
|
||||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
|
||||||
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
|
||||||
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
|
||||||
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
|
||||||
jwijffels <jwijffels@bnosac.be>
|
|
||||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
|
||||||
kamranjon <kamranjon@gmail.com>
|
|
||||||
katsu560 <katsu560oo-@docomo.ne.jp>
|
|
||||||
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
|
||||||
keyehzy <msamuel@aluno.puc-rio.br>
|
|
||||||
kunnis <kunnis@users.noreply.github.com>
|
|
||||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
|
||||||
leejet <leejet714@gmail.com>
|
|
||||||
leo-pony <nengjunma@outlook.com>
|
|
||||||
lhez <quic_lih@quicinc.com>
|
|
||||||
litong <31761981+litongjava@users.noreply.github.com>
|
|
||||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
|
||||||
lnyan <lkwq007@gmail.com>
|
|
||||||
luoyu-intel <yu.luo@intel.com>
|
|
||||||
m.bell <m.bell@techsmith.com>
|
|
||||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
|
||||||
mashizora <30516315+mashizora@users.noreply.github.com>
|
|
||||||
matt23654 <matthew.webber@protonmail.com>
|
|
||||||
matteo <matteogeniaccio@yahoo.it>
|
|
||||||
mgrachten <maarten@grachten.eu>
|
|
||||||
mkiol <mkiol@users.noreply.github.com>
|
|
||||||
mky_coder <47767389+mkycoder@users.noreply.github.com>
|
|
||||||
novag <7754358+novag@users.noreply.github.com>
|
|
||||||
pajowu <pajowu@pajowu.de>
|
|
||||||
pengxin99 <pengxin.yuan@intel.com>
|
|
||||||
petterreinholdtsen <pere-github@hungry.com>
|
|
||||||
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
|
||||||
rlapray <lapray.romain@gmail.com>
|
|
||||||
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
|
||||||
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
|
||||||
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
|
||||||
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
|
||||||
slaren <slarengh@gmail.com>
|
|
||||||
slashlib <slashlib@users.noreply.github.com>
|
|
||||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
|
||||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
|
||||||
st-gr <38470677+st-gr@users.noreply.github.com>
|
|
||||||
stduhpf <stephduh@live.fr>
|
|
||||||
stormofice <58337328+stormofice@users.noreply.github.com>
|
|
||||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
|
||||||
thefinaldegree <thefinaldegree@gmail.com>
|
|
||||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
|
||||||
toboil-features <160222185+toboil-features@users.noreply.github.com>
|
|
||||||
trixirt <trix@redhat.com>
|
|
||||||
ulatekh <ulatekh@yahoo.com>
|
|
||||||
undef <undefdev@gmail.com>
|
|
||||||
uvos <devnull@uvos.xyz>
|
|
||||||
uvos <philipp@uvos.xyz>
|
|
||||||
valVk <valVk@users.noreply.github.com>
|
|
||||||
venkr <venkateshrameshkumar+1@gmail.com>
|
|
||||||
vicalloy <zbirder@gmail.com>
|
|
||||||
wangshuai09 <391746016@qq.com>
|
|
||||||
woachk <24752637+woachk@users.noreply.github.com>
|
|
||||||
xctan <axunlei@gmail.com>
|
|
||||||
xdrudis <xavierdrudis@yahoo.es>
|
|
||||||
yuri@FreeBSD <yuri@FreeBSD>
|
|
||||||
zhangjixiong <code.zjx@gmail.com>
|
|
||||||
zhentaoyu <zhentao.yu@intel.com>
|
|
||||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
|
||||||
zhouwg <zhouwg2000@gmail.com>
|
|
||||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
|
||||||
布客飞龙 <562826179@qq.com>
|
|
||||||
Артём Земляк <azemlyak@smart-consulting.ru>
|
|
369
CMakeLists.txt
369
CMakeLists.txt
@ -1,31 +1,21 @@
|
|||||||
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
cmake_minimum_required (VERSION 3.0)
|
||||||
project("whisper.cpp" C CXX)
|
|
||||||
project("whisper.cpp" VERSION 1.7.5)
|
|
||||||
include(CheckIncludeFileCXX)
|
|
||||||
|
|
||||||
set(SOVERSION 1)
|
project(whisper.cpp VERSION 1.0.4)
|
||||||
|
|
||||||
#set(CMAKE_WARN_DEPRECATED YES)
|
|
||||||
set(CMAKE_WARN_UNUSED_CLI YES)
|
|
||||||
|
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
||||||
|
|
||||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Add path to modules
|
# Add path to modules
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||||
|
|
||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
set(WHISPER_STANDALONE ON)
|
set(WHISPER_STANDALONE ON)
|
||||||
|
include(GitVars)
|
||||||
include(git-vars)
|
include(BuildTypes)
|
||||||
|
|
||||||
# configure project version
|
# configure project version
|
||||||
|
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
||||||
|
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
||||||
|
endif()
|
||||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||||
else()
|
else()
|
||||||
set(WHISPER_STANDALONE OFF)
|
set(WHISPER_STANDALONE OFF)
|
||||||
@ -35,16 +25,6 @@ if (EMSCRIPTEN)
|
|||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
|
|
||||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||||
|
|
||||||
# TODO: without these, we get the following error:
|
|
||||||
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
|
||||||
|
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
|
||||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
|
|
||||||
else()
|
else()
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
@ -53,199 +33,220 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
# options
|
||||||
|
|
||||||
#
|
option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||||
# option list
|
|
||||||
#
|
|
||||||
|
|
||||||
# debug
|
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
|
||||||
|
|
||||||
# build
|
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||||
option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
|
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||||
|
|
||||||
|
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||||
|
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||||
|
|
||||||
|
option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
|
||||||
|
|
||||||
|
if (APPLE)
|
||||||
|
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
|
||||||
|
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
||||||
|
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
||||||
|
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
||||||
|
else()
|
||||||
|
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
||||||
|
|
||||||
# sanitizers
|
# sanitizers
|
||||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
|
||||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
|
||||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
|
||||||
|
|
||||||
# extra artifacts
|
if (NOT MSVC)
|
||||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
if (WHISPER_SANITIZE_THREAD)
|
||||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
|
||||||
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
|
||||||
|
|
||||||
# 3rd party libs
|
|
||||||
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
|
||||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
|
||||||
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
||||||
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
|
||||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
|
||||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
|
||||||
|
|
||||||
# Required for relocatable CMake package
|
|
||||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
|
||||||
|
|
||||||
# override ggml options
|
|
||||||
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
|
||||||
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
|
||||||
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
|
||||||
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
|
||||||
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
|
||||||
|
|
||||||
# transition helpers
|
|
||||||
function (whisper_option_depr TYPE OLD NEW)
|
|
||||||
if (${OLD})
|
|
||||||
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
|
||||||
set(${NEW} ON)
|
|
||||||
endif()
|
endif()
|
||||||
endfunction()
|
|
||||||
|
|
||||||
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
if (WHISPER_SANITIZE_ADDRESS)
|
||||||
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||||
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||||
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
endif()
|
||||||
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
|
||||||
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
|
||||||
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
|
||||||
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
|
||||||
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
|
||||||
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
|
||||||
whisper_option_depr(WARNING WHISPER_CCACHE GGML_CCACHE)
|
|
||||||
|
|
||||||
if (GGML_CUDA AND NOT MSVC)
|
if (WHISPER_SANITIZE_UNDEFINED)
|
||||||
#GGML_CUDA enabled, add the necessary compile options -Wno-deprecated-gpu-targets
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
||||||
add_compile_options(-Wno-deprecated-gpu-targets)
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#
|
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
||||||
# build the library
|
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||||
#
|
|
||||||
|
|
||||||
if (NOT TARGET ggml)
|
# dependencies
|
||||||
if (WHISPER_USE_SYSTEM_GGML)
|
|
||||||
find_package(ggml REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
if (NOT ggml_FOUND)
|
|
||||||
message(FATAL_ERROR "System-installed GGML library not found.")
|
# on APPLE - include Accelerate framework
|
||||||
endif()
|
if (APPLE AND NOT WHISPER_NO_ACCELERATE)
|
||||||
add_library(ggml ALIAS ggml::ggml)
|
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||||
|
if (ACCELERATE_FRAMEWORK)
|
||||||
|
message(STATUS "Accelerate framework found")
|
||||||
|
|
||||||
|
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
||||||
|
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
||||||
else()
|
else()
|
||||||
add_subdirectory(ggml)
|
message(WARNING "Accelerate framework not found")
|
||||||
if(WIN32)
|
endif()
|
||||||
# The following adds a _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR macro and is a workaround for
|
endif()
|
||||||
# the Windows C++ standard library which does not support constexpr mutexes.
|
|
||||||
# From the release notes: https://github.com/microsoft/STL/wiki/Changelog
|
if (WHISPER_SUPPORT_OPENBLAS)
|
||||||
# Disable constexpr mutex constructor on Windows
|
find_library(OPENBLAS_LIB
|
||||||
# Fixed mutex's constructor to be constexpr. #3824 #4000 #4339
|
NAMES openblas libopenblas
|
||||||
# Note: Programs that aren't following the documented restrictions on binary compatibility may encounter
|
)
|
||||||
# null dereferences in mutex machinery. You must follow this rule:
|
if (OPENBLAS_LIB)
|
||||||
# When you mix binaries built by different supported versions of the toolset, the Redistributable version
|
message(STATUS "OpenBLAS found")
|
||||||
# must be at least as new as the latest toolset used by any app component.
|
|
||||||
# You can define _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR as an escape hatch.
|
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${OPENBLAS_LIB})
|
||||||
#
|
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
||||||
# Specifically to whisper.cpp this would cause a crash when using the Java bindings.
|
else()
|
||||||
# resulting in a Invalid memory access error.
|
message(WARNING "OpenBLAS not found")
|
||||||
target_compile_definitions(ggml-base PRIVATE _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# compiler flags
|
||||||
|
|
||||||
|
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||||
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||||
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
if (WHISPER_ALL_WARNINGS)
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
||||||
|
-Wall \
|
||||||
|
-Wextra \
|
||||||
|
-Wpedantic \
|
||||||
|
-Wshadow \
|
||||||
|
-Wcast-qual \
|
||||||
|
-Wstrict-prototypes \
|
||||||
|
-Wpointer-arith \
|
||||||
|
-Wno-unused-function \
|
||||||
|
")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
||||||
|
-Wall \
|
||||||
|
-Wextra \
|
||||||
|
-Wpedantic \
|
||||||
|
-Wcast-qual \
|
||||||
|
")
|
||||||
|
else()
|
||||||
|
# todo : msvc
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
|
||||||
|
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
|
|
||||||
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||||
|
message(STATUS "ARM detected")
|
||||||
|
else()
|
||||||
|
message(STATUS "x86 detected")
|
||||||
|
if (MSVC)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
|
||||||
|
else()
|
||||||
|
if (EMSCRIPTEN)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||||
|
else()
|
||||||
|
if(NOT WHISPER_NO_AVX)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
||||||
|
endif()
|
||||||
|
if(NOT WHISPER_NO_AVX2)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
|
||||||
|
endif()
|
||||||
|
if(NOT WHISPER_NO_FMA)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
|
||||||
|
endif()
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
|
||||||
endif()
|
endif()
|
||||||
add_subdirectory(src)
|
|
||||||
|
if (WHISPER_PERF)
|
||||||
|
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
||||||
|
endif()
|
||||||
|
|
||||||
#
|
#
|
||||||
# install
|
# whisper - this is the main library of the project
|
||||||
#
|
#
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
set(TARGET whisper)
|
||||||
include(CMakePackageConfigHelpers)
|
|
||||||
|
|
||||||
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
add_library(${TARGET}
|
||||||
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
ggml.h
|
||||||
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
ggml.c
|
||||||
|
whisper.h
|
||||||
|
whisper.cpp
|
||||||
|
)
|
||||||
|
|
||||||
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
include(DefaultTargetOptions)
|
||||||
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
|
||||||
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
|
||||||
|
|
||||||
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
target_include_directories(${TARGET} PUBLIC
|
||||||
|
.
|
||||||
|
)
|
||||||
|
|
||||||
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
if (MSVC)
|
||||||
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
configure_package_config_file(
|
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
else()
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
endif()
|
||||||
PATH_VARS
|
|
||||||
WHISPER_INCLUDE_INSTALL_DIR
|
|
||||||
WHISPER_LIB_INSTALL_DIR
|
|
||||||
WHISPER_BIN_INSTALL_DIR )
|
|
||||||
|
|
||||||
write_basic_package_version_file(
|
if (BUILD_SHARED_LIBS)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
target_link_libraries(${TARGET} PUBLIC
|
||||||
VERSION ${WHISPER_INSTALL_VERSION}
|
${CMAKE_DL_LIBS}
|
||||||
COMPATIBILITY SameMajorVersion)
|
)
|
||||||
|
|
||||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
target_compile_definitions(${TARGET} PUBLIC
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
WHISPER_SHARED
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
configure_file(cmake/whisper.pc.in
|
if (EMSCRIPTEN)
|
||||||
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
||||||
@ONLY)
|
endif()
|
||||||
|
|
||||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
target_compile_definitions(${TARGET} PUBLIC
|
||||||
DESTINATION lib/pkgconfig)
|
${WHISPER_EXTRA_FLAGS}
|
||||||
|
)
|
||||||
|
|
||||||
|
install(TARGETS ${TARGET}
|
||||||
|
LIBRARY DESTINATION lib
|
||||||
|
ARCHIVE DESTINATION lib/static
|
||||||
|
RUNTIME DESTINATION bin
|
||||||
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# bindings
|
||||||
|
#
|
||||||
|
|
||||||
|
add_subdirectory(bindings)
|
||||||
|
|
||||||
#
|
#
|
||||||
# programs, examples and tests
|
# programs, examples and tests
|
||||||
#
|
#
|
||||||
|
|
||||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
if (WHISPER_BUILD_TESTS)
|
||||||
include(CTest)
|
enable_testing()
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (WHISPER_BUILD_EXAMPLES)
|
if (WHISPER_BUILD_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (MSVC)
|
|
||||||
set(MSVC_WARNING_FLAGS
|
|
||||||
/wd4101 # Unreferenced local variable
|
|
||||||
/wd4005 # Macro redefinition
|
|
||||||
/wd4065 # switch statement contains 'default' but no 'case' labels
|
|
||||||
/wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
|
|
||||||
/wd4244 # Conversion from one type to another type, possible loss of data
|
|
||||||
/wd4805 # Unsafe mix of type
|
|
||||||
/wd4305 # Truncation from 'type1' to 'type2' (often double to float)
|
|
||||||
/wd4996 # Function or variable may be unsafe/deprecated
|
|
||||||
)
|
|
||||||
function(disable_msvc_warnings target_name)
|
|
||||||
if(TARGET ${target_name})
|
|
||||||
target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
|
|
||||||
endif()
|
|
||||||
endfunction()
|
|
||||||
|
|
||||||
if (WHISPER_BUILD_EXAMPLES)
|
|
||||||
disable_msvc_warnings(whisper)
|
|
||||||
disable_msvc_warnings(common)
|
|
||||||
disable_msvc_warnings(common-sdl)
|
|
||||||
disable_msvc_warnings(lsp)
|
|
||||||
disable_msvc_warnings(wchess-core)
|
|
||||||
disable_msvc_warnings(whisper-command)
|
|
||||||
disable_msvc_warnings(whisper-cli)
|
|
||||||
disable_msvc_warnings(whisper-server)
|
|
||||||
disable_msvc_warnings(whisper-stream)
|
|
||||||
disable_msvc_warnings(whisper-talk-llama)
|
|
||||||
disable_msvc_warnings(whisper-bench)
|
|
||||||
disable_msvc_warnings(quantize)
|
|
||||||
disable_msvc_warnings(vad-speech-segments)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023-2024 The ggml authors
|
Copyright (c) 2022 Georgi Gerganov
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
246
Makefile
246
Makefile
@ -1,12 +1,218 @@
|
|||||||
|
ifndef UNAME_S
|
||||||
|
UNAME_S := $(shell uname -s)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef UNAME_P
|
||||||
|
UNAME_P := $(shell uname -p)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef UNAME_M
|
||||||
|
UNAME_M := $(shell uname -m)
|
||||||
|
endif
|
||||||
|
|
||||||
|
CCV := $(shell $(CC) --version | head -n 1)
|
||||||
|
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||||
|
|
||||||
|
# Mac OS + Arm can report x86_64
|
||||||
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
ifneq ($(UNAME_P),arm)
|
||||||
|
SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
|
||||||
|
ifeq ($(SYSCTL_M),1)
|
||||||
|
# UNAME_P := arm
|
||||||
|
# UNAME_M := arm64
|
||||||
|
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
#
|
||||||
|
# Compile flags
|
||||||
|
#
|
||||||
|
|
||||||
|
CFLAGS = -I. -O3 -std=c11 -fPIC
|
||||||
|
CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
|
||||||
|
LDFLAGS =
|
||||||
|
|
||||||
|
# OS specific
|
||||||
|
# TODO: support Windows
|
||||||
|
ifeq ($(UNAME_S),Linux)
|
||||||
|
CFLAGS += -pthread
|
||||||
|
CXXFLAGS += -pthread
|
||||||
|
endif
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
CFLAGS += -pthread
|
||||||
|
CXXFLAGS += -pthread
|
||||||
|
endif
|
||||||
|
ifeq ($(UNAME_S),FreeBSD)
|
||||||
|
CFLAGS += -pthread
|
||||||
|
CXXFLAGS += -pthread
|
||||||
|
endif
|
||||||
|
ifeq ($(UNAME_S),Haiku)
|
||||||
|
CFLAGS += -pthread
|
||||||
|
CXXFLAGS += -pthread
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Architecture specific
|
||||||
|
# TODO: probably these flags need to be tweaked on some architectures
|
||||||
|
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||||
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
CFLAGS += -mf16c
|
||||||
|
AVX1_M := $(shell sysctl machdep.cpu.features)
|
||||||
|
ifneq (,$(findstring FMA,$(AVX1_M)))
|
||||||
|
CFLAGS += -mfma
|
||||||
|
endif
|
||||||
|
ifneq (,$(findstring AVX1.0,$(AVX1_M)))
|
||||||
|
CFLAGS += -mavx
|
||||||
|
endif
|
||||||
|
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
|
||||||
|
ifneq (,$(findstring AVX2,$(AVX2_M)))
|
||||||
|
CFLAGS += -mavx2
|
||||||
|
endif
|
||||||
|
else ifeq ($(UNAME_S),Linux)
|
||||||
|
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring avx,$(AVX1_M)))
|
||||||
|
CFLAGS += -mavx
|
||||||
|
endif
|
||||||
|
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring avx2,$(AVX2_M)))
|
||||||
|
CFLAGS += -mavx2
|
||||||
|
endif
|
||||||
|
FMA_M := $(shell grep "fma " /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring fma,$(FMA_M)))
|
||||||
|
CFLAGS += -mfma
|
||||||
|
endif
|
||||||
|
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring f16c,$(F16C_M)))
|
||||||
|
CFLAGS += -mf16c
|
||||||
|
endif
|
||||||
|
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring sse3,$(SSE3_M)))
|
||||||
|
CFLAGS += -msse3
|
||||||
|
endif
|
||||||
|
else ifeq ($(UNAME_S),Haiku)
|
||||||
|
AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
|
||||||
|
ifneq (,$(findstring avx,$(AVX1_M)))
|
||||||
|
CFLAGS += -mavx
|
||||||
|
endif
|
||||||
|
AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
|
||||||
|
ifneq (,$(findstring avx2,$(AVX2_M)))
|
||||||
|
CFLAGS += -mavx2
|
||||||
|
endif
|
||||||
|
FMA_M := $(shell sysinfo -cpu | grep "FMA ")
|
||||||
|
ifneq (,$(findstring fma,$(FMA_M)))
|
||||||
|
CFLAGS += -mfma
|
||||||
|
endif
|
||||||
|
F16C_M := $(shell sysinfo -cpu | grep "F16C ")
|
||||||
|
ifneq (,$(findstring f16c,$(F16C_M)))
|
||||||
|
CFLAGS += -mf16c
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
CFLAGS += -mfma -mf16c -mavx -mavx2
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifeq ($(UNAME_M),amd64)
|
||||||
|
CFLAGS += -mavx -mavx2 -mfma -mf16c
|
||||||
|
endif
|
||||||
|
ifeq ($(UNAME_M),ppc64le)
|
||||||
|
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
||||||
|
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
||||||
|
CFLAGS += -mpower9-vector
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifndef WHISPER_NO_ACCELERATE
|
||||||
|
# Mac M1 - include Accelerate framework
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
CFLAGS += -DGGML_USE_ACCELERATE
|
||||||
|
LDFLAGS += -framework Accelerate
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifdef WHISPER_OPENBLAS
|
||||||
|
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||||
|
LDFLAGS += -lopenblas
|
||||||
|
endif
|
||||||
|
ifdef WHISPER_GPROF
|
||||||
|
CFLAGS += -pg
|
||||||
|
CXXFLAGS += -pg
|
||||||
|
endif
|
||||||
|
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
||||||
|
endif
|
||||||
|
ifneq ($(filter armv6%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 1, 2, 3
|
||||||
|
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
|
||||||
|
endif
|
||||||
|
ifneq ($(filter armv7%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 4
|
||||||
|
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
|
||||||
|
endif
|
||||||
|
ifneq ($(filter armv8%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 4
|
||||||
|
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
||||||
|
endif
|
||||||
|
|
||||||
|
#
|
||||||
|
# Print build information
|
||||||
|
#
|
||||||
|
|
||||||
|
$(info I whisper.cpp build info: )
|
||||||
|
$(info I UNAME_S: $(UNAME_S))
|
||||||
|
$(info I UNAME_P: $(UNAME_P))
|
||||||
|
$(info I UNAME_M: $(UNAME_M))
|
||||||
|
$(info I CFLAGS: $(CFLAGS))
|
||||||
|
$(info I CXXFLAGS: $(CXXFLAGS))
|
||||||
|
$(info I LDFLAGS: $(LDFLAGS))
|
||||||
|
$(info I CC: $(CCV))
|
||||||
|
$(info I CXX: $(CXXV))
|
||||||
|
$(info )
|
||||||
|
|
||||||
|
default: main
|
||||||
|
|
||||||
|
#
|
||||||
|
# Build library
|
||||||
|
#
|
||||||
|
|
||||||
|
ggml.o: ggml.c ggml.h
|
||||||
|
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
||||||
|
|
||||||
|
whisper.o: whisper.cpp whisper.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
|
||||||
|
|
||||||
|
libwhisper.a: ggml.o whisper.o
|
||||||
|
$(AR) rcs libwhisper.a ggml.o whisper.o
|
||||||
|
|
||||||
|
libwhisper.so: ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
|
||||||
|
|
||||||
|
#
|
||||||
|
# Examples
|
||||||
|
#
|
||||||
|
|
||||||
|
CC_SDL=`sdl2-config --cflags --libs`
|
||||||
|
|
||||||
|
main: examples/main/main.cpp ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o whisper.o -o main $(LDFLAGS)
|
||||||
|
./main -h
|
||||||
|
|
||||||
|
stream: examples/stream/stream.cpp ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
|
command: examples/command/command.cpp ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
|
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
|
||||||
|
|
||||||
|
bench: examples/bench/bench.cpp ggml.o whisper.o
|
||||||
|
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Audio samples
|
# Audio samples
|
||||||
#
|
#
|
||||||
|
|
||||||
.PHONY: build
|
|
||||||
build:
|
|
||||||
cmake -B build $(CMAKE_ARGS)
|
|
||||||
cmake --build build --config Release
|
|
||||||
|
|
||||||
# download a few audio samples into folder "./samples":
|
# download a few audio samples into folder "./samples":
|
||||||
.PHONY: samples
|
.PHONY: samples
|
||||||
samples:
|
samples:
|
||||||
@ -16,8 +222,12 @@ samples:
|
|||||||
@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||||
@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
|
@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
|
||||||
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
||||||
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
@echo "Converting to 16-bit WAV ..."
|
||||||
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
|
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
|
||||||
|
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
||||||
|
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
||||||
|
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
|
||||||
|
@rm samples/mm1.wav
|
||||||
|
|
||||||
#
|
#
|
||||||
# Models
|
# Models
|
||||||
@ -35,24 +245,28 @@ samples:
|
|||||||
.PHONY: medium.en
|
.PHONY: medium.en
|
||||||
.PHONY: medium
|
.PHONY: medium
|
||||||
.PHONY: large-v1
|
.PHONY: large-v1
|
||||||
.PHONY: large-v2
|
.PHONY: large
|
||||||
.PHONY: large-v3
|
|
||||||
.PHONY: large-v3-turbo
|
|
||||||
|
|
||||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
|
tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
|
||||||
bash ./models/download-ggml-model.sh $@
|
bash ./models/download-ggml-model.sh $@
|
||||||
cmake -B build $(CMAKE_ARGS)
|
|
||||||
cmake --build build --config Release
|
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "==============================================="
|
@echo "==============================================="
|
||||||
@echo "Running $@ on all samples in ./samples ..."
|
@echo "Running $@ on all samples in ./samples ..."
|
||||||
@echo "==============================================="
|
@echo "==============================================="
|
||||||
@echo ""
|
@echo ""
|
||||||
@for f in samples/*.{flac,mp3,ogg,wav}; do \
|
@for f in samples/*.wav; do \
|
||||||
echo "----------------------------------------------" ; \
|
echo "----------------------------------------------" ; \
|
||||||
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
||||||
echo "----------------------------------------------" ; \
|
echo "----------------------------------------------" ; \
|
||||||
echo "" ; \
|
echo "" ; \
|
||||||
./build/bin/whisper-cli -m models/ggml-$@.bin -f $$f ; \
|
./main -m models/ggml-$@.bin -f $$f ; \
|
||||||
echo "" ; \
|
echo "" ; \
|
||||||
done
|
done
|
||||||
|
|
||||||
|
#
|
||||||
|
# Tests
|
||||||
|
#
|
||||||
|
|
||||||
|
.PHONY: tests
|
||||||
|
tests:
|
||||||
|
bash ./tests/run-tests.sh
|
||||||
|
249
README_sycl.md
249
README_sycl.md
@ -1,249 +0,0 @@
|
|||||||
# whisper.cpp for SYCL
|
|
||||||
|
|
||||||
[Background](#background)
|
|
||||||
|
|
||||||
[OS](#os)
|
|
||||||
|
|
||||||
[Intel GPU](#intel-gpu)
|
|
||||||
|
|
||||||
[Linux](#linux)
|
|
||||||
|
|
||||||
[Environment Variable](#environment-variable)
|
|
||||||
|
|
||||||
[Known Issue](#known-issue)
|
|
||||||
|
|
||||||
[Todo](#todo)
|
|
||||||
|
|
||||||
## Background
|
|
||||||
|
|
||||||
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
|
||||||
|
|
||||||
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
|
||||||
|
|
||||||
Intel uses SYCL as a direct programming language to support CPUs, GPUs and FPGAs.
|
|
||||||
|
|
||||||
To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.
|
|
||||||
|
|
||||||
The whisper.cpp for SYCL is used to support Intel GPUs.
|
|
||||||
|
|
||||||
For Intel CPUs, we recommend using whisper.cpp for X86 (Intel MKL build).
|
|
||||||
|
|
||||||
## OS
|
|
||||||
|
|
||||||
|OS|Status|Verified|
|
|
||||||
|-|-|-|
|
|
||||||
|Linux|Support|Ubuntu 22.04|
|
|
||||||
|Windows|Ongoing| |
|
|
||||||
|
|
||||||
|
|
||||||
## Intel GPU
|
|
||||||
|
|
||||||
|Intel GPU| Status | Verified Model|
|
|
||||||
|-|-|-|
|
|
||||||
|Intel Data Center Max Series| Support| Max 1550|
|
|
||||||
|Intel Data Center Flex Series| Support| Flex 170|
|
|
||||||
|Intel Arc Series| Support| Arc 770|
|
|
||||||
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
|
||||||
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
|
||||||
|
|
||||||
|
|
||||||
## Linux
|
|
||||||
|
|
||||||
### Setup Environment
|
|
||||||
|
|
||||||
1. Install Intel GPU driver.
|
|
||||||
|
|
||||||
a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
|
||||||
|
|
||||||
Note: for iGPU, please install the client GPU driver.
|
|
||||||
|
|
||||||
b. Add user to group: video, render.
|
|
||||||
|
|
||||||
```
|
|
||||||
sudo usermod -aG render username
|
|
||||||
sudo usermod -aG video username
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: re-login to enable it.
|
|
||||||
|
|
||||||
c. Check
|
|
||||||
|
|
||||||
```
|
|
||||||
sudo apt install clinfo
|
|
||||||
sudo clinfo -l
|
|
||||||
```
|
|
||||||
|
|
||||||
Output (example):
|
|
||||||
|
|
||||||
```
|
|
||||||
Platform #0: Intel(R) OpenCL Graphics
|
|
||||||
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
|
||||||
|
|
||||||
|
|
||||||
Platform #0: Intel(R) OpenCL HD Graphics
|
|
||||||
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Install Intel® oneAPI Base toolkit.
|
|
||||||
|
|
||||||
|
|
||||||
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit ](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
|
||||||
|
|
||||||
Recommend to install to default folder: **/opt/intel/oneapi**.
|
|
||||||
|
|
||||||
The following guide uses the default folder as an example. If you use another folder, please adjust the paths in the following steps accordingly.
|
|
||||||
|
|
||||||
b. Check
|
|
||||||
|
|
||||||
```
|
|
||||||
source /opt/intel/oneapi/setvars.sh
|
|
||||||
|
|
||||||
sycl-ls
|
|
||||||
```
|
|
||||||
|
|
||||||
There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
|
|
||||||
|
|
||||||
Output (example):
|
|
||||||
```
|
|
||||||
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
|
||||||
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
|
||||||
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
|
||||||
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Build locally:
|
|
||||||
|
|
||||||
```
|
|
||||||
mkdir -p build
|
|
||||||
cd build
|
|
||||||
source /opt/intel/oneapi/setvars.sh
|
|
||||||
|
|
||||||
#for FP16
|
|
||||||
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
|
||||||
|
|
||||||
#for FP32
|
|
||||||
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
|
||||||
|
|
||||||
#build example/main only
|
|
||||||
#cmake --build . --config Release --target main
|
|
||||||
|
|
||||||
#build all binary
|
|
||||||
cmake --build . --config Release -v
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
or
|
|
||||||
|
|
||||||
```
|
|
||||||
./examples/sycl/build.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Note:
|
|
||||||
|
|
||||||
- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.
|
|
||||||
|
|
||||||
### Run
|
|
||||||
|
|
||||||
1. Put model file to folder **models**
|
|
||||||
|
|
||||||
2. Enable oneAPI running environment
|
|
||||||
|
|
||||||
```
|
|
||||||
source /opt/intel/oneapi/setvars.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
3. List device ID
|
|
||||||
|
|
||||||
Run without parameter:
|
|
||||||
|
|
||||||
```
|
|
||||||
./build/bin/ls-sycl-device
|
|
||||||
|
|
||||||
or
|
|
||||||
|
|
||||||
./build/bin/main
|
|
||||||
```
|
|
||||||
|
|
||||||
Check the ID in startup log, like:
|
|
||||||
|
|
||||||
```
|
|
||||||
found 4 SYCL devices:
|
|
||||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
|
||||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
|
||||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
|
||||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
|
||||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
|
||||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
|
||||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
|
||||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|Attribute|Note|
|
|
||||||
|-|-|
|
|
||||||
|compute capability 1.3|Level-zero running time, recommended |
|
|
||||||
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
|
||||||
|
|
||||||
4. Set device ID and execute whisper.cpp
|
|
||||||
|
|
||||||
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
|
||||||
|
|
||||||
```
|
|
||||||
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
|
||||||
```
|
|
||||||
or run by script:
|
|
||||||
|
|
||||||
```
|
|
||||||
./examples/sycl/run_whisper.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
5. Check the device ID in output
|
|
||||||
|
|
||||||
Like:
|
|
||||||
```
|
|
||||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Environment Variable
|
|
||||||
|
|
||||||
#### Build
|
|
||||||
|
|
||||||
|Name|Value|Function|
|
|
||||||
|-|-|-|
|
|
||||||
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
|
||||||
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
|
||||||
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
|
||||||
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
|
||||||
|
|
||||||
#### Running
|
|
||||||
|
|
||||||
|
|
||||||
|Name|Value|Function|
|
|
||||||
|-|-|-|
|
|
||||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
|
||||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
|
||||||
|
|
||||||
## Known Issue
|
|
||||||
|
|
||||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
|
||||||
|
|
||||||
The oneAPI running environment has not been enabled.
|
|
||||||
|
|
||||||
Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
|
|
||||||
|
|
||||||
|
|
||||||
- Hang during startup
|
|
||||||
|
|
||||||
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy behaves abnormally and blocks.
|
|
||||||
|
|
||||||
Solution: add **--no-mmap**.
|
|
||||||
|
|
||||||
## Todo
|
|
||||||
|
|
||||||
- Support to build in Windows.
|
|
||||||
|
|
||||||
- Support multiple cards.
|
|
@ -1,47 +1,18 @@
|
|||||||
ifndef UNAME_S
|
BUILD_DIR := build
|
||||||
UNAME_S := $(shell uname -s)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef UNAME_P
|
|
||||||
UNAME_P := $(shell uname -p)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef UNAME_M
|
|
||||||
UNAME_M := $(shell uname -m)
|
|
||||||
endif
|
|
||||||
|
|
||||||
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
|
||||||
BUILD_DIR := build_go
|
|
||||||
MODELS_DIR := models
|
MODELS_DIR := models
|
||||||
EXAMPLES_DIR := $(wildcard examples/*)
|
EXAMPLES_DIR := $(wildcard examples/*)
|
||||||
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
INCLUDE_PATH := $(abspath ../..)
|
||||||
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
|
LIBRARY_PATH := $(abspath ../..)
|
||||||
|
|
||||||
ifeq ($(GGML_CUDA),1)
|
|
||||||
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
|
||||||
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
|
||||||
endif
|
|
||||||
|
|
||||||
all: clean whisper examples
|
all: clean whisper examples
|
||||||
|
|
||||||
whisper: mkdir
|
whisper: mkdir
|
||||||
cmake -S ../.. -B ../../${BUILD_DIR} \
|
@echo Build whisper
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
@${MAKE} -C ../.. libwhisper.a
|
||||||
-DBUILD_SHARED_LIBS=OFF
|
|
||||||
cmake --build ../../${BUILD_DIR} --target whisper
|
|
||||||
|
|
||||||
test: model-small whisper modtidy
|
test: model-small whisper modtidy
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
|
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
|
|
||||||
else
|
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
||||||
endif
|
|
||||||
|
|
||||||
examples: $(EXAMPLES_DIR)
|
examples: $(EXAMPLES_DIR)
|
||||||
|
|
||||||
@ -50,11 +21,7 @@ model-small: mkdir examples/go-model-download
|
|||||||
|
|
||||||
$(EXAMPLES_DIR): mkdir whisper modtidy
|
$(EXAMPLES_DIR): mkdir whisper modtidy
|
||||||
@echo Build example $(notdir $@)
|
@echo Build example $(notdir $@)
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
|
|
||||||
else
|
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||||
endif
|
|
||||||
|
|
||||||
mkdir:
|
mkdir:
|
||||||
@echo Mkdir ${BUILD_DIR}
|
@echo Mkdir ${BUILD_DIR}
|
||||||
@ -65,7 +32,7 @@ mkdir:
|
|||||||
modtidy:
|
modtidy:
|
||||||
@go mod tidy
|
@go mod tidy
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo Clean
|
@echo Clean
|
||||||
@rm -fr $(BUILD_DIR)
|
@rm -fr $(BUILD_DIR)
|
||||||
@go clean
|
@go clean
|
||||||
|
@ -31,7 +31,7 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
if err := context.Process(samples, nil, nil, nil); err != nil {
|
if err := context.Process(samples, nil); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -51,7 +51,7 @@ func main() {
|
|||||||
In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
|
In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/ggml-org/whisper.cpp.git
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
cd whisper.cpp/bindings/go
|
cd whisper.cpp/bindings/go
|
||||||
make test
|
make test
|
||||||
```
|
```
|
||||||
@ -62,12 +62,6 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
|
|||||||
make examples
|
make examples
|
||||||
```
|
```
|
||||||
|
|
||||||
To build using cuda support add `GGML_CUDA=1`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
GGML_CUDA=1 make examples
|
|
||||||
```
|
|
||||||
|
|
||||||
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -77,7 +71,7 @@ The examples are placed in the `build` directory. Once built, you can download a
|
|||||||
And you can then test a model against samples with the following command:
|
And you can then test a model against samples with the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
||||||
```
|
```
|
||||||
|
|
||||||
## Using the bindings
|
## Using the bindings
|
||||||
@ -98,7 +92,7 @@ The API Documentation:
|
|||||||
|
|
||||||
Getting help:
|
Getting help:
|
||||||
|
|
||||||
* Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312)
|
* Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
github.com/ggml-org/whisper.cpp/bindings/go
|
github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
provides a speech-to-text service bindings for the Go programming language.
|
provides a speech-to-text service bindings for the Go programming language.
|
||||||
*/
|
*/
|
||||||
package whisper
|
package whisper
|
||||||
|
@ -9,23 +9,22 @@ import (
|
|||||||
// ContextForSignal returns a context object which is cancelled when a signal
|
// ContextForSignal returns a context object which is cancelled when a signal
|
||||||
// is received. It returns nil if no signal parameter is provided
|
// is received. It returns nil if no signal parameter is provided
|
||||||
func ContextForSignal(signals ...os.Signal) context.Context {
|
func ContextForSignal(signals ...os.Signal) context.Context {
|
||||||
if len(signals) == 0 {
|
if len(signals) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
|
ch := make(chan os.Signal)
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Send message on channel when signal received
|
// Send message on channel when signal received
|
||||||
signal.Notify(ch, signals...)
|
signal.Notify(ch, signals...)
|
||||||
|
|
||||||
// When any signal is received, call cancel
|
// When any signal received, call cancel
|
||||||
go func() {
|
go func() {
|
||||||
<-ch
|
<-ch
|
||||||
cancel()
|
cancel()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Return success
|
// Return success
|
||||||
return ctx
|
return ctx
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,6 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -18,27 +17,14 @@ import (
|
|||||||
// CONSTANTS
|
// CONSTANTS
|
||||||
|
|
||||||
const (
|
const (
|
||||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
|
srcUrl = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
||||||
srcExt = ".bin" // Filename extension
|
srcExt = ".bin" // Filename extension
|
||||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// The models which will be downloaded, if no model is specified as an argument
|
// The models which will be downloaded, if no model is specified as an argument
|
||||||
modelNames = []string{
|
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
|
||||||
"tiny", "tiny-q5_1", "tiny-q8_0",
|
|
||||||
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
|
|
||||||
"base", "base-q5_1", "base-q8_0",
|
|
||||||
"base.en", "base.en-q5_1", "base.en-q8_0",
|
|
||||||
"small", "small-q5_1", "small-q8_0",
|
|
||||||
"small.en", "small.en-q5_1", "small.en-q8_0",
|
|
||||||
"medium", "medium-q5_0", "medium-q8_0",
|
|
||||||
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
|
|
||||||
"large-v1",
|
|
||||||
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
|
|
||||||
"large-v3", "large-v3-q5_0",
|
|
||||||
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -58,25 +44,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Usage = func() {
|
flag.Usage = func() {
|
||||||
name := filepath.Base(flag.CommandLine.Name())
|
name := filepath.Base(flag.CommandLine.Name())
|
||||||
fmt.Fprintf(flag.CommandLine.Output(), `
|
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
|
||||||
Usage: %s [options] [<model>...]
|
|
||||||
|
|
||||||
Options:
|
|
||||||
-out string Specify the output folder where models will be saved.
|
|
||||||
Default: Current working directory.
|
|
||||||
-timeout duration Set the maximum duration for downloading a model.
|
|
||||||
Example: 10m, 1h (default: 30m0s).
|
|
||||||
-quiet Suppress all output except errors.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
1. Download a specific model:
|
|
||||||
%s -out ./models tiny-q8_0
|
|
||||||
|
|
||||||
2. Download all models:
|
|
||||||
%s -out ./models
|
|
||||||
|
|
||||||
`, name, name, name)
|
|
||||||
|
|
||||||
flag.PrintDefaults()
|
flag.PrintDefaults()
|
||||||
}
|
}
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
@ -146,87 +114,23 @@ func GetOut() (string, error) {
|
|||||||
// GetModels returns the list of models to download
|
// GetModels returns the list of models to download
|
||||||
func GetModels() []string {
|
func GetModels() []string {
|
||||||
if flag.NArg() == 0 {
|
if flag.NArg() == 0 {
|
||||||
fmt.Println("No model specified.")
|
return modelNames
|
||||||
fmt.Println("Preparing to download all models...")
|
} else {
|
||||||
|
return flag.Args()
|
||||||
// Calculate total download size
|
|
||||||
fmt.Println("Calculating total download size...")
|
|
||||||
totalSize, err := CalculateTotalDownloadSize(modelNames)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error calculating download sizes:", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
|
|
||||||
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
|
|
||||||
fmt.Println("Would you like to download all models? (y/N)")
|
|
||||||
|
|
||||||
// Prompt for user input
|
|
||||||
var response string
|
|
||||||
fmt.Scanln(&response)
|
|
||||||
if response != "y" && response != "Y" {
|
|
||||||
fmt.Println("Aborting. Specify a model to download.")
|
|
||||||
os.Exit(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
return modelNames // Return all models if confirmed
|
|
||||||
}
|
}
|
||||||
return flag.Args() // Return specific models if arguments are provided
|
|
||||||
}
|
|
||||||
|
|
||||||
func CalculateTotalDownloadSize(models []string) (int64, error) {
|
|
||||||
var totalSize int64
|
|
||||||
client := http.Client{}
|
|
||||||
|
|
||||||
for _, model := range models {
|
|
||||||
modelURL, err := URLForModel(model)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Issue a HEAD request to get the file size
|
|
||||||
req, err := http.NewRequest("HEAD", modelURL, nil)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, err := client.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
resp.Body.Close()
|
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
|
||||||
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
size := resp.ContentLength
|
|
||||||
totalSize += size
|
|
||||||
}
|
|
||||||
return totalSize, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// URLForModel returns the URL for the given model on huggingface.co
|
// URLForModel returns the URL for the given model on huggingface.co
|
||||||
func URLForModel(model string) (string, error) {
|
func URLForModel(model string) (string, error) {
|
||||||
// Ensure "ggml-" prefix is added only once
|
|
||||||
if !strings.HasPrefix(model, "ggml-") {
|
|
||||||
model = "ggml-" + model
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure ".bin" extension is added only once
|
|
||||||
if filepath.Ext(model) != srcExt {
|
if filepath.Ext(model) != srcExt {
|
||||||
model += srcExt
|
model += srcExt
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse the base URL
|
|
||||||
url, err := url.Parse(srcUrl)
|
url, err := url.Parse(srcUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
|
} else {
|
||||||
|
url.Path = filepath.Join(url.Path, model)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure no trailing slash in the base URL
|
|
||||||
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
|
|
||||||
return url.String(), nil
|
return url.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,22 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// CONSTANTS
|
|
||||||
|
|
||||||
const (
|
|
||||||
Reset = "\033[0m"
|
|
||||||
RGBPrefix = "\033[38;5;" // followed by RGB values in decimal format separated by colons
|
|
||||||
RGBSuffix = "m"
|
|
||||||
)
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
// PUBLIC METHODS
|
|
||||||
|
|
||||||
// Colorize text with RGB values, from 0 to 23
|
|
||||||
func Colorize(text string, v int) string {
|
|
||||||
// https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
|
|
||||||
// Grayscale colors are in the range 232-255
|
|
||||||
return RGBPrefix + fmt.Sprint(v%24+232) + RGBSuffix + text + Reset
|
|
||||||
}
|
|
@ -2,12 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
// Packages
|
|
||||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -48,100 +42,20 @@ func (flags *Flags) GetLanguage() string {
|
|||||||
return flags.Lookup("language").Value.String()
|
return flags.Lookup("language").Value.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (flags *Flags) IsTranslate() bool {
|
func (flags *Flags) IsSpeedup() bool {
|
||||||
return flags.Lookup("translate").Value.(flag.Getter).Get().(bool)
|
return flags.Lookup("speedup").Value.String() == "true"
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetOffset() time.Duration {
|
|
||||||
return flags.Lookup("offset").Value.(flag.Getter).Get().(time.Duration)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetDuration() time.Duration {
|
|
||||||
return flags.Lookup("duration").Value.(flag.Getter).Get().(time.Duration)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetThreads() uint {
|
|
||||||
return flags.Lookup("threads").Value.(flag.Getter).Get().(uint)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetOut() string {
|
|
||||||
return strings.ToLower(flags.Lookup("out").Value.String())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (flags *Flags) IsTokens() bool {
|
func (flags *Flags) IsTokens() bool {
|
||||||
return flags.Lookup("tokens").Value.String() == "true"
|
return flags.Lookup("tokens").Value.String() == "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (flags *Flags) IsColorize() bool {
|
|
||||||
return flags.Lookup("colorize").Value.String() == "true"
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetMaxLen() uint {
|
|
||||||
return flags.Lookup("max-len").Value.(flag.Getter).Get().(uint)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetMaxTokens() uint {
|
|
||||||
return flags.Lookup("max-tokens").Value.(flag.Getter).Get().(uint)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) GetWordThreshold() float32 {
|
|
||||||
return float32(flags.Lookup("word-thold").Value.(flag.Getter).Get().(float64))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) SetParams(context whisper.Context) error {
|
|
||||||
if lang := flags.GetLanguage(); lang != "" && lang != "auto" {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting language to %q\n", lang)
|
|
||||||
if err := context.SetLanguage(lang); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if flags.IsTranslate() && context.IsMultilingual() {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting translate to true\n")
|
|
||||||
context.SetTranslate(true)
|
|
||||||
}
|
|
||||||
if offset := flags.GetOffset(); offset != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting offset to %v\n", offset)
|
|
||||||
context.SetOffset(offset)
|
|
||||||
}
|
|
||||||
if duration := flags.GetDuration(); duration != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
|
||||||
context.SetDuration(duration)
|
|
||||||
}
|
|
||||||
if threads := flags.GetThreads(); threads != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
|
||||||
context.SetThreads(threads)
|
|
||||||
}
|
|
||||||
if max_len := flags.GetMaxLen(); max_len != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting max_segment_length to %d\n", max_len)
|
|
||||||
context.SetMaxSegmentLength(max_len)
|
|
||||||
}
|
|
||||||
if max_tokens := flags.GetMaxTokens(); max_tokens != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting max_tokens to %d\n", max_tokens)
|
|
||||||
context.SetMaxTokensPerSegment(max_tokens)
|
|
||||||
}
|
|
||||||
if word_threshold := flags.GetWordThreshold(); word_threshold != 0 {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting word_threshold to %f\n", word_threshold)
|
|
||||||
context.SetTokenThreshold(word_threshold)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return success
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// PRIVATE METHODS
|
// PRIVATE METHODS
|
||||||
|
|
||||||
func registerFlags(flag *Flags) {
|
func registerFlags(flag *Flags) {
|
||||||
flag.String("model", "", "Path to the model file")
|
flag.String("model", "", "Path to the model file")
|
||||||
flag.String("language", "", "Spoken language")
|
flag.String("language", "", "Language")
|
||||||
flag.Bool("translate", false, "Translate from source language to english")
|
flag.Bool("speedup", false, "Enable speedup")
|
||||||
flag.Duration("offset", 0, "Time offset")
|
|
||||||
flag.Duration("duration", 0, "Duration of audio to process")
|
|
||||||
flag.Uint("threads", 0, "Number of threads to use")
|
|
||||||
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
|
||||||
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
|
||||||
flag.Float64("word-thold", 0, "Maximum segment score")
|
|
||||||
flag.Bool("tokens", false, "Display tokens")
|
flag.Bool("tokens", false, "Display tokens")
|
||||||
flag.Bool("colorize", false, "Colorize tokens")
|
|
||||||
flag.String("out", "", "Output format (srt, none or leave as empty string)")
|
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,8 @@ func main() {
|
|||||||
|
|
||||||
// Process files
|
// Process files
|
||||||
for _, filename := range flags.Args() {
|
for _, filename := range flags.Args() {
|
||||||
if err := Process(model, filename, flags); err != nil {
|
fmt.Println("Processing", filename)
|
||||||
|
if err := Process(model, filename, flags.GetLanguage(), flags.IsSpeedup(), flags.IsTokens()); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, err)
|
fmt.Fprintln(os.Stderr, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ import (
|
|||||||
wav "github.com/go-audio/wav"
|
wav "github.com/go-audio/wav"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Process(model whisper.Model, path string, flags *Flags) error {
|
func Process(model whisper.Model, path string, lang string, speedup, tokens bool) error {
|
||||||
var data []float32
|
var data []float32
|
||||||
|
|
||||||
// Create processing context
|
// Create processing context
|
||||||
@ -20,22 +20,14 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the parameters
|
|
||||||
if err := flags.SetParams(context); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("\n%s\n", context.SystemInfo())
|
|
||||||
|
|
||||||
// Open the file
|
// Open the file
|
||||||
fmt.Fprintf(flags.Output(), "Loading %q\n", path)
|
|
||||||
fh, err := os.Open(path)
|
fh, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer fh.Close()
|
defer fh.Close()
|
||||||
|
|
||||||
// Decode the WAV file - load the full buffer
|
// Decode the WAV file
|
||||||
dec := wav.NewDecoder(fh)
|
dec := wav.NewDecoder(fh)
|
||||||
if buf, err := dec.FullPCMBuffer(); err != nil {
|
if buf, err := dec.FullPCMBuffer(); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -47,86 +39,42 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
|||||||
data = buf.AsFloat32Buffer().Data
|
data = buf.AsFloat32Buffer().Data
|
||||||
}
|
}
|
||||||
|
|
||||||
// Segment callback when -tokens is specified
|
// Set the parameters
|
||||||
var cb whisper.SegmentCallback
|
var cb whisper.SegmentCallback
|
||||||
if flags.IsTokens() {
|
if lang != "" {
|
||||||
|
if err := context.SetLanguage(lang); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if speedup {
|
||||||
|
context.SetSpeedup(true)
|
||||||
|
}
|
||||||
|
if tokens {
|
||||||
cb = func(segment whisper.Segment) {
|
cb = func(segment whisper.Segment) {
|
||||||
fmt.Fprintf(flags.Output(), "%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
|
fmt.Printf("%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
|
||||||
for _, token := range segment.Tokens {
|
for _, token := range segment.Tokens {
|
||||||
if flags.IsColorize() && context.IsText(token) {
|
fmt.Printf("%q ", token.Text)
|
||||||
fmt.Fprint(flags.Output(), Colorize(token.Text, int(token.P*24.0)), " ")
|
|
||||||
} else {
|
|
||||||
fmt.Fprint(flags.Output(), token.Text, " ")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
fmt.Fprintln(flags.Output(), "")
|
fmt.Println("")
|
||||||
fmt.Fprintln(flags.Output(), "")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the data
|
// Process the data
|
||||||
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
if err := context.Process(data, cb); err != nil {
|
||||||
context.ResetTimings()
|
|
||||||
if err := context.Process(data, nil, cb, nil); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
context.PrintTimings()
|
|
||||||
|
|
||||||
// Print out the results
|
// Print out the results
|
||||||
switch {
|
|
||||||
case flags.GetOut() == "srt":
|
|
||||||
return OutputSRT(os.Stdout, context)
|
|
||||||
case flags.GetOut() == "none":
|
|
||||||
return nil
|
|
||||||
default:
|
|
||||||
return Output(os.Stdout, context, flags.IsColorize())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output text as SRT file
|
|
||||||
func OutputSRT(w io.Writer, context whisper.Context) error {
|
|
||||||
n := 1
|
|
||||||
for {
|
for {
|
||||||
segment, err := context.NextSegment()
|
segment, err := context.NextSegment()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
return nil
|
break
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
fmt.Fprintln(w, n)
|
fmt.Printf("[%6s->%6s] %s\n", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond), segment.Text)
|
||||||
fmt.Fprintln(w, srtTimestamp(segment.Start), " --> ", srtTimestamp(segment.End))
|
|
||||||
fmt.Fprintln(w, segment.Text)
|
|
||||||
fmt.Fprintln(w, "")
|
|
||||||
n++
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Output text to terminal
|
// Return success
|
||||||
func Output(w io.Writer, context whisper.Context, colorize bool) error {
|
return nil
|
||||||
for {
|
|
||||||
segment, err := context.NextSegment()
|
|
||||||
if err == io.EOF {
|
|
||||||
return nil
|
|
||||||
} else if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
fmt.Fprintf(w, "[%6s->%6s]", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
|
|
||||||
if colorize {
|
|
||||||
for _, token := range segment.Tokens {
|
|
||||||
if !context.IsText(token) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fmt.Fprint(w, " ", Colorize(token.Text, int(token.P*24.0)))
|
|
||||||
}
|
|
||||||
fmt.Fprint(w, "\n")
|
|
||||||
} else {
|
|
||||||
fmt.Fprintln(w, " ", segment.Text)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return srtTimestamp
|
|
||||||
func srtTimestamp(t time.Duration) string {
|
|
||||||
return fmt.Sprintf("%02d:%02d:%02d,%03d", t/time.Hour, (t%time.Hour)/time.Minute, (t%time.Minute)/time.Second, (t%time.Second)/time.Millisecond)
|
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
module github.com/ggerganov/whisper.cpp/bindings/go
|
module github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
|
||||||
go 1.23
|
go 1.19
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-audio/wav v1.1.0
|
github.com/go-audio/wav v1.1.0
|
||||||
github.com/stretchr/testify v1.9.0
|
github.com/stretchr/testify v1.8.1
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||||
@ -8,9 +9,15 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
|||||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
|
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||||
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@ -19,10 +19,6 @@ func (p *Params) SetTranslate(v bool) {
|
|||||||
p.translate = toBool(v)
|
p.translate = toBool(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Params) SetSplitOnWord(v bool) {
|
|
||||||
p.split_on_word = toBool(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetNoContext(v bool) {
|
func (p *Params) SetNoContext(v bool) {
|
||||||
p.no_context = toBool(v)
|
p.no_context = toBool(v)
|
||||||
}
|
}
|
||||||
@ -47,12 +43,11 @@ func (p *Params) SetPrintTimestamps(v bool) {
|
|||||||
p.print_timestamps = toBool(v)
|
p.print_timestamps = toBool(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set language id
|
func (p *Params) SetSpeedup(v bool) {
|
||||||
|
p.speed_up = toBool(v)
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Params) SetLanguage(lang int) error {
|
func (p *Params) SetLanguage(lang int) error {
|
||||||
if lang == -1 {
|
|
||||||
p.language = nil
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
str := C.whisper_lang_str(C.int(lang))
|
str := C.whisper_lang_str(C.int(lang))
|
||||||
if str == nil {
|
if str == nil {
|
||||||
return ErrInvalidLanguage
|
return ErrInvalidLanguage
|
||||||
@ -62,7 +57,6 @@ func (p *Params) SetLanguage(lang int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get language id
|
|
||||||
func (p *Params) Language() int {
|
func (p *Params) Language() int {
|
||||||
if p.language == nil {
|
if p.language == nil {
|
||||||
return -1
|
return -1
|
||||||
@ -70,82 +64,18 @@ func (p *Params) Language() int {
|
|||||||
return int(C.whisper_lang_id(p.language))
|
return int(C.whisper_lang_id(p.language))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Threads available
|
|
||||||
func (p *Params) Threads() int {
|
|
||||||
return int(p.n_threads)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set number of threads to use
|
|
||||||
func (p *Params) SetThreads(threads int) {
|
func (p *Params) SetThreads(threads int) {
|
||||||
p.n_threads = C.int(threads)
|
p.n_threads = C.int(threads)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set start offset in ms
|
|
||||||
func (p *Params) SetOffset(offset_ms int) {
|
func (p *Params) SetOffset(offset_ms int) {
|
||||||
p.offset_ms = C.int(offset_ms)
|
p.offset_ms = C.int(offset_ms)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set audio duration to process in ms
|
|
||||||
func (p *Params) SetDuration(duration_ms int) {
|
func (p *Params) SetDuration(duration_ms int) {
|
||||||
p.duration_ms = C.int(duration_ms)
|
p.duration_ms = C.int(duration_ms)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set timestamp token probability threshold (~0.01)
|
|
||||||
func (p *Params) SetTokenThreshold(t float32) {
|
|
||||||
p.thold_pt = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set timestamp token sum probability threshold (~0.01)
|
|
||||||
func (p *Params) SetTokenSumThreshold(t float32) {
|
|
||||||
p.thold_ptsum = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set max segment length in characters
|
|
||||||
func (p *Params) SetMaxSegmentLength(n int) {
|
|
||||||
p.max_len = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetTokenTimestamps(b bool) {
|
|
||||||
p.token_timestamps = toBool(b)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set max tokens per segment (0 = no limit)
|
|
||||||
func (p *Params) SetMaxTokensPerSegment(n int) {
|
|
||||||
p.max_tokens = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set audio encoder context
|
|
||||||
func (p *Params) SetAudioCtx(n int) {
|
|
||||||
p.audio_ctx = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetMaxContext(n int) {
|
|
||||||
p.n_max_text_ctx = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetBeamSize(n int) {
|
|
||||||
p.beam_search.beam_size = C.int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetEntropyThold(t float32) {
|
|
||||||
p.entropy_thold = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Params) SetTemperature(t float32) {
|
|
||||||
p.temperature = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sets the fallback temperature incrementation
|
|
||||||
// Pass -1.0 to disable this feature
|
|
||||||
func (p *Params) SetTemperatureFallback(t float32) {
|
|
||||||
p.temperature_inc = C.float(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set initial prompt
|
|
||||||
func (p *Params) SetInitialPrompt(prompt string) {
|
|
||||||
p.initial_prompt = C.CString(prompt)
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// PRIVATE METHODS
|
// PRIVATE METHODS
|
||||||
|
|
||||||
@ -169,12 +99,6 @@ func (p *Params) String() string {
|
|||||||
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
||||||
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
||||||
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
||||||
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
|
||||||
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
|
||||||
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
|
||||||
str += fmt.Sprintf(" temperature=%f", p.temperature)
|
|
||||||
str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
|
|
||||||
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
|
||||||
if p.translate {
|
if p.translate {
|
||||||
str += " translate"
|
str += " translate"
|
||||||
}
|
}
|
||||||
@ -199,6 +123,9 @@ func (p *Params) String() string {
|
|||||||
if p.token_timestamps {
|
if p.token_timestamps {
|
||||||
str += " token_timestamps"
|
str += " token_timestamps"
|
||||||
}
|
}
|
||||||
|
if p.speed_up {
|
||||||
|
str += " speed_up"
|
||||||
|
}
|
||||||
|
|
||||||
return str + ">"
|
return str + ">"
|
||||||
}
|
}
|
||||||
|
@ -11,11 +11,10 @@ import (
|
|||||||
// ERRORS
|
// ERRORS
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ErrUnableToLoadModel = errors.New("unable to load model")
|
ErrUnableToLoadModel = errors.New("unable to load model")
|
||||||
ErrInternalAppError = errors.New("internal application error")
|
ErrInternalAppError = errors.New("internal application error")
|
||||||
ErrProcessingFailed = errors.New("processing failed")
|
ErrProcessingFailed = errors.New("processing failed")
|
||||||
ErrUnsupportedLanguage = errors.New("unsupported language")
|
ErrUnsupportedLanguage = errors.New("unsupported language")
|
||||||
ErrModelNotMultilingual = errors.New("model is not multilingual")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
package whisper
|
package whisper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -26,7 +24,7 @@ var _ Context = (*context)(nil)
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// LIFECYCLE
|
// LIFECYCLE
|
||||||
|
|
||||||
func newContext(model *model, params whisper.Params) (Context, error) {
|
func NewContext(model *model, params whisper.Params) (Context, error) {
|
||||||
context := new(context)
|
context := new(context)
|
||||||
context.model = model
|
context.model = model
|
||||||
context.params = params
|
context.params = params
|
||||||
@ -43,13 +41,7 @@ func (context *context) SetLanguage(lang string) error {
|
|||||||
if context.model.ctx == nil {
|
if context.model.ctx == nil {
|
||||||
return ErrInternalAppError
|
return ErrInternalAppError
|
||||||
}
|
}
|
||||||
if !context.model.IsMultilingual() {
|
if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
|
||||||
return ErrModelNotMultilingual
|
|
||||||
}
|
|
||||||
|
|
||||||
if lang == "auto" {
|
|
||||||
context.params.SetLanguage(-1)
|
|
||||||
} else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
|
|
||||||
return ErrUnsupportedLanguage
|
return ErrUnsupportedLanguage
|
||||||
} else if err := context.params.SetLanguage(id); err != nil {
|
} else if err := context.params.SetLanguage(id); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -58,182 +50,49 @@ func (context *context) SetLanguage(lang string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (context *context) IsMultilingual() bool {
|
|
||||||
return context.model.IsMultilingual()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get language
|
// Get language
|
||||||
func (context *context) Language() string {
|
func (context *context) Language() string {
|
||||||
id := context.params.Language()
|
|
||||||
if id == -1 {
|
|
||||||
return "auto"
|
|
||||||
}
|
|
||||||
return whisper.Whisper_lang_str(context.params.Language())
|
return whisper.Whisper_lang_str(context.params.Language())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (context *context) DetectedLanguage() string {
|
// Set speedup flag
|
||||||
return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
|
func (context *context) SetSpeedup(v bool) {
|
||||||
}
|
context.params.SetSpeedup(v)
|
||||||
|
|
||||||
// Set translate flag
|
|
||||||
func (context *context) SetTranslate(v bool) {
|
|
||||||
context.params.SetTranslate(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (context *context) SetSplitOnWord(v bool) {
|
|
||||||
context.params.SetSplitOnWord(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set number of threads to use
|
|
||||||
func (context *context) SetThreads(v uint) {
|
|
||||||
context.params.SetThreads(int(v))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set time offset
|
|
||||||
func (context *context) SetOffset(v time.Duration) {
|
|
||||||
context.params.SetOffset(int(v.Milliseconds()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set duration of audio to process
|
|
||||||
func (context *context) SetDuration(v time.Duration) {
|
|
||||||
context.params.SetDuration(int(v.Milliseconds()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set timestamp token probability threshold (~0.01)
|
|
||||||
func (context *context) SetTokenThreshold(t float32) {
|
|
||||||
context.params.SetTokenThreshold(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set timestamp token sum probability threshold (~0.01)
|
|
||||||
func (context *context) SetTokenSumThreshold(t float32) {
|
|
||||||
context.params.SetTokenSumThreshold(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set max segment length in characters
|
|
||||||
func (context *context) SetMaxSegmentLength(n uint) {
|
|
||||||
context.params.SetMaxSegmentLength(int(n))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set token timestamps flag
|
|
||||||
func (context *context) SetTokenTimestamps(b bool) {
|
|
||||||
context.params.SetTokenTimestamps(b)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set max tokens per segment (0 = no limit)
|
|
||||||
func (context *context) SetMaxTokensPerSegment(n uint) {
|
|
||||||
context.params.SetMaxTokensPerSegment(int(n))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set audio encoder context
|
|
||||||
func (context *context) SetAudioCtx(n uint) {
|
|
||||||
context.params.SetAudioCtx(int(n))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set maximum number of text context tokens to store
|
|
||||||
func (context *context) SetMaxContext(n int) {
|
|
||||||
context.params.SetMaxContext(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Beam Size
|
|
||||||
func (context *context) SetBeamSize(n int) {
|
|
||||||
context.params.SetBeamSize(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Entropy threshold
|
|
||||||
func (context *context) SetEntropyThold(t float32) {
|
|
||||||
context.params.SetEntropyThold(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Temperature
|
|
||||||
func (context *context) SetTemperature(t float32) {
|
|
||||||
context.params.SetTemperature(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the fallback temperature incrementation
|
|
||||||
// Pass -1.0 to disable this feature
|
|
||||||
func (context *context) SetTemperatureFallback(t float32) {
|
|
||||||
context.params.SetTemperatureFallback(t)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set initial prompt
|
|
||||||
func (context *context) SetInitialPrompt(prompt string) {
|
|
||||||
context.params.SetInitialPrompt(prompt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResetTimings resets the mode timings. Should be called before processing
|
|
||||||
func (context *context) ResetTimings() {
|
|
||||||
context.model.ctx.Whisper_reset_timings()
|
|
||||||
}
|
|
||||||
|
|
||||||
// PrintTimings prints the model timings to stdout.
|
|
||||||
func (context *context) PrintTimings() {
|
|
||||||
context.model.ctx.Whisper_print_timings()
|
|
||||||
}
|
|
||||||
|
|
||||||
// SystemInfo returns the system information
|
|
||||||
func (context *context) SystemInfo() string {
|
|
||||||
return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
|
|
||||||
context.params.Threads(),
|
|
||||||
runtime.NumCPU(),
|
|
||||||
whisper.Whisper_print_system_info(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use mel data at offset_ms to try and auto-detect the spoken language
|
|
||||||
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
|
||||||
// Returns the probabilities of all languages.
|
|
||||||
func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
|
|
||||||
langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return langProbs, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process new sample data and return any errors
|
// Process new sample data and return any errors
|
||||||
func (context *context) Process(
|
func (context *context) Process(data []float32, cb SegmentCallback) error {
|
||||||
data []float32,
|
|
||||||
callEncoderBegin EncoderBeginCallback,
|
|
||||||
callNewSegment SegmentCallback,
|
|
||||||
callProgress ProgressCallback,
|
|
||||||
) error {
|
|
||||||
if context.model.ctx == nil {
|
if context.model.ctx == nil {
|
||||||
return ErrInternalAppError
|
return ErrInternalAppError
|
||||||
}
|
}
|
||||||
// If the callback is defined then we force on single_segment mode
|
// If the callback is defined then we force on single_segment mode
|
||||||
if callNewSegment != nil {
|
if cb != nil {
|
||||||
context.params.SetSingleSegment(true)
|
context.params.SetSingleSegment(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// We don't do parallel processing at the moment
|
// We don't do parallel processing at the moment
|
||||||
processors := 0
|
processors := 0
|
||||||
if processors > 1 {
|
if processors > 1 {
|
||||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
|
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
||||||
func(new int) {
|
if cb != nil {
|
||||||
if callNewSegment != nil {
|
|
||||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
|
||||||
s0 := num_segments - new
|
|
||||||
for i := s0; i < num_segments; i++ {
|
|
||||||
callNewSegment(toSegment(context.model.ctx, i))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
|
|
||||||
func(new int) {
|
|
||||||
if callNewSegment != nil {
|
|
||||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||||
s0 := num_segments - new
|
s0 := num_segments - new
|
||||||
for i := s0; i < num_segments; i++ {
|
for i := s0; i < num_segments; i++ {
|
||||||
callNewSegment(toSegment(context.model.ctx, i))
|
cb(toSegment(context.model.ctx, i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, func(progress int) {
|
|
||||||
if callProgress != nil {
|
|
||||||
callProgress(progress)
|
|
||||||
}
|
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
||||||
|
if cb != nil {
|
||||||
|
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||||
|
s0 := num_segments - new
|
||||||
|
for i := s0; i < num_segments; i++ {
|
||||||
|
cb(toSegment(context.model.ctx, i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,65 +119,6 @@ func (context *context) NextSegment() (Segment, error) {
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test for text tokens
|
|
||||||
func (context *context) IsText(t Token) bool {
|
|
||||||
switch {
|
|
||||||
case context.IsBEG(t):
|
|
||||||
return false
|
|
||||||
case context.IsSOT(t):
|
|
||||||
return false
|
|
||||||
case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot():
|
|
||||||
return false
|
|
||||||
case context.IsPREV(t):
|
|
||||||
return false
|
|
||||||
case context.IsSOLM(t):
|
|
||||||
return false
|
|
||||||
case context.IsNOT(t):
|
|
||||||
return false
|
|
||||||
default:
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "begin" token
|
|
||||||
func (context *context) IsBEG(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "start of transcription" token
|
|
||||||
func (context *context) IsSOT(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "end of transcription" token
|
|
||||||
func (context *context) IsEOT(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "start of prev" token
|
|
||||||
func (context *context) IsPREV(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "start of lm" token
|
|
||||||
func (context *context) IsSOLM(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for "No timestamps" token
|
|
||||||
func (context *context) IsNOT(t Token) bool {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test for token associated with a specific language
|
|
||||||
func (context *context) IsLANG(t Token, lang string) bool {
|
|
||||||
if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 {
|
|
||||||
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id)
|
|
||||||
} else {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// PRIVATE METHODS
|
// PRIVATE METHODS
|
||||||
|
|
||||||
@ -335,14 +135,10 @@ func toSegment(ctx *whisper.Context, n int) Segment {
|
|||||||
func toTokens(ctx *whisper.Context, n int) []Token {
|
func toTokens(ctx *whisper.Context, n int) []Token {
|
||||||
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
||||||
for i := 0; i < len(result); i++ {
|
for i := 0; i < len(result); i++ {
|
||||||
data := ctx.Whisper_full_get_token_data(n, i)
|
|
||||||
|
|
||||||
result[i] = Token{
|
result[i] = Token{
|
||||||
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
||||||
Text: ctx.Whisper_full_get_token_text(n, i),
|
Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
|
||||||
P: ctx.Whisper_full_get_token_p(n, i),
|
P: ctx.Whisper_full_get_token_p(n, i),
|
||||||
Start: time.Duration(data.T0()) * time.Millisecond * 10,
|
|
||||||
End: time.Duration(data.T1()) * time.Millisecond * 10,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
@ -4,121 +4,52 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
// Packages
|
||||||
"github.com/go-audio/wav"
|
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||||
assert "github.com/stretchr/testify/assert"
|
assert "github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestSetLanguage(t *testing.T) {
|
const (
|
||||||
assert := assert.New(t)
|
ModelPath = "../../models/ggml-tiny.bin"
|
||||||
|
SamplePath = "../../samples/jfk.wav"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_Whisper_000(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, model not found:", ModelPath)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load model
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
assert.NoError(model.Close())
|
||||||
|
|
||||||
|
t.Log("languages=", model.Languages())
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_Whisper_001(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, model not found:", ModelPath)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||||
|
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load model
|
||||||
model, err := whisper.New(ModelPath)
|
model, err := whisper.New(ModelPath)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.NotNil(model)
|
assert.NotNil(model)
|
||||||
defer model.Close()
|
defer model.Close()
|
||||||
|
|
||||||
context, err := model.NewContext()
|
// Get context for decoding
|
||||||
|
ctx, err := model.NewContext()
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
assert.NotNil(ctx)
|
||||||
|
|
||||||
// This returns an error since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
err = context.SetLanguage("en")
|
|
||||||
assert.Error(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestContextModelIsMultilingual(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
isMultilingual := context.IsMultilingual()
|
|
||||||
|
|
||||||
// This returns false since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
assert.False(isMultilingual)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLanguage(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
// This always returns en since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
expectedLanguage := "en"
|
|
||||||
actualLanguage := context.Language()
|
|
||||||
assert.Equal(expectedLanguage, actualLanguage)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProcess(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
fh, err := os.Open(SamplePath)
|
|
||||||
assert.NoError(err)
|
|
||||||
defer fh.Close()
|
|
||||||
|
|
||||||
// Decode the WAV file - load the full buffer
|
|
||||||
dec := wav.NewDecoder(fh)
|
|
||||||
buf, err := dec.FullPCMBuffer()
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.Equal(uint16(1), dec.NumChans)
|
|
||||||
|
|
||||||
data := buf.AsFloat32Buffer().Data
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
err = context.Process(data, nil, nil, nil)
|
|
||||||
assert.NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDetectedLanguage(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
fh, err := os.Open(SamplePath)
|
|
||||||
assert.NoError(err)
|
|
||||||
defer fh.Close()
|
|
||||||
|
|
||||||
// Decode the WAV file - load the full buffer
|
|
||||||
dec := wav.NewDecoder(fh)
|
|
||||||
buf, err := dec.FullPCMBuffer()
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.Equal(uint16(1), dec.NumChans)
|
|
||||||
|
|
||||||
data := buf.AsFloat32Buffer().Data
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
err = context.Process(data, nil, nil, nil)
|
|
||||||
assert.NoError(err)
|
|
||||||
|
|
||||||
expectedLanguage := "en"
|
|
||||||
actualLanguage := context.DetectedLanguage()
|
|
||||||
assert.Equal(expectedLanguage, actualLanguage)
|
|
||||||
}
|
}
|
||||||
|
@ -12,14 +12,6 @@ import (
|
|||||||
// time. It is called during the Process function
|
// time. It is called during the Process function
|
||||||
type SegmentCallback func(Segment)
|
type SegmentCallback func(Segment)
|
||||||
|
|
||||||
// ProgressCallback is the callback function for reporting progress during
|
|
||||||
// processing. It is called during the Process function
|
|
||||||
type ProgressCallback func(int)
|
|
||||||
|
|
||||||
// EncoderBeginCallback is the callback function for checking if we want to
|
|
||||||
// continue processing. It is called during the Process function
|
|
||||||
type EncoderBeginCallback func() bool
|
|
||||||
|
|
||||||
// Model is the interface to a whisper model. Create a new model with the
|
// Model is the interface to a whisper model. Create a new model with the
|
||||||
// function whisper.New(string)
|
// function whisper.New(string)
|
||||||
type Model interface {
|
type Model interface {
|
||||||
@ -28,61 +20,24 @@ type Model interface {
|
|||||||
// Return a new speech-to-text context.
|
// Return a new speech-to-text context.
|
||||||
NewContext() (Context, error)
|
NewContext() (Context, error)
|
||||||
|
|
||||||
// Return true if the model is multilingual.
|
|
||||||
IsMultilingual() bool
|
|
||||||
|
|
||||||
// Return all languages supported.
|
// Return all languages supported.
|
||||||
Languages() []string
|
Languages() []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Context is the speech recognition context.
|
// Context is the speach recognition context.
|
||||||
type Context interface {
|
type Context interface {
|
||||||
SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
|
SetLanguage(string) error // Set the language to use for speech recognition.
|
||||||
SetTranslate(bool) // Set translate flag
|
|
||||||
IsMultilingual() bool // Return true if the model is multilingual.
|
|
||||||
Language() string // Get language
|
Language() string // Get language
|
||||||
DetectedLanguage() string // Get detected language
|
SetSpeedup(bool) // Set speedup flag
|
||||||
|
|
||||||
SetOffset(time.Duration) // Set offset
|
|
||||||
SetDuration(time.Duration) // Set duration
|
|
||||||
SetThreads(uint) // Set number of threads to use
|
|
||||||
SetSplitOnWord(bool) // Set split on word flag
|
|
||||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
|
||||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
|
||||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
|
||||||
SetTokenTimestamps(bool) // Set token timestamps flag
|
|
||||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
|
||||||
SetAudioCtx(uint) // Set audio encoder context
|
|
||||||
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
|
||||||
SetBeamSize(n int) // Set Beam Size
|
|
||||||
SetEntropyThold(t float32) // Set Entropy threshold
|
|
||||||
SetInitialPrompt(prompt string) // Set initial prompt
|
|
||||||
SetTemperature(t float32) // Set temperature
|
|
||||||
SetTemperatureFallback(t float32) // Set temperature incrementation
|
|
||||||
|
|
||||||
// Process mono audio data and return any errors.
|
// Process mono audio data and return any errors.
|
||||||
// If defined, newly generated segments are passed to the
|
// If defined, newly generated segments are passed to the
|
||||||
// callback function during processing.
|
// callback function during processing.
|
||||||
Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error
|
Process([]float32, SegmentCallback) error
|
||||||
|
|
||||||
// After process is called, return segments until the end of the stream
|
// After process is called, return segments until the end of the stream
|
||||||
// is reached, when io.EOF is returned.
|
// is reached, when io.EOF is returned.
|
||||||
NextSegment() (Segment, error)
|
NextSegment() (Segment, error)
|
||||||
|
|
||||||
IsBEG(Token) bool // Test for "begin" token
|
|
||||||
IsSOT(Token) bool // Test for "start of transcription" token
|
|
||||||
IsEOT(Token) bool // Test for "end of transcription" token
|
|
||||||
IsPREV(Token) bool // Test for "start of prev" token
|
|
||||||
IsSOLM(Token) bool // Test for "start of lm" token
|
|
||||||
IsNOT(Token) bool // Test for "No timestamps" token
|
|
||||||
IsLANG(Token, string) bool // Test for token associated with a specific language
|
|
||||||
IsText(Token) bool // Test for text token
|
|
||||||
|
|
||||||
// Timings
|
|
||||||
PrintTimings()
|
|
||||||
ResetTimings()
|
|
||||||
|
|
||||||
SystemInfo() string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Segment is the text result of a speech recognition.
|
// Segment is the text result of a speech recognition.
|
||||||
@ -102,8 +57,7 @@ type Segment struct {
|
|||||||
|
|
||||||
// Token is a text or special token
|
// Token is a text or special token
|
||||||
type Token struct {
|
type Token struct {
|
||||||
Id int
|
Id int
|
||||||
Text string
|
Text string
|
||||||
P float32
|
P float32
|
||||||
Start, End time.Duration
|
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ var _ Model = (*model)(nil)
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// LIFECYCLE
|
// LIFECYCLE
|
||||||
|
|
||||||
func New(path string) (Model, error) {
|
func New(path string) (*model, error) {
|
||||||
model := new(model)
|
model := new(model)
|
||||||
if _, err := os.Stat(path); err != nil {
|
if _, err := os.Stat(path); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -64,11 +64,6 @@ func (model *model) String() string {
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// PUBLIC METHODS
|
// PUBLIC METHODS
|
||||||
|
|
||||||
// Return true if model is multilingual (language and translation options are supported)
|
|
||||||
func (model *model) IsMultilingual() bool {
|
|
||||||
return model.ctx.Whisper_is_multilingual() != 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return all recognized languages. Initially it is set to auto-detect
|
// Return all recognized languages. Initially it is set to auto-detect
|
||||||
func (model *model) Languages() []string {
|
func (model *model) Languages() []string {
|
||||||
result := make([]string, 0, whisper.Whisper_lang_max_id())
|
result := make([]string, 0, whisper.Whisper_lang_max_id())
|
||||||
@ -94,8 +89,7 @@ func (model *model) NewContext() (Context, error) {
|
|||||||
params.SetPrintRealtime(false)
|
params.SetPrintRealtime(false)
|
||||||
params.SetPrintTimestamps(false)
|
params.SetPrintTimestamps(false)
|
||||||
params.SetThreads(runtime.NumCPU())
|
params.SetThreads(runtime.NumCPU())
|
||||||
params.SetNoContext(true)
|
|
||||||
|
|
||||||
// Return new context
|
// Return new context
|
||||||
return newContext(model, params)
|
return NewContext(model, params)
|
||||||
}
|
}
|
||||||
|
@ -1,91 +0,0 @@
|
|||||||
package whisper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
assert "github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNew(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
t.Run("valid model path", func(t *testing.T) {
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("invalid model path", func(t *testing.T) {
|
|
||||||
invalidModelPath := "invalid-model-path.bin"
|
|
||||||
model, err := whisper.New(invalidModelPath)
|
|
||||||
assert.Error(err)
|
|
||||||
assert.Nil(model)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestClose(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
|
|
||||||
err = model.Close()
|
|
||||||
assert.NoError(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNewContext(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
context, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(context)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsMultilingual(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
isMultilingual := model.IsMultilingual()
|
|
||||||
|
|
||||||
// This returns false since
|
|
||||||
// the model 'models/ggml-small.en.bin'
|
|
||||||
// that is loaded is not multilingual
|
|
||||||
assert.False(isMultilingual)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLanguages(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
defer model.Close()
|
|
||||||
|
|
||||||
expectedLanguages := []string{
|
|
||||||
"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
|
|
||||||
"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
|
|
||||||
"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
|
|
||||||
"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
|
|
||||||
"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
|
|
||||||
"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
|
|
||||||
"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
|
|
||||||
"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
|
|
||||||
"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
|
|
||||||
}
|
|
||||||
|
|
||||||
actualLanguages := model.Languages()
|
|
||||||
|
|
||||||
assert.Equal(expectedLanguages, actualLanguages)
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
package whisper_test
|
|
||||||
|
|
||||||
const (
|
|
||||||
ModelPath = "../../models/ggml-small.en.bin"
|
|
||||||
SamplePath = "../../samples/jfk.wav"
|
|
||||||
)
|
|
@ -9,37 +9,27 @@ import (
|
|||||||
// CGO
|
// CGO
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
|
#cgo LDFLAGS: -lwhisper -lm -lstdc++
|
||||||
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
#cgo darwin LDFLAGS: -framework Accelerate
|
||||||
#include <whisper.h>
|
#include <whisper.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
extern void callNewSegment(void* user_data, int new);
|
extern void callNewSegment(void* user_data, int new);
|
||||||
extern void callProgress(void* user_data, int progress);
|
|
||||||
extern bool callEncoderBegin(void* user_data);
|
extern bool callEncoderBegin(void* user_data);
|
||||||
|
|
||||||
// Text segment callback
|
// Text segment callback
|
||||||
// Called on every newly generated text segment
|
// Called on every newly generated text segment
|
||||||
// Use the whisper_full_...() functions to obtain the text segments
|
// Use the whisper_full_...() functions to obtain the text segments
|
||||||
static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
|
static void whisper_new_segment_cb(struct whisper_context* ctx, int n_new, void* user_data) {
|
||||||
if(user_data != NULL && ctx != NULL) {
|
if(user_data != NULL && ctx != NULL) {
|
||||||
callNewSegment(user_data, n_new);
|
callNewSegment(user_data, n_new);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Progress callback
|
|
||||||
// Called on every newly generated text segment
|
|
||||||
// Use the whisper_full_...() functions to obtain the text segments
|
|
||||||
static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
|
|
||||||
if(user_data != NULL && ctx != NULL) {
|
|
||||||
callProgress(user_data, progress);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encoder begin callback
|
// Encoder begin callback
|
||||||
// If not NULL, called before the encoder starts
|
// If not NULL, called before the encoder starts
|
||||||
// If it returns false, the computation is aborted
|
// If it returns false, the computation is aborted
|
||||||
static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
|
static bool whisper_encoder_begin_cb(struct whisper_context* ctx, void* user_data) {
|
||||||
if(user_data != NULL && ctx != NULL) {
|
if(user_data != NULL && ctx != NULL) {
|
||||||
return callEncoderBegin(user_data);
|
return callEncoderBegin(user_data);
|
||||||
}
|
}
|
||||||
@ -53,8 +43,6 @@ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_
|
|||||||
params.new_segment_callback_user_data = (void*)(ctx);
|
params.new_segment_callback_user_data = (void*)(ctx);
|
||||||
params.encoder_begin_callback = whisper_encoder_begin_cb;
|
params.encoder_begin_callback = whisper_encoder_begin_cb;
|
||||||
params.encoder_begin_callback_user_data = (void*)(ctx);
|
params.encoder_begin_callback_user_data = (void*)(ctx);
|
||||||
params.progress_callback = whisper_progress_cb;
|
|
||||||
params.progress_callback_user_data = (void*)(ctx);
|
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
@ -83,6 +71,7 @@ const (
|
|||||||
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
||||||
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
||||||
NumFFT = C.WHISPER_N_FFT
|
NumFFT = C.WHISPER_N_FFT
|
||||||
|
NumMEL = C.WHISPER_N_MEL
|
||||||
HopLength = C.WHISPER_HOP_LENGTH
|
HopLength = C.WHISPER_HOP_LENGTH
|
||||||
ChunkSize = C.WHISPER_CHUNK_SIZE
|
ChunkSize = C.WHISPER_CHUNK_SIZE
|
||||||
)
|
)
|
||||||
@ -102,7 +91,7 @@ var (
|
|||||||
func Whisper_init(path string) *Context {
|
func Whisper_init(path string) *Context {
|
||||||
cPath := C.CString(path)
|
cPath := C.CString(path)
|
||||||
defer C.free(unsafe.Pointer(cPath))
|
defer C.free(unsafe.Pointer(cPath))
|
||||||
if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
|
if ctx := C.whisper_init(cPath); ctx != nil {
|
||||||
return (*Context)(ctx)
|
return (*Context)(ctx)
|
||||||
} else {
|
} else {
|
||||||
return nil
|
return nil
|
||||||
@ -158,6 +147,16 @@ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// whisper_sample_best() returns the token with the highest probability
|
||||||
|
func (ctx *Context) Whisper_sample_best() TokenData {
|
||||||
|
return TokenData(C.whisper_sample_best((*C.struct_whisper_context)(ctx)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// whisper_sample_timestamp() returns the most probable timestamp token
|
||||||
|
func (ctx *Context) Whisper_sample_timestamp(is_initial bool) TokenData {
|
||||||
|
return TokenData(C.whisper_sample_timestamp((*C.struct_whisper_context)(ctx), C.bool(is_initial)))
|
||||||
|
}
|
||||||
|
|
||||||
// Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
|
// Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
|
||||||
// Returns the number of tokens on success
|
// Returns the number of tokens on success
|
||||||
func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
|
func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
|
||||||
@ -269,13 +268,13 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Task tokens
|
// Task tokens
|
||||||
func (ctx *Context) Whisper_token_translate() Token {
|
func Whisper_token_translate() Token {
|
||||||
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
|
return Token(C.whisper_token_translate())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Task tokens
|
// Task tokens
|
||||||
func (ctx *Context) Whisper_token_transcribe() Token {
|
func Whisper_token_transcribe() Token {
|
||||||
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
|
return Token(C.whisper_token_transcribe())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Performance information
|
// Performance information
|
||||||
@ -301,19 +300,11 @@ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Param
|
|||||||
|
|
||||||
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||||
// Uses the specified decoding strategy to obtain the text.
|
// Uses the specified decoding strategy to obtain the text.
|
||||||
func (ctx *Context) Whisper_full(
|
func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
|
||||||
params Params,
|
|
||||||
samples []float32,
|
|
||||||
encoderBeginCallback func() bool,
|
|
||||||
newSegmentCallback func(int),
|
|
||||||
progressCallback func(int),
|
|
||||||
) error {
|
|
||||||
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
||||||
registerNewSegmentCallback(ctx, newSegmentCallback)
|
registerNewSegmentCallback(ctx, newSegmentCallback)
|
||||||
registerProgressCallback(ctx, progressCallback)
|
|
||||||
defer registerEncoderBeginCallback(ctx, nil)
|
defer registerEncoderBeginCallback(ctx, nil)
|
||||||
defer registerNewSegmentCallback(ctx, nil)
|
defer registerNewSegmentCallback(ctx, nil)
|
||||||
defer registerProgressCallback(ctx, nil)
|
|
||||||
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
@ -337,18 +328,6 @@ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, proc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the id of the autodetected language, returns -1 if not found
|
|
||||||
// Added to whisper.cpp in
|
|
||||||
// https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
|
|
||||||
//
|
|
||||||
// Examples:
|
|
||||||
//
|
|
||||||
// "de" -> 2
|
|
||||||
// "german" -> 2
|
|
||||||
func (ctx *Context) Whisper_full_lang_id() int {
|
|
||||||
return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Number of generated text segments.
|
// Number of generated text segments.
|
||||||
// A segment can be a few words, a sentence, or even a paragraph.
|
// A segment can be a few words, a sentence, or even a paragraph.
|
||||||
func (ctx *Context) Whisper_full_n_segments() int {
|
func (ctx *Context) Whisper_full_n_segments() int {
|
||||||
@ -387,7 +366,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
|
|||||||
|
|
||||||
// Get token data for the specified token in the specified segment.
|
// Get token data for the specified token in the specified segment.
|
||||||
// This contains probabilities, timestamps, etc.
|
// This contains probabilities, timestamps, etc.
|
||||||
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
|
func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
|
||||||
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -401,7 +380,6 @@ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
cbNewSegment = make(map[unsafe.Pointer]func(int))
|
cbNewSegment = make(map[unsafe.Pointer]func(int))
|
||||||
cbProgress = make(map[unsafe.Pointer]func(int))
|
|
||||||
cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
|
cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -413,14 +391,6 @@ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func registerProgressCallback(ctx *Context, fn func(int)) {
|
|
||||||
if fn == nil {
|
|
||||||
delete(cbProgress, unsafe.Pointer(ctx))
|
|
||||||
} else {
|
|
||||||
cbProgress[unsafe.Pointer(ctx)] = fn
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
||||||
if fn == nil {
|
if fn == nil {
|
||||||
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
||||||
@ -436,13 +406,6 @@ func callNewSegment(user_data unsafe.Pointer, new C.int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//export callProgress
|
|
||||||
func callProgress(user_data unsafe.Pointer, progress C.int) {
|
|
||||||
if fn, ok := cbProgress[user_data]; ok {
|
|
||||||
fn(int(progress))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//export callEncoderBegin
|
//export callEncoderBegin
|
||||||
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
||||||
if fn, ok := cbEncoderBegin[user_data]; ok {
|
if fn, ok := cbEncoderBegin[user_data]; ok {
|
||||||
@ -454,15 +417,3 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t TokenData) T0() int64 {
|
|
||||||
return int64(t.t0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t TokenData) T1() int64 {
|
|
||||||
return int64(t.t1)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t TokenData) Id() Token {
|
|
||||||
return Token(t.id)
|
|
||||||
}
|
|
||||||
|
@ -52,7 +52,7 @@ func Test_Whisper_001(t *testing.T) {
|
|||||||
defer ctx.Whisper_free()
|
defer ctx.Whisper_free()
|
||||||
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
||||||
data := buf.AsFloat32Buffer().Data
|
data := buf.AsFloat32Buffer().Data
|
||||||
err = ctx.Whisper_full(params, data, nil, nil, nil)
|
err = ctx.Whisper_full(params, data, nil, nil)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
// Print out tokens
|
// Print out tokens
|
||||||
|
1
bindings/ios
Submodule
1
bindings/ios
Submodule
Submodule bindings/ios added at 6707f1ea1c
124
bindings/java/.idea/uiDesigner.xml
generated
124
bindings/java/.idea/uiDesigner.xml
generated
@ -1,124 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="Palette2">
|
|
||||||
<group name="Swing">
|
|
||||||
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
|
||||||
<initial-values>
|
|
||||||
<property name="text" value="Button" />
|
|
||||||
</initial-values>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
|
||||||
<initial-values>
|
|
||||||
<property name="text" value="RadioButton" />
|
|
||||||
</initial-values>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
|
||||||
<initial-values>
|
|
||||||
<property name="text" value="CheckBox" />
|
|
||||||
</initial-values>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
|
||||||
<initial-values>
|
|
||||||
<property name="text" value="Label" />
|
|
||||||
</initial-values>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
|
||||||
<preferred-size width="150" height="-1" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
|
||||||
<preferred-size width="150" height="-1" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
|
||||||
<preferred-size width="150" height="-1" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
|
||||||
<preferred-size width="150" height="50" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
|
||||||
<preferred-size width="200" height="200" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
|
||||||
<preferred-size width="200" height="200" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
|
||||||
<preferred-size width="-1" height="20" />
|
|
||||||
</default-constraints>
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
|
||||||
</item>
|
|
||||||
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
|
||||||
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
|
||||||
</item>
|
|
||||||
</group>
|
|
||||||
</component>
|
|
||||||
</project>
|
|
@ -1,87 +0,0 @@
|
|||||||
# Java JNI bindings for Whisper
|
|
||||||
|
|
||||||
This package provides Java JNI bindings for whisper.cpp. They have been tested on:
|
|
||||||
|
|
||||||
* <strike>Darwin (OS X) 12.6 on x86_64</strike>
|
|
||||||
* Ubuntu on x86_64
|
|
||||||
* Windows on x86_64
|
|
||||||
|
|
||||||
The "low level" bindings are in `WhisperCppJnaLibrary`. The simplest usage is as follows:
|
|
||||||
|
|
||||||
JNA will attempt to load the `whispercpp` shared library from:
|
|
||||||
|
|
||||||
- jna.library.path
|
|
||||||
- jna.platform.library
|
|
||||||
- ~/Library/Frameworks
|
|
||||||
- /Library/Frameworks
|
|
||||||
- /System/Library/Frameworks
|
|
||||||
- classpath
|
|
||||||
|
|
||||||
```java
|
|
||||||
import io.github.ggerganov.whispercpp.WhisperCpp;
|
|
||||||
|
|
||||||
public class Example {
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
|
||||||
|
|
||||||
WhisperCpp whisper = new WhisperCpp();
|
|
||||||
try {
|
|
||||||
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
|
||||||
// or you can provide the absolute path to the model file.
|
|
||||||
whisper.initContext("../ggml-base.en.bin");
|
|
||||||
WhisperFullParams.ByValue whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
|
||||||
|
|
||||||
// custom configuration if required
|
|
||||||
//whisperParams.n_threads = 8;
|
|
||||||
whisperParams.temperature = 0.0f;
|
|
||||||
whisperParams.temperature_inc = 0.2f;
|
|
||||||
//whisperParams.language = "en";
|
|
||||||
|
|
||||||
float[] samples = readAudio(); // divide each value by 32767.0f
|
|
||||||
List<WhisperSegment> whisperSegmentList = whisper.fullTranscribeWithTime(whisperParams, samples);
|
|
||||||
|
|
||||||
for (WhisperSegment whisperSegment : whisperSegmentList) {
|
|
||||||
|
|
||||||
long start = whisperSegment.getStart();
|
|
||||||
long end = whisperSegment.getEnd();
|
|
||||||
|
|
||||||
String text = whisperSegment.getSentence();
|
|
||||||
|
|
||||||
System.out.println("start: "+start);
|
|
||||||
System.out.println("end: "+end);
|
|
||||||
System.out.println("text: "+text);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
whisper.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Building & Testing
|
|
||||||
|
|
||||||
To build, you need JDK 8 or higher installed. Run the tests with:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/ggml-org/whisper.cpp.git
|
|
||||||
cd whisper.cpp/bindings/java
|
|
||||||
|
|
||||||
./gradlew build
|
|
||||||
```
|
|
||||||
|
|
||||||
You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows, the DLL is included in the jar; you can update it with:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
|
|
||||||
|
|
@ -1,159 +0,0 @@
|
|||||||
plugins {
|
|
||||||
id 'java'
|
|
||||||
id 'java-library'
|
|
||||||
id 'maven-publish'
|
|
||||||
id 'signing'
|
|
||||||
}
|
|
||||||
|
|
||||||
archivesBaseName = 'whispercpp'
|
|
||||||
group = 'io.github.ggerganov'
|
|
||||||
version = '1.4.0'
|
|
||||||
|
|
||||||
|
|
||||||
sourceCompatibility = 1.8
|
|
||||||
targetCompatibility = 1.8
|
|
||||||
|
|
||||||
sourceSets {
|
|
||||||
main {
|
|
||||||
resources {
|
|
||||||
srcDirs = ['src/main/resources', 'build/generated/resources/main']
|
|
||||||
}
|
|
||||||
}
|
|
||||||
test {
|
|
||||||
runtimeClasspath += files('build/generated/resources/main')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyLibwhisperDynlib', Copy) {
|
|
||||||
from '../../build/src'
|
|
||||||
include 'libwhisper.dylib'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyLibwhisperSo', Copy) {
|
|
||||||
from '../../build/src'
|
|
||||||
include 'libwhisper.so'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyWhisperDLL', Copy) {
|
|
||||||
from '../../build/bin/Release'
|
|
||||||
include 'whisper.dll'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyGGML_BASE_DLL', Copy) {
|
|
||||||
from '../../build/bin/Release'
|
|
||||||
include 'ggml-base.dll'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyGGML_DLL', Copy) {
|
|
||||||
from '../../build/bin/Release'
|
|
||||||
include 'ggml.dll'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyGGML_CPU_DLL', Copy) {
|
|
||||||
from '../../build/bin/Release'
|
|
||||||
include 'ggml-cpu.dll'
|
|
||||||
into 'build/generated/resources/main'
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('copyLibs') {
|
|
||||||
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDLL, copyGGML_BASE_DLL, copyGGML_DLL, copyGGML_CPU_DLL
|
|
||||||
}
|
|
||||||
|
|
||||||
test {
|
|
||||||
systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
|
|
||||||
}
|
|
||||||
|
|
||||||
java {
|
|
||||||
withSourcesJar()
|
|
||||||
withJavadocJar()
|
|
||||||
}
|
|
||||||
|
|
||||||
sourcesJar() {
|
|
||||||
dependsOn copyLibs
|
|
||||||
}
|
|
||||||
|
|
||||||
jar {
|
|
||||||
dependsOn copyLibs
|
|
||||||
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
|
||||||
}
|
|
||||||
|
|
||||||
javadoc {
|
|
||||||
options.addStringOption('Xdoclint:none', '-quiet')
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.withType(Test) {
|
|
||||||
useJUnitPlatform()
|
|
||||||
}
|
|
||||||
|
|
||||||
test.dependsOn copyLibs
|
|
||||||
processResources.dependsOn copyLibs
|
|
||||||
|
|
||||||
dependencies {
|
|
||||||
implementation "net.java.dev.jna:jna:5.13.0"
|
|
||||||
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
|
||||||
testImplementation "org.assertj:assertj-core:3.24.2"
|
|
||||||
}
|
|
||||||
|
|
||||||
repositories {
|
|
||||||
mavenCentral()
|
|
||||||
}
|
|
||||||
|
|
||||||
publishing {
|
|
||||||
publications {
|
|
||||||
mavenJava(MavenPublication) {
|
|
||||||
artifactId = 'whispercpp'
|
|
||||||
from components.java
|
|
||||||
pom {
|
|
||||||
name = 'whispercpp'
|
|
||||||
description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
|
|
||||||
url = 'https://github.com/ggerganov/whisper.cpp'
|
|
||||||
licenses {
|
|
||||||
license {
|
|
||||||
name = 'MIT licence'
|
|
||||||
url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
developers {
|
|
||||||
developer {
|
|
||||||
id = 'ggerganov'
|
|
||||||
name = 'Georgi Gerganov'
|
|
||||||
email = 'ggerganov@gmail.com'
|
|
||||||
}
|
|
||||||
developer {
|
|
||||||
id = 'nalbion'
|
|
||||||
name = 'Nicholas Albion'
|
|
||||||
email = 'nalbion@yahoo.com'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
scm {
|
|
||||||
connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
|
|
||||||
url = 'https://github.com/ggerganov/whisper.cpp'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
repositories {
|
|
||||||
maven {
|
|
||||||
def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
|
|
||||||
def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
|
|
||||||
url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
|
|
||||||
credentials {
|
|
||||||
username = System.getenv("MAVEN_USERNAME")
|
|
||||||
password = System.getenv("MAVEN_PASSWORD")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
signing {
|
|
||||||
def signingKey = System.getenv("PGP_SECRET")
|
|
||||||
def signingPassword = System.getenv("PGP_PASSPHRASE")
|
|
||||||
useInMemoryPgpKeys(signingKey, signingPassword)
|
|
||||||
sign publishing.publications.mavenJava
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
org.gradle.jvmargs=-Xms256m -Xmx1024m
|
|
||||||
system.include.dir=/usr/include
|
|
||||||
#system.local.include.dir=../../include
|
|
||||||
system.local.include.dir=./build/generated/sources/headers/java/main
|
|
||||||
jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
|
|
||||||
jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
|
|
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
@ -1,6 +0,0 @@
|
|||||||
distributionBase=GRADLE_USER_HOME
|
|
||||||
distributionPath=wrapper/dists
|
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
|
|
||||||
networkTimeout=10000
|
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
|
||||||
zipStorePath=wrapper/dists
|
|
244
bindings/java/gradlew
vendored
244
bindings/java/gradlew
vendored
@ -1,244 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
#
|
|
||||||
# Copyright © 2015-2021 the original authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# https://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
#
|
|
||||||
# Gradle start up script for POSIX generated by Gradle.
|
|
||||||
#
|
|
||||||
# Important for running:
|
|
||||||
#
|
|
||||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
|
||||||
# noncompliant, but you have some other compliant shell such as ksh or
|
|
||||||
# bash, then to run this script, type that shell name before the whole
|
|
||||||
# command line, like:
|
|
||||||
#
|
|
||||||
# ksh Gradle
|
|
||||||
#
|
|
||||||
# Busybox and similar reduced shells will NOT work, because this script
|
|
||||||
# requires all of these POSIX shell features:
|
|
||||||
# * functions;
|
|
||||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
|
||||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
|
||||||
# * compound commands having a testable exit status, especially «case»;
|
|
||||||
# * various built-in commands including «command», «set», and «ulimit».
|
|
||||||
#
|
|
||||||
# Important for patching:
|
|
||||||
#
|
|
||||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
|
||||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
|
||||||
#
|
|
||||||
# The "traditional" practice of packing multiple parameters into a
|
|
||||||
# space-separated string is a well documented source of bugs and security
|
|
||||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
|
||||||
# options in "$@", and eventually passing that to Java.
|
|
||||||
#
|
|
||||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
|
||||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
|
||||||
# see the in-line comments for details.
|
|
||||||
#
|
|
||||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
|
||||||
# Darwin, MinGW, and NonStop.
|
|
||||||
#
|
|
||||||
# (3) This script is generated from the Groovy template
|
|
||||||
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
|
||||||
# within the Gradle project.
|
|
||||||
#
|
|
||||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
|
||||||
#
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
# Attempt to set APP_HOME
|
|
||||||
|
|
||||||
# Resolve links: $0 may be a link
|
|
||||||
app_path=$0
|
|
||||||
|
|
||||||
# Need this for daisy-chained symlinks.
|
|
||||||
while
|
|
||||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
|
||||||
[ -h "$app_path" ]
|
|
||||||
do
|
|
||||||
ls=$( ls -ld "$app_path" )
|
|
||||||
link=${ls#*' -> '}
|
|
||||||
case $link in #(
|
|
||||||
/*) app_path=$link ;; #(
|
|
||||||
*) app_path=$APP_HOME$link ;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# This is normally unused
|
|
||||||
# shellcheck disable=SC2034
|
|
||||||
APP_BASE_NAME=${0##*/}
|
|
||||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
|
||||||
|
|
||||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
|
||||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
|
||||||
|
|
||||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
|
||||||
MAX_FD=maximum
|
|
||||||
|
|
||||||
warn () {
|
|
||||||
echo "$*"
|
|
||||||
} >&2
|
|
||||||
|
|
||||||
die () {
|
|
||||||
echo
|
|
||||||
echo "$*"
|
|
||||||
echo
|
|
||||||
exit 1
|
|
||||||
} >&2
|
|
||||||
|
|
||||||
# OS specific support (must be 'true' or 'false').
|
|
||||||
cygwin=false
|
|
||||||
msys=false
|
|
||||||
darwin=false
|
|
||||||
nonstop=false
|
|
||||||
case "$( uname )" in #(
|
|
||||||
CYGWIN* ) cygwin=true ;; #(
|
|
||||||
Darwin* ) darwin=true ;; #(
|
|
||||||
MSYS* | MINGW* ) msys=true ;; #(
|
|
||||||
NONSTOP* ) nonstop=true ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
|
||||||
|
|
||||||
|
|
||||||
# Determine the Java command to use to start the JVM.
|
|
||||||
if [ -n "$JAVA_HOME" ] ; then
|
|
||||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
|
||||||
# IBM's JDK on AIX uses strange locations for the executables
|
|
||||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
|
||||||
else
|
|
||||||
JAVACMD=$JAVA_HOME/bin/java
|
|
||||||
fi
|
|
||||||
if [ ! -x "$JAVACMD" ] ; then
|
|
||||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
|
||||||
|
|
||||||
Please set the JAVA_HOME variable in your environment to match the
|
|
||||||
location of your Java installation."
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
JAVACMD=java
|
|
||||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
|
||||||
|
|
||||||
Please set the JAVA_HOME variable in your environment to match the
|
|
||||||
location of your Java installation."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Increase the maximum file descriptors if we can.
|
|
||||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
|
||||||
case $MAX_FD in #(
|
|
||||||
max*)
|
|
||||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
|
||||||
# shellcheck disable=SC3045
|
|
||||||
MAX_FD=$( ulimit -H -n ) ||
|
|
||||||
warn "Could not query maximum file descriptor limit"
|
|
||||||
esac
|
|
||||||
case $MAX_FD in #(
|
|
||||||
'' | soft) :;; #(
|
|
||||||
*)
|
|
||||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
|
||||||
# shellcheck disable=SC3045
|
|
||||||
ulimit -n "$MAX_FD" ||
|
|
||||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Collect all arguments for the java command, stacking in reverse order:
|
|
||||||
# * args from the command line
|
|
||||||
# * the main class name
|
|
||||||
# * -classpath
|
|
||||||
# * -D...appname settings
|
|
||||||
# * --module-path (only if needed)
|
|
||||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
|
||||||
|
|
||||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
|
||||||
if "$cygwin" || "$msys" ; then
|
|
||||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
|
||||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
|
||||||
|
|
||||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
|
||||||
|
|
||||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
|
||||||
for arg do
|
|
||||||
if
|
|
||||||
case $arg in #(
|
|
||||||
-*) false ;; # don't mess with options #(
|
|
||||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
|
||||||
[ -e "$t" ] ;; #(
|
|
||||||
*) false ;;
|
|
||||||
esac
|
|
||||||
then
|
|
||||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
|
||||||
fi
|
|
||||||
# Roll the args list around exactly as many times as the number of
|
|
||||||
# args, so each arg winds up back in the position where it started, but
|
|
||||||
# possibly modified.
|
|
||||||
#
|
|
||||||
# NB: a `for` loop captures its iteration list before it begins, so
|
|
||||||
# changing the positional parameters here affects neither the number of
|
|
||||||
# iterations, nor the values presented in `arg`.
|
|
||||||
shift # remove old arg
|
|
||||||
set -- "$@" "$arg" # push replacement arg
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Collect all arguments for the java command;
|
|
||||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
|
||||||
# shell script including quotes and variable substitutions, so put them in
|
|
||||||
# double quotes to make sure that they get re-expanded; and
|
|
||||||
# * put everything else in single quotes, so that it's not re-expanded.
|
|
||||||
|
|
||||||
set -- \
|
|
||||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
|
||||||
-classpath "$CLASSPATH" \
|
|
||||||
org.gradle.wrapper.GradleWrapperMain \
|
|
||||||
"$@"
|
|
||||||
|
|
||||||
# Stop when "xargs" is not available.
|
|
||||||
if ! command -v xargs >/dev/null 2>&1
|
|
||||||
then
|
|
||||||
die "xargs is not available"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Use "xargs" to parse quoted args.
|
|
||||||
#
|
|
||||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
|
||||||
#
|
|
||||||
# In Bash we could simply go:
|
|
||||||
#
|
|
||||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
|
||||||
# set -- "${ARGS[@]}" "$@"
|
|
||||||
#
|
|
||||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
|
||||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
|
||||||
# character that might be a shell metacharacter, then use eval to reverse
|
|
||||||
# that process (while maintaining the separation between arguments), and wrap
|
|
||||||
# the whole thing up as a single "set" statement.
|
|
||||||
#
|
|
||||||
# This will of course break if any of these variables contains a newline or
|
|
||||||
# an unmatched quote.
|
|
||||||
#
|
|
||||||
|
|
||||||
eval "set -- $(
|
|
||||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
|
||||||
xargs -n1 |
|
|
||||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
|
||||||
tr '\n' ' '
|
|
||||||
)" '"$@"'
|
|
||||||
|
|
||||||
exec "$JAVACMD" "$@"
|
|
92
bindings/java/gradlew.bat
vendored
92
bindings/java/gradlew.bat
vendored
@ -1,92 +0,0 @@
|
|||||||
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@rem Command echo stays on only when DEBUG is set to a non-empty value.
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem Locates java.exe (via JAVA_HOME, else PATH), then launches
@rem org.gradle.wrapper.GradleWrapperMain from gradle\wrapper\gradle-wrapper.jar,
@rem forwarding all command-line arguments.
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem %~dp0 is the drive and directory of this script.
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe for java.exe on PATH. Output is discarded; only the
@rem exit code of the probe is inspected.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Remove every double quote from JAVA_HOME before building the java.exe path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
rem A zero ERRORLEVEL on this path is replaced by 1 so that failure is always non-zero.
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

@rem Last label in the script; no statements follow it.
:omega
|
|
@ -1 +0,0 @@
|
|||||||
// Sets the name of the root project of this Gradle build.
rootProject.name = "whispercpp"
|
|
@ -1,24 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
/**
 * Integer identifiers for the alignment-heads presets used by DTW token timestamps.
 *
 * <p>NOTE(review): these numbers are presumably expected to match the native
 * {@code whisper_alignment_heads_preset} enum; confirm the numbering against
 * {@code whisper.h} before relying on them across the JNA boundary.
 */
public class WhisperConstants {

    // ---- Preset used when no alignment heads are selected ----
    public static final int WHISPER_AHEADS_NONE           = 0;

    // ---- Presets named after a model variant ----
    public static final int WHISPER_AHEADS_TINY_EN        = 1;
    public static final int WHISPER_AHEADS_TINY           = 2;
    public static final int WHISPER_AHEADS_BASE_EN        = 3;
    public static final int WHISPER_AHEADS_BASE           = 4;
    public static final int WHISPER_AHEADS_SMALL_EN       = 5;
    public static final int WHISPER_AHEADS_SMALL          = 6;
    public static final int WHISPER_AHEADS_MEDIUM_EN      = 7;
    public static final int WHISPER_AHEADS_MEDIUM         = 8;
    public static final int WHISPER_AHEADS_LARGE_V1       = 9;
    public static final int WHISPER_AHEADS_LARGE_V2       = 10;
    public static final int WHISPER_AHEADS_LARGE_V3       = 11;
    public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 12;

    // ---- Presets not tied to a named model variant ----
    public static final int WHISPER_AHEADS_CUSTOM         = 13;
    public static final int WHISPER_AHEADS_N_TOP_MOST     = 14;

    /** Number of preset identifiers declared above (values 0 through 14). */
    public static final int WHISPER_AHEADS_COUNT          = 15;
}
|
|
@ -1,36 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import com.sun.jna.NativeLong;
|
|
||||||
import com.sun.jna.Structure;
|
|
||||||
import com.sun.jna.ptr.PointerByReference;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
|
||||||
import io.github.ggerganov.whispercpp.WhisperModel;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class WhisperContext extends Structure {
|
|
||||||
public NativeLong t_load_us;
|
|
||||||
public NativeLong t_start_us;
|
|
||||||
|
|
||||||
/** weight type (FP32 / FP16 / QX) */
|
|
||||||
public GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
|
||||||
/** intermediate type (FP32 or FP16) */
|
|
||||||
public GgmlType itype = GgmlType.GGML_TYPE_F16;
|
|
||||||
|
|
||||||
public WhisperContextParams.ByValue params;
|
|
||||||
|
|
||||||
public Pointer model;
|
|
||||||
public Pointer vocab;
|
|
||||||
public Pointer state;
|
|
||||||
|
|
||||||
/** populated by whisper_init_from_file_with_params() */
|
|
||||||
public Pointer path_model;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return List.of("t_load_us", "t_start_us", "wtype", "itype",
|
|
||||||
"params", "model", "vocab", "state", "path_model");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,219 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import com.sun.jna.Native;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
|
|
||||||
*/
|
|
||||||
public class WhisperCpp implements AutoCloseable {
|
|
||||||
private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
|
|
||||||
private Pointer ctx = null;
|
|
||||||
private Pointer paramsPointer = null;
|
|
||||||
private Pointer greedyParamsPointer = null;
|
|
||||||
private Pointer beamParamsPointer = null;
|
|
||||||
|
|
||||||
public File modelDir() {
|
|
||||||
String modelDirPath = System.getenv("XDG_CACHE_HOME");
|
|
||||||
if (modelDirPath == null) {
|
|
||||||
modelDirPath = System.getProperty("user.home") + "/.cache";
|
|
||||||
}
|
|
||||||
|
|
||||||
return new File(modelDirPath, "whisper");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
|
||||||
*/
|
|
||||||
public void initContext(String modelPath) throws FileNotFoundException {
|
|
||||||
initContextImpl(modelPath, getContextDefaultParams());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
|
||||||
* @param params - params to use when initialising the context
|
|
||||||
*/
|
|
||||||
public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
|
||||||
initContextImpl(modelPath, params);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
|
||||||
if (ctx != null) {
|
|
||||||
lib.whisper_free(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!modelPath.contains("/") && !modelPath.contains("\\")) {
|
|
||||||
if (!modelPath.endsWith(".bin")) {
|
|
||||||
modelPath = "ggml-" + modelPath.replace("-", ".") + ".bin";
|
|
||||||
}
|
|
||||||
|
|
||||||
modelPath = new File(modelDir(), modelPath).getAbsolutePath();
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx = lib.whisper_init_from_file_with_params(modelPath, params);
|
|
||||||
|
|
||||||
if (ctx == null) {
|
|
||||||
throw new FileNotFoundException(modelPath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
|
||||||
* Returns a ByValue instance to ensure proper parameter passing to native code.
|
|
||||||
*/
|
|
||||||
public WhisperContextParams.ByValue getContextDefaultParams() {
|
|
||||||
WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
|
|
||||||
lib.whisper_context_default_params_by_ref());
|
|
||||||
valueParams.read();
|
|
||||||
return valueParams;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides default params which can be used with `whisper_full()` etc.
|
|
||||||
* Because this function allocates memory for the params, the caller must call either:
|
|
||||||
* - call `whisper_free_params()`
|
|
||||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
|
||||||
*
|
|
||||||
* @param strategy - GREEDY
|
|
||||||
*/
|
|
||||||
public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
|
||||||
Pointer pointer;
|
|
||||||
|
|
||||||
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
|
||||||
if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
|
|
||||||
if (greedyParamsPointer == null) {
|
|
||||||
greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
|
||||||
}
|
|
||||||
pointer = greedyParamsPointer;
|
|
||||||
} else {
|
|
||||||
if (beamParamsPointer == null) {
|
|
||||||
beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
|
||||||
}
|
|
||||||
pointer = beamParamsPointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
|
|
||||||
params.read();
|
|
||||||
return params;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
freeContext();
|
|
||||||
freeParams();
|
|
||||||
System.out.println("Whisper closed");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void freeContext() {
|
|
||||||
if (ctx != null) {
|
|
||||||
lib.whisper_free(ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void freeParams() {
|
|
||||||
if (paramsPointer != null) {
|
|
||||||
Native.free(Pointer.nativeValue(paramsPointer));
|
|
||||||
paramsPointer = null;
|
|
||||||
}
|
|
||||||
if (greedyParamsPointer != null) {
|
|
||||||
Native.free(Pointer.nativeValue(greedyParamsPointer));
|
|
||||||
greedyParamsPointer = null;
|
|
||||||
}
|
|
||||||
if (beamParamsPointer != null) {
|
|
||||||
Native.free(Pointer.nativeValue(beamParamsPointer));
|
|
||||||
beamParamsPointer = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
|
||||||
* Not thread safe for same context
|
|
||||||
* Uses the specified decoding strategy to obtain the text.
|
|
||||||
*/
|
|
||||||
public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
|
|
||||||
if (ctx == null) {
|
|
||||||
throw new IllegalStateException("Model not initialised");
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
|
|
||||||
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
|
|
||||||
valueParams.read();
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
|
||||||
throw new IOException("Failed to process audio");
|
|
||||||
}
|
|
||||||
|
|
||||||
int nSegments = lib.whisper_full_n_segments(ctx);
|
|
||||||
|
|
||||||
StringBuilder str = new StringBuilder();
|
|
||||||
|
|
||||||
for (int i = 0; i < nSegments; i++) {
|
|
||||||
String text = lib.whisper_full_get_segment_text(ctx, i);
|
|
||||||
System.out.println("Segment:" + text);
|
|
||||||
str.append(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
return str.toString().trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Full transcribe with time list.
|
|
||||||
*
|
|
||||||
* @param whisperParams the whisper params
|
|
||||||
* @param audioData the audio data
|
|
||||||
* @return the list
|
|
||||||
* @throws IOException the io exception
|
|
||||||
*/
|
|
||||||
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
|
|
||||||
if (ctx == null) {
|
|
||||||
throw new IllegalStateException("Model not initialised");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
|
||||||
throw new IOException("Failed to process audio");
|
|
||||||
}
|
|
||||||
|
|
||||||
int nSegments = lib.whisper_full_n_segments(ctx);
|
|
||||||
List<WhisperSegment> segments= new ArrayList<>(nSegments);
|
|
||||||
|
|
||||||
for (int i = 0; i < nSegments; i++) {
|
|
||||||
long t0 = lib.whisper_full_get_segment_t0(ctx, i);
|
|
||||||
String text = lib.whisper_full_get_segment_text(ctx, i);
|
|
||||||
long t1 = lib.whisper_full_get_segment_t1(ctx, i);
|
|
||||||
segments.add(new WhisperSegment(t0,t1,text));
|
|
||||||
}
|
|
||||||
|
|
||||||
return segments;
|
|
||||||
}
|
|
||||||
|
|
||||||
// public int getTextSegmentCount(Pointer ctx) {
|
|
||||||
// return lib.whisper_full_n_segments(ctx);
|
|
||||||
// }
|
|
||||||
// public String getTextSegment(Pointer ctx, int index) {
|
|
||||||
// return lib.whisper_full_get_segment_text(ctx, index);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public String getSystemInfo() {
|
|
||||||
return lib.whisper_print_system_info();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int benchMemcpy(int nthread) {
|
|
||||||
return lib.whisper_bench_memcpy(nthread);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int benchGgmlMulMat(int nthread) {
|
|
||||||
return lib.whisper_bench_ggml_mul_mat(nthread);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,390 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import com.sun.jna.Library;
|
|
||||||
import com.sun.jna.Native;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
|
||||||
|
|
||||||
public interface WhisperCppJnaLibrary extends Library {
|
|
||||||
|
|
||||||
WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
|
|
||||||
|
|
||||||
String whisper_print_system_info();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
|
|
||||||
*
|
|
||||||
* @param path_model Path to the model file
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_from_file(String path_model);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
|
||||||
* Because this function allocates memory for the params, the caller must call either:
|
|
||||||
* - call `whisper_free_context_params()`
|
|
||||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
|
||||||
*/
|
|
||||||
Pointer whisper_context_default_params_by_ref();
|
|
||||||
|
|
||||||
void whisper_free_context_params(Pointer params);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model by loading from a file.
|
|
||||||
*
|
|
||||||
* @param path_model Path to the model file
|
|
||||||
* @param params Pointer to whisper_context_params
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
|
||||||
*
|
|
||||||
* @param buffer Model buffer
|
|
||||||
* @param buffer_size Size of the model buffer
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_from_buffer(Pointer buffer, int buffer_size);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model using a model loader.
|
|
||||||
*
|
|
||||||
* @param loader Model loader
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init(WhisperModelLoader loader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model by loading from a file without allocating the state.
|
|
||||||
*
|
|
||||||
* @param path_model Path to the model file
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_from_file_no_state(String path_model);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model by loading from a buffer without allocating the state.
|
|
||||||
*
|
|
||||||
* @param buffer Model buffer
|
|
||||||
* @param buffer_size Size of the model buffer
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_from_buffer_no_state(Pointer buffer, int buffer_size);
|
|
||||||
|
|
||||||
// Pointer whisper_init_from_buffer_no_state(Pointer buffer, long buffer_size);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate (almost) all memory needed for the model using a model loader without allocating the state.
|
|
||||||
*
|
|
||||||
* @param loader Model loader
|
|
||||||
* @return Whisper context on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_no_state(WhisperModelLoader loader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allocate memory for the Whisper state.
|
|
||||||
*
|
|
||||||
* @param ctx Whisper context
|
|
||||||
* @return Whisper state on success, null on failure
|
|
||||||
*/
|
|
||||||
Pointer whisper_init_state(Pointer ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free all allocated memory associated with the Whisper context.
|
|
||||||
*
|
|
||||||
* @param ctx Whisper context
|
|
||||||
*/
|
|
||||||
void whisper_free(Pointer ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Free all allocated memory associated with the Whisper state.
|
|
||||||
*
|
|
||||||
* @param state Whisper state
|
|
||||||
*/
|
|
||||||
void whisper_free_state(Pointer state);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert RAW PCM audio to log mel spectrogram.
|
|
||||||
* The resulting spectrogram is stored inside the default state of the provided whisper context.
|
|
||||||
*
|
|
||||||
* @param ctx - Pointer to a WhisperContext
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
int whisper_pcm_to_mel(Pointer ctx, final float[] samples, int n_samples, int n_threads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param ctx Pointer to a WhisperContext
|
|
||||||
* @param state Pointer to WhisperState
|
|
||||||
* @param n_samples
|
|
||||||
* @param n_threads
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
int whisper_pcm_to_mel_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
|
||||||
* Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
|
||||||
* n_mel must be 80
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
int whisper_set_mel(Pointer ctx, final float[] data, int n_len, int n_mel);
|
|
||||||
int whisper_set_mel_with_state(Pointer ctx, Pointer state, final float[] data, int n_len, int n_mel);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
|
|
||||||
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
|
||||||
* Offset can be used to specify the offset of the first frame in the spectrogram.
|
|
||||||
* @return 0 on success
|
|
||||||
*/
|
|
||||||
int whisper_encode(Pointer ctx, int offset, int n_threads);
|
|
||||||
|
|
||||||
int whisper_encode_with_state(Pointer ctx, Pointer state, int offset, int n_threads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run the Whisper decoder to obtain the logits and probabilities for the next token.
|
|
||||||
* Make sure to call whisper_encode() first.
|
|
||||||
* tokens + n_tokens is the provided context for the decoder.
|
|
||||||
* n_past is the number of tokens to use from previous decoder calls.
|
|
||||||
* Returns 0 on success
|
|
||||||
* TODO: add support for multiple decoders
|
|
||||||
*/
|
|
||||||
int whisper_decode(Pointer ctx, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param ctx
|
|
||||||
* @param state
|
|
||||||
* @param tokens Pointer to int tokens
|
|
||||||
* @param n_tokens
|
|
||||||
* @param n_past
|
|
||||||
* @param n_threads
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
int whisper_decode_with_state(Pointer ctx, Pointer state, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert the provided text into tokens.
|
|
||||||
* The tokens pointer must be large enough to hold the resulting tokens.
|
|
||||||
* Returns the number of tokens on success, no more than n_max_tokens
|
|
||||||
* Returns -1 on failure
|
|
||||||
* TODO: not sure if correct
|
|
||||||
*/
|
|
||||||
int whisper_tokenize(Pointer ctx, String text, Pointer tokens, int n_max_tokens);
|
|
||||||
|
|
||||||
/** Largest language id (i.e. number of available languages - 1) */
|
|
||||||
int whisper_lang_max_id();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the id of the specified language, returns -1 if not found.
|
|
||||||
* Examples:
|
|
||||||
* "de" -> 2
|
|
||||||
* "german" -> 2
|
|
||||||
*/
|
|
||||||
int whisper_lang_id(String lang);
|
|
||||||
|
|
||||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
|
||||||
String whisper_lang_str(int id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Use mel data at offset_ms to try and auto-detect the spoken language.
|
|
||||||
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
|
|
||||||
* Returns the top language id or negative on failure
|
|
||||||
* If not null, fills the lang_probs array with the probabilities of all languages
|
|
||||||
* The array must be whisper_lang_max_id() + 1 in size
|
|
||||||
*
|
|
||||||
* ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
|
|
||||||
*/
|
|
||||||
int whisper_lang_auto_detect(Pointer ctx, int offset_ms, int n_threads, float[] lang_probs);
|
|
||||||
|
|
||||||
int whisper_lang_auto_detect_with_state(Pointer ctx, Pointer state, int offset_ms, int n_threads, float[] lang_probs);
|
|
||||||
|
|
||||||
int whisper_n_len (Pointer ctx); // mel length
|
|
||||||
int whisper_n_len_from_state(Pointer state); // mel length
|
|
||||||
int whisper_n_vocab (Pointer ctx);
|
|
||||||
int whisper_n_text_ctx (Pointer ctx);
|
|
||||||
int whisper_n_audio_ctx (Pointer ctx);
|
|
||||||
int whisper_is_multilingual (Pointer ctx);
|
|
||||||
|
|
||||||
int whisper_model_n_vocab (Pointer ctx);
|
|
||||||
int whisper_model_n_audio_ctx (Pointer ctx);
|
|
||||||
int whisper_model_n_audio_state(Pointer ctx);
|
|
||||||
int whisper_model_n_audio_head (Pointer ctx);
|
|
||||||
int whisper_model_n_audio_layer(Pointer ctx);
|
|
||||||
int whisper_model_n_text_ctx (Pointer ctx);
|
|
||||||
int whisper_model_n_text_state (Pointer ctx);
|
|
||||||
int whisper_model_n_text_head (Pointer ctx);
|
|
||||||
int whisper_model_n_text_layer (Pointer ctx);
|
|
||||||
int whisper_model_n_mels (Pointer ctx);
|
|
||||||
int whisper_model_ftype (Pointer ctx);
|
|
||||||
int whisper_model_type (Pointer ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Token logits obtained from the last call to whisper_decode().
|
|
||||||
* The logits for the last token are stored in the last row
|
|
||||||
* Rows: n_tokens
|
|
||||||
* Cols: n_vocab
|
|
||||||
*/
|
|
||||||
float[] whisper_get_logits (Pointer ctx);
|
|
||||||
float[] whisper_get_logits_from_state(Pointer state);
|
|
||||||
|
|
||||||
// Token Id -> String. Uses the vocabulary in the provided context
|
|
||||||
String whisper_token_to_str(Pointer ctx, int token);
|
|
||||||
String whisper_model_type_readable(Pointer ctx);
|
|
||||||
|
|
||||||
// Special tokens
|
|
||||||
int whisper_token_eot (Pointer ctx);
|
|
||||||
int whisper_token_sot (Pointer ctx);
|
|
||||||
int whisper_token_prev(Pointer ctx);
|
|
||||||
int whisper_token_solm(Pointer ctx);
|
|
||||||
int whisper_token_not (Pointer ctx);
|
|
||||||
int whisper_token_beg (Pointer ctx);
|
|
||||||
int whisper_token_lang(Pointer ctx, int lang_id);
|
|
||||||
|
|
||||||
// Task tokens
|
|
||||||
int whisper_token_translate (Pointer ctx);
|
|
||||||
int whisper_token_transcribe(Pointer ctx);
|
|
||||||
|
|
||||||
// Performance information from the default state.
|
|
||||||
void whisper_print_timings(Pointer ctx);
|
|
||||||
void whisper_reset_timings(Pointer ctx);
|
|
||||||
|
|
||||||
// Note: Even if `whisper_full_params is stripped back to just 4 ints, JNA throws "Invalid memory access"
|
|
||||||
// when `whisper_full_default_params()` tries to return a struct.
|
|
||||||
// WhisperFullParams whisper_full_default_params(int strategy);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides default params which can be used with `whisper_full()` etc.
|
|
||||||
* Because this function allocates memory for the params, the caller must call either:
|
|
||||||
* - call `whisper_free_params()`
|
|
||||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
|
||||||
*
|
|
||||||
* @param strategy - WhisperSamplingStrategy.value
|
|
||||||
*/
|
|
||||||
Pointer whisper_full_default_params_by_ref(int strategy);
|
|
||||||
|
|
||||||
void whisper_free_params(Pointer params);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
|
||||||
* Not thread safe for same context
|
|
||||||
* Uses the specified decoding strategy to obtain the text.
|
|
||||||
*/
|
|
||||||
int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
|
|
||||||
|
|
||||||
public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
|
|
||||||
//int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
|
||||||
|
|
||||||
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
|
||||||
// Result is stored in the default state of the context
|
|
||||||
// Not thread safe if executed in parallel on the same context.
|
|
||||||
// It seems this approach can offer some speedup in some cases.
|
|
||||||
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
|
||||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Number of generated text segments.
|
|
||||||
* A segment can be a few words, a sentence, or even a paragraph.
|
|
||||||
* @param ctx Pointer to WhisperContext
|
|
||||||
*/
|
|
||||||
int whisper_full_n_segments (Pointer ctx);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param state Pointer to WhisperState
|
|
||||||
*/
|
|
||||||
int whisper_full_n_segments_from_state(Pointer state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Language id associated with the context's default state.
|
|
||||||
* @param ctx Pointer to WhisperContext
|
|
||||||
*/
|
|
||||||
int whisper_full_lang_id(Pointer ctx);
|
|
||||||
|
|
||||||
/** Language id associated with the provided state */
|
|
||||||
int whisper_full_lang_id_from_state(Pointer state);
|
|
||||||
|
|
||||||
|
|
||||||
/** Get the start time of the specified segment. */
|
|
||||||
long whisper_full_get_segment_t0(Pointer ctx, int i_segment);
|
|
||||||
|
|
||||||
/** Get the start time of the specified segment from the state. */
|
|
||||||
long whisper_full_get_segment_t0_from_state(Pointer state, int i_segment);
|
|
||||||
|
|
||||||
/** Get the end time of the specified segment. */
|
|
||||||
long whisper_full_get_segment_t1(Pointer ctx, int i_segment);
|
|
||||||
|
|
||||||
/** Get the end time of the specified segment from the state. */
|
|
||||||
long whisper_full_get_segment_t1_from_state(Pointer state, int i_segment);
|
|
||||||
|
|
||||||
/** Get the text of the specified segment. */
|
|
||||||
String whisper_full_get_segment_text(Pointer ctx, int i_segment);
|
|
||||||
|
|
||||||
/** Get the text of the specified segment from the state. */
|
|
||||||
String whisper_full_get_segment_text_from_state(Pointer state, int i_segment);
|
|
||||||
|
|
||||||
/** Get the number of tokens in the specified segment. */
|
|
||||||
int whisper_full_n_tokens(Pointer ctx, int i_segment);
|
|
||||||
|
|
||||||
/** Get the number of tokens in the specified segment from the state. */
|
|
||||||
int whisper_full_n_tokens_from_state(Pointer state, int i_segment);
|
|
||||||
|
|
||||||
/** Get the token text of the specified token in the specified segment. */
|
|
||||||
String whisper_full_get_token_text(Pointer ctx, int i_segment, int i_token);
|
|
||||||
|
|
||||||
|
|
||||||
/** Get the token text of the specified token in the specified segment from the state. */
|
|
||||||
String whisper_full_get_token_text_from_state(Pointer ctx, Pointer state, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get the token ID of the specified token in the specified segment. */
|
|
||||||
int whisper_full_get_token_id(Pointer ctx, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get the token ID of the specified token in the specified segment from the state. */
|
|
||||||
int whisper_full_get_token_id_from_state(Pointer state, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get token data for the specified token in the specified segment. */
|
|
||||||
WhisperTokenData whisper_full_get_token_data(Pointer ctx, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get token data for the specified token in the specified segment from the state. */
|
|
||||||
WhisperTokenData whisper_full_get_token_data_from_state(Pointer state, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get the probability of the specified token in the specified segment. */
|
|
||||||
float whisper_full_get_token_p(Pointer ctx, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/** Get the probability of the specified token in the specified segment from the state. */
|
|
||||||
float whisper_full_get_token_p_from_state(Pointer state, int i_segment, int i_token);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Benchmark function for memcpy.
|
|
||||||
*
|
|
||||||
* @param nThreads Number of threads to use for the benchmark.
|
|
||||||
* @return The result of the benchmark.
|
|
||||||
*/
|
|
||||||
int whisper_bench_memcpy(int nThreads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Benchmark function for memcpy as a string.
|
|
||||||
*
|
|
||||||
* @param nThreads Number of threads to use for the benchmark.
|
|
||||||
* @return The result of the benchmark as a string.
|
|
||||||
*/
|
|
||||||
String whisper_bench_memcpy_str(int nThreads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Benchmark function for ggml_mul_mat.
|
|
||||||
*
|
|
||||||
* @param nThreads Number of threads to use for the benchmark.
|
|
||||||
* @return The result of the benchmark.
|
|
||||||
*/
|
|
||||||
int whisper_bench_ggml_mul_mat(int nThreads);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Benchmark function for ggml_mul_mat as a string.
|
|
||||||
*
|
|
||||||
* @param nThreads Number of threads to use for the benchmark.
|
|
||||||
* @return The result of the benchmark as a string.
|
|
||||||
*/
|
|
||||||
String whisper_bench_ggml_mul_mat_str(int nThreads);
|
|
||||||
}
|
|
@ -1,47 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.bean;
|
|
||||||
|
|
||||||
/**
 * Value holder for one transcribed segment: a start timestamp, an end
 * timestamp and the text recognized between them.
 *
 * Created by litonglinux@qq.com on 10/21/2023_7:48 AM
 */
public class WhisperSegment {
    /** Timestamp at which the segment begins. */
    private long start;

    /** Timestamp at which the segment ends. */
    private long end;

    /** Text of the segment. */
    private String sentence;

    /** Creates an empty segment (timestamps 0, no text). */
    public WhisperSegment() {
    }

    /**
     * Creates a fully populated segment.
     *
     * @param start    timestamp at which the segment begins
     * @param end      timestamp at which the segment ends
     * @param sentence text of the segment
     */
    public WhisperSegment(long start, long end, String sentence) {
        this.start = start;
        this.end = end;
        this.sentence = sentence;
    }

    /** @return timestamp at which the segment begins */
    public long getStart() {
        return this.start;
    }

    /** @param start timestamp at which the segment begins */
    public void setStart(long start) {
        this.start = start;
    }

    /** @return timestamp at which the segment ends */
    public long getEnd() {
        return this.end;
    }

    /** @param end timestamp at which the segment ends */
    public void setEnd(long end) {
        this.end = end;
    }

    /** @return text of the segment */
    public String getSentence() {
        return this.sentence;
    }

    /** @param sentence text of the segment */
    public void setSentence(String sentence) {
        this.sentence = sentence;
    }

    /** Renders the segment as {@code [start --> end]:sentence}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("[").append(start).append(" --> ").append(end).append("]:");
        text.append(sentence);
        return text.toString();
    }
}
|
|
@ -1,17 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.callbacks;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback for aborting GGML computation
|
|
||||||
* Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
|
|
||||||
*/
|
|
||||||
public interface GgmlAbortCallback extends Callback {
|
|
||||||
/**
|
|
||||||
* Return true to abort the computation, false to continue
|
|
||||||
*
|
|
||||||
* @param data User data passed to the callback
|
|
||||||
* @return true to abort, false to continue
|
|
||||||
*/
|
|
||||||
boolean invoke(com.sun.jna.Pointer data);
|
|
||||||
}
|
|
@ -1,24 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.callbacks;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback before the encoder starts.
|
|
||||||
* If not null, called before the encoder starts.
|
|
||||||
* If it returns false, the computation is aborted.
|
|
||||||
*/
|
|
||||||
public interface WhisperEncoderBeginCallback extends Callback {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback method before the encoder starts.
|
|
||||||
*
|
|
||||||
* @param ctx The whisper context.
|
|
||||||
* @param state The whisper state.
|
|
||||||
* @param user_data User data.
|
|
||||||
* @return True if the computation should proceed, false otherwise.
|
|
||||||
*/
|
|
||||||
boolean callback(Pointer ctx, Pointer state, Pointer user_data);
|
|
||||||
}
|
|
@ -1,25 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.callbacks;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback to filter logits.
|
|
||||||
* Can be used to modify the logits before sampling.
|
|
||||||
* If not null, called after applying temperature to logits.
|
|
||||||
*/
|
|
||||||
public interface WhisperLogitsFilterCallback extends Callback {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback method to filter logits.
|
|
||||||
*
|
|
||||||
* @param ctx The whisper context.
|
|
||||||
* @param state The whisper state.
|
|
||||||
* @param tokens The array of whisper_token_data.
|
|
||||||
* @param n_tokens The number of tokens.
|
|
||||||
* @param logits The array of logits.
|
|
||||||
* @param user_data User data.
|
|
||||||
*/
|
|
||||||
void callback(Pointer ctx, Pointer state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data);
|
|
||||||
}
|
|
@ -1,24 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.callbacks;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback for the text segment.
|
|
||||||
* Called on every newly generated text segment.
|
|
||||||
* Use the whisper_full_...() functions to obtain the text segments.
|
|
||||||
*/
|
|
||||||
public interface WhisperNewSegmentCallback extends Callback {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback method for the text segment.
|
|
||||||
*
|
|
||||||
* @param ctx The whisper context.
|
|
||||||
* @param state The whisper state.
|
|
||||||
* @param n_new The number of newly generated text segments.
|
|
||||||
* @param user_data User data.
|
|
||||||
*/
|
|
||||||
void callback(Pointer ctx, Pointer state, int n_new, Pointer user_data);
|
|
||||||
}
|
|
@ -1,22 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.callbacks;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import io.github.ggerganov.whispercpp.WhisperContext;
|
|
||||||
import io.github.ggerganov.whispercpp.model.WhisperState;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback for progress updates.
|
|
||||||
*/
|
|
||||||
public interface WhisperProgressCallback extends Callback {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback method for progress updates.
|
|
||||||
*
|
|
||||||
* @param ctx The whisper context.
|
|
||||||
* @param state The whisper state.
|
|
||||||
* @param progress The progress value.
|
|
||||||
* @param user_data User data.
|
|
||||||
*/
|
|
||||||
void callback(Pointer ctx, Pointer state, int progress, Pointer user_data);
|
|
||||||
}
|
|
@ -1,4 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.ggml;
|
|
||||||
|
|
||||||
/**
 * Placeholder type for a GGML tensor; it currently declares no members.
 */
public class GgmlTensor {
}
|
|
@ -1,18 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.ggml;
|
|
||||||
|
|
||||||
/**
 * GGML tensor data types.
 *
 * The declaration order defines each constant's {@code ordinal()}, so the
 * two REMOVED_* placeholders are kept to hold their positions.
 */
public enum GgmlType {
    // floating point
    GGML_TYPE_F32,
    GGML_TYPE_F16,

    // 4-bit quantized
    GGML_TYPE_Q4_0,
    GGML_TYPE_Q4_1,
    REMOVED_GGML_TYPE_Q4_2, // support has been removed
    REMOVED_GGML_TYPE_Q4_3, // support has been removed

    // 5-bit quantized
    GGML_TYPE_Q5_0,
    GGML_TYPE_Q5_1,

    // 8-bit quantized
    GGML_TYPE_Q8_0,
    GGML_TYPE_Q8_1,

    // integers
    GGML_TYPE_I8,
    GGML_TYPE_I16,
    GGML_TYPE_I32,

    // last entry
    GGML_TYPE_COUNT;
}
|
|
@ -1,10 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.model;
|
|
||||||
|
|
||||||
/**
 * Whisper model kinds, from unknown through tiny up to large.
 * The declaration order defines each constant's {@code ordinal()}.
 */
public enum EModel {
    MODEL_UNKNOWN,
    MODEL_TINY,
    MODEL_BASE,
    MODEL_SMALL,
    MODEL_MEDIUM,
    MODEL_LARGE;
}
|
|
@ -1,49 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import io.github.ggerganov.whispercpp.ggml.GgmlTensor;
|
|
||||||
import io.github.ggerganov.whispercpp.model.EModel;
|
|
||||||
|
|
||||||
/**
 * Placeholder for the whisper model description.
 *
 * The class has no active members. The commented-out sketch below lists the
 * fields that were outlined for it and is kept for reference only.
 */
public class WhisperModel {
    // --- general ---
    //    EModel type = EModel.MODEL_UNKNOWN;
    //
    //    WhisperHParams hparams;
    //    WhisperFilters filters;
    //
    // --- encoder ---
    //    // encoder.positional_embedding
    //    GgmlTensor e_pe;
    //
    //    // encoder.conv1
    //    GgmlTensor e_conv_1_w;
    //    GgmlTensor e_conv_1_b;
    //
    //    // encoder.conv2
    //    GgmlTensor e_conv_2_w;
    //    GgmlTensor e_conv_2_b;
    //
    //    // encoder.ln_post
    //    GgmlTensor e_ln_w;
    //    GgmlTensor e_ln_b;
    //
    // --- decoder ---
    //    // decoder.positional_embedding
    //    GgmlTensor d_pe;
    //
    //    // decoder.token_embedding
    //    GgmlTensor d_te;
    //
    //    // decoder.ln
    //    GgmlTensor d_ln_w;
    //    GgmlTensor d_ln_b;
    //
    // --- layers ---
    //    std::vector<whisper_layer_encoder> layers_encoder;
    //    std::vector<whisper_layer_decoder> layers_decoder;
    //
    // --- context ---
    //    struct ggml_context * ctx;
    //
    //    // the model memory buffer is read-only and can be shared between processors
    //    std::vector<uint8_t> * buf;
    //
    // --- tensors ---
    //    int n_loaded;
    //    Map<String, GgmlTensor> tensors;
}
|
|
@ -1,62 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.model;
|
|
||||||
|
|
||||||
import com.sun.jna.Callback;
|
|
||||||
import com.sun.jna.Pointer;
|
|
||||||
import com.sun.jna.Structure;
|
|
||||||
|
|
||||||
|
|
||||||
public class WhisperModelLoader extends Structure {
|
|
||||||
public Pointer context;
|
|
||||||
public ReadFunction read;
|
|
||||||
public EOFFunction eof;
|
|
||||||
public CloseFunction close;
|
|
||||||
|
|
||||||
public static class ReadFunction implements Callback {
|
|
||||||
public Pointer invoke(Pointer ctx, Pointer output, int readSize) {
|
|
||||||
// TODO
|
|
||||||
return ctx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class EOFFunction implements Callback {
|
|
||||||
public boolean invoke(Pointer ctx) {
|
|
||||||
// TODO
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class CloseFunction implements Callback {
|
|
||||||
public void invoke(Pointer ctx) {
|
|
||||||
// TODO
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// public WhisperModelLoader(Pointer p) {
|
|
||||||
// super(p);
|
|
||||||
// read = new ReadFunction();
|
|
||||||
// eof = new EOFFunction();
|
|
||||||
// close = new CloseFunction();
|
|
||||||
// read.setCallback(this);
|
|
||||||
// eof.setCallback(this);
|
|
||||||
// close.setCallback(this);
|
|
||||||
// read.write();
|
|
||||||
// eof.write();
|
|
||||||
// close.write();
|
|
||||||
// }
|
|
||||||
|
|
||||||
public WhisperModelLoader() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
public interface ReadCallback extends Callback {
|
|
||||||
Pointer invoke(Pointer ctx, Pointer output, int readSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
public interface EOFCallback extends Callback {
|
|
||||||
boolean invoke(Pointer ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
public interface CloseCallback extends Callback {
|
|
||||||
void invoke(Pointer ctx);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,4 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.model;
|
|
||||||
|
|
||||||
/**
 * Placeholder type for the whisper state; it currently declares no members.
 */
public class WhisperState {
}
|
|
@ -1,50 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.model;
|
|
||||||
|
|
||||||
import com.sun.jna.Structure;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Structure representing token data.
|
|
||||||
*/
|
|
||||||
public class WhisperTokenData extends Structure {
|
|
||||||
|
|
||||||
/** Token ID. */
|
|
||||||
public int id;
|
|
||||||
|
|
||||||
/** Forced timestamp token ID. */
|
|
||||||
public int tid;
|
|
||||||
|
|
||||||
/** Probability of the token. */
|
|
||||||
public float p;
|
|
||||||
|
|
||||||
/** Log probability of the token. */
|
|
||||||
public float plog;
|
|
||||||
|
|
||||||
/** Probability of the timestamp token. */
|
|
||||||
public float pt;
|
|
||||||
|
|
||||||
/** Sum of probabilities of all timestamp tokens. */
|
|
||||||
public float ptsum;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start time of the token (token-level timestamp data).
|
|
||||||
* Do not use if you haven't computed token-level timestamps.
|
|
||||||
*/
|
|
||||||
public long t0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* End time of the token (token-level timestamp data).
|
|
||||||
* Do not use if you haven't computed token-level timestamps.
|
|
||||||
*/
|
|
||||||
public long t1;
|
|
||||||
|
|
||||||
/** Voice length of the token. */
|
|
||||||
public float vlen;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList("id", "tid", "p", "plog", "pt", "ptsum", "t0", "t1", "vlen");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,19 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
import com.sun.jna.Structure;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class BeamSearchParams extends Structure {
|
|
||||||
/** ref: <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265">...</a> */
|
|
||||||
public int beam_size;
|
|
||||||
|
|
||||||
/** ref: <a href="https://arxiv.org/pdf/2204.05424.pdf">...</a> */
|
|
||||||
public float patience;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList("beam_size", "patience");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,30 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
import com.sun.jna.IntegerType;
|
|
||||||
|
|
||||||
import java.util.function.BooleanSupplier;
|
|
||||||
|
|
||||||
public class CBool extends IntegerType implements BooleanSupplier {
|
|
||||||
public static final int SIZE = 1;
|
|
||||||
public static final CBool FALSE = new CBool(0);
|
|
||||||
public static final CBool TRUE = new CBool(1);
|
|
||||||
|
|
||||||
|
|
||||||
public CBool() {
|
|
||||||
this(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public CBool(long value) {
|
|
||||||
super(SIZE, value, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean getAsBoolean() {
|
|
||||||
return intValue() == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return intValue() == 1 ? "true" : "false";
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,16 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
import com.sun.jna.Structure;
|
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class GreedyParams extends Structure {
|
|
||||||
/** <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264">...</a> */
|
|
||||||
public int best_of;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Collections.singletonList("best_of");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,30 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
import com.sun.jna.*;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class WhisperAhead extends Structure {
|
|
||||||
|
|
||||||
public int n_text_layer;
|
|
||||||
|
|
||||||
public int n_head;
|
|
||||||
|
|
||||||
public WhisperAhead() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
public WhisperAhead(int textLayer, int head) {
|
|
||||||
super();
|
|
||||||
this.n_text_layer = textLayer;
|
|
||||||
this.n_head = head;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList("n_text_layer", "n_head");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class ByReference extends WhisperAhead implements Structure.ByReference {}
|
|
||||||
|
|
||||||
public static class ByValue extends WhisperAhead implements Structure.ByValue {}
|
|
||||||
}
|
|
@ -1,41 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
import com.sun.jna.*;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class WhisperAheads extends Structure {
|
|
||||||
public NativeLong n_heads;
|
|
||||||
|
|
||||||
public Pointer heads;
|
|
||||||
|
|
||||||
public WhisperAheads() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create alignment heads from an array of WhisperAhead objects
|
|
||||||
*/
|
|
||||||
public void setHeads(WhisperAhead[] aheadsArray) {
|
|
||||||
this.n_heads = new NativeLong(aheadsArray.length);
|
|
||||||
|
|
||||||
int structSize = aheadsArray[0].size();
|
|
||||||
Memory mem = new Memory(structSize * aheadsArray.length);
|
|
||||||
|
|
||||||
for (int i = 0; i < aheadsArray.length; i++) {
|
|
||||||
aheadsArray[i].write();
|
|
||||||
byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
|
|
||||||
mem.write(i * structSize, buffer, 0, buffer.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.heads = mem;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList("n_heads", "heads");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class ByReference extends WhisperAheads implements Structure.ByReference {}
|
|
||||||
|
|
||||||
public static class ByValue extends WhisperAheads implements Structure.ByValue {}
|
|
||||||
}
|
|
@ -1,81 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
import com.sun.jna.*;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parameters for the whisper_init_from_file_with_params() function.
|
|
||||||
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
|
||||||
* whisper_context_default_params()
|
|
||||||
*/
|
|
||||||
public class WhisperContextParams extends Structure {
|
|
||||||
public WhisperContextParams(Pointer p) {
|
|
||||||
super(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
public WhisperContextParams() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Use GPU for inference (default = true) */
|
|
||||||
public CBool use_gpu;
|
|
||||||
|
|
||||||
/** Use flash attention (default = false) */
|
|
||||||
public CBool flash_attn;
|
|
||||||
|
|
||||||
/** CUDA device to use (default = 0) */
|
|
||||||
public int gpu_device;
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Enable token-level timestamps with DTW (default = false) */
|
|
||||||
public CBool dtw_token_timestamps;
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Alignment heads preset for DTW */
|
|
||||||
public int dtw_aheads_preset;
|
|
||||||
|
|
||||||
/** Number of top layers to use for DTW when using WHISPER_AHEADS_N_TOP_MOST preset */
|
|
||||||
public int dtw_n_top;
|
|
||||||
|
|
||||||
public WhisperAheads.ByValue dtw_aheads;
|
|
||||||
|
|
||||||
/** DTW memory size (internal use) */
|
|
||||||
public NativeLong dtw_mem_size;
|
|
||||||
|
|
||||||
/** Use GPU for inference */
|
|
||||||
public void useGpu(boolean enable) {
|
|
||||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Use flash attention */
|
|
||||||
public void useFlashAttn(boolean enable) {
|
|
||||||
flash_attn = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Enable DTW token-level timestamps */
|
|
||||||
public void enableDtwTokenTimestamps(boolean enable) {
|
|
||||||
dtw_token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Set DTW alignment heads preset */
|
|
||||||
public void setDtwAheadsPreset(int preset) {
|
|
||||||
dtw_aheads_preset = preset;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList(
|
|
||||||
"use_gpu",
|
|
||||||
"flash_attn",
|
|
||||||
"gpu_device",
|
|
||||||
"dtw_token_timestamps",
|
|
||||||
"dtw_aheads_preset",
|
|
||||||
"dtw_n_top",
|
|
||||||
"dtw_aheads",
|
|
||||||
"dtw_mem_size"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class ByValue extends WhisperContextParams implements Structure.ByValue {
|
|
||||||
public ByValue() { super(); }
|
|
||||||
public ByValue(Pointer p) { super(p); }
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,10 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
 * Plain holder for filter data: two integer sizes and a list of values.
 * All fields are package-private.
 */
public class WhisperFilters {
    /** Value named n_mel. */
    int n_mel;

    /** Value named n_fft. */
    int n_fft;

    /** Filter values. */
    List<Float> data;
}
|
|
@ -1,358 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
import com.sun.jna.*;
|
|
||||||
import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
|
|
||||||
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
|
|
||||||
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
|
|
||||||
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
|
|
||||||
import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parameters for the whisper_full() function.
|
|
||||||
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
|
||||||
* whisper_full_default_params()
|
|
||||||
*/
|
|
||||||
public class WhisperFullParams extends Structure {
|
|
||||||
|
|
||||||
public WhisperFullParams() {
|
|
||||||
super();
|
|
||||||
}
|
|
||||||
|
|
||||||
public WhisperFullParams(Pointer p) {
|
|
||||||
super(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Sampling strategy for whisper_full() function. */
|
|
||||||
public int strategy;
|
|
||||||
|
|
||||||
/** Number of threads. (default = 4) */
|
|
||||||
public int n_threads;
|
|
||||||
|
|
||||||
/** Maximum tokens to use from past text as a prompt for the decoder. (default = 16384) */
|
|
||||||
public int n_max_text_ctx;
|
|
||||||
|
|
||||||
/** Start offset in milliseconds. (default = 0) */
|
|
||||||
public int offset_ms;
|
|
||||||
|
|
||||||
/** Audio duration to process in milliseconds. (default = 0) */
|
|
||||||
public int duration_ms;
|
|
||||||
|
|
||||||
/** Translate flag. (default = false) */
|
|
||||||
public CBool translate;
|
|
||||||
|
|
||||||
/** The complement of translateMode() */
|
|
||||||
public void transcribeMode() {
|
|
||||||
translate = CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** The complement of transcribeMode() */
|
|
||||||
public void translateMode() {
|
|
||||||
translate = CBool.TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. (default = true) */
|
|
||||||
public CBool no_context;
|
|
||||||
|
|
||||||
/** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. (default = true) */
|
|
||||||
public void enableContext(boolean enable) {
|
|
||||||
no_context = enable ? CBool.FALSE : CBool.TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Generate timestamps or not? */
|
|
||||||
public CBool no_timestamps;
|
|
||||||
|
|
||||||
/** Flag to force single segment output (useful for streaming). (default = false) */
|
|
||||||
public CBool single_segment;
|
|
||||||
|
|
||||||
/** Flag to force single segment output (useful for streaming). (default = false) */
|
|
||||||
public void singleSegment(boolean single) {
|
|
||||||
single_segment = single ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
|
||||||
public CBool print_special;
|
|
||||||
|
|
||||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
|
||||||
public void printSpecial(boolean enable) {
|
|
||||||
print_special = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to print progress information. (default = true) */
|
|
||||||
public CBool print_progress;
|
|
||||||
|
|
||||||
/** Flag to print progress information. (default = true) */
|
|
||||||
public void printProgress(boolean enable) {
|
|
||||||
print_progress = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
|
||||||
public CBool print_realtime;
|
|
||||||
|
|
||||||
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
|
||||||
public void printRealtime(boolean enable) {
|
|
||||||
print_realtime = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
|
||||||
public CBool print_timestamps;
|
|
||||||
|
|
||||||
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
|
||||||
public void printTimestamps(boolean enable) {
|
|
||||||
print_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
|
||||||
public CBool token_timestamps;
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
|
||||||
public void tokenTimestamps(boolean enable) {
|
|
||||||
token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Timestamp token probability threshold (~0.01). (default = 0.01) */
|
|
||||||
public float thold_pt;
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Timestamp token sum probability threshold (~0.01). */
|
|
||||||
public float thold_ptsum;
|
|
||||||
|
|
||||||
/** Maximum segment length in characters. (default = 0) */
|
|
||||||
public int max_len;
|
|
||||||
|
|
||||||
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
|
||||||
public CBool split_on_word;
|
|
||||||
|
|
||||||
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
|
||||||
public void splitOnWord(boolean enable) {
|
|
||||||
split_on_word = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Maximum tokens per segment (0, default = no limit) */
|
|
||||||
public int max_tokens;
|
|
||||||
|
|
||||||
/** [EXPERIMENTAL] Enable debug mode for extra info */
|
|
||||||
public CBool debug_mode;
|
|
||||||
|
|
||||||
/** Enable debug mode */
|
|
||||||
public void enableDebugMode(boolean enable) {
|
|
||||||
debug_mode = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Overwrite the audio context size (0 = use default). */
|
|
||||||
public int audio_ctx;
|
|
||||||
|
|
||||||
/** Enable tinydiarize (default = false) */
|
|
||||||
public CBool tdrz_enable;
|
|
||||||
|
|
||||||
/** Enable tinydiarize (default = false) */
|
|
||||||
public void tdrzEnable(boolean enable) {
|
|
||||||
tdrz_enable = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Regular expression matching tokens to suppress. */
|
|
||||||
public String suppress_regex;
|
|
||||||
|
|
||||||
/** Tokens to provide to the whisper decoder as an initial prompt.
|
|
||||||
* These are prepended to any existing text context from a previous call. */
|
|
||||||
public String initial_prompt;
|
|
||||||
|
|
||||||
/** Prompt tokens. (int*) */
|
|
||||||
public Pointer prompt_tokens;
|
|
||||||
|
|
||||||
public void setPromptTokens(int[] tokens) {
|
|
||||||
Memory mem = new Memory(tokens.length * 4L);
|
|
||||||
mem.write(0, tokens, 0, tokens.length);
|
|
||||||
prompt_tokens = mem;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Number of prompt tokens. */
|
|
||||||
public int prompt_n_tokens;
|
|
||||||
|
|
||||||
/** Language for auto-detection.
|
|
||||||
* For auto-detection, set to `null`, `""`, or "auto". */
|
|
||||||
public String language;
|
|
||||||
|
|
||||||
/** Flag to indicate whether to detect language automatically. */
|
|
||||||
public CBool detect_language;
|
|
||||||
|
|
||||||
/** Flag to indicate whether to detect language automatically. */
|
|
||||||
public void detectLanguage(boolean enable) {
|
|
||||||
detect_language = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Common decoding parameters.
|
|
||||||
|
|
||||||
/** Flag to suppress blank tokens. */
|
|
||||||
public CBool suppress_blank;
|
|
||||||
|
|
||||||
public void suppressBlanks(boolean enable) {
|
|
||||||
suppress_blank = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Flag to suppress non-speech tokens. */
|
|
||||||
public CBool suppress_nst;
|
|
||||||
|
|
||||||
/** Flag to suppress non-speech tokens. */
|
|
||||||
public void suppressNonSpeechTokens(boolean enable) {
|
|
||||||
suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Initial decoding temperature. */
|
|
||||||
public float temperature;
|
|
||||||
|
|
||||||
/** Maximum initial timestamp. */
|
|
||||||
public float max_initial_ts;
|
|
||||||
|
|
||||||
/** Length penalty. */
|
|
||||||
public float length_penalty;
|
|
||||||
|
|
||||||
// Fallback parameters.
|
|
||||||
|
|
||||||
/** Temperature increment. */
|
|
||||||
public float temperature_inc;
|
|
||||||
|
|
||||||
/** Entropy threshold (similar to OpenAI's "compression_ratio_threshold"). */
|
|
||||||
public float entropy_thold;
|
|
||||||
|
|
||||||
/** Log probability threshold. */
|
|
||||||
public float logprob_thold;
|
|
||||||
|
|
||||||
/** No speech threshold. */
|
|
||||||
public float no_speech_thold;
|
|
||||||
|
|
||||||
/** Greedy decoding parameters. */
|
|
||||||
public GreedyParams greedy;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Beam search decoding parameters.
|
|
||||||
*/
|
|
||||||
public BeamSearchParams beam_search;
|
|
||||||
|
|
||||||
public void setBestOf(int bestOf) {
|
|
||||||
if (greedy == null) {
|
|
||||||
greedy = new GreedyParams();
|
|
||||||
}
|
|
||||||
greedy.best_of = bestOf;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBeamSize(int beamSize) {
|
|
||||||
if (beam_search == null) {
|
|
||||||
beam_search = new BeamSearchParams();
|
|
||||||
}
|
|
||||||
beam_search.beam_size = beamSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBeamSizeAndPatience(int beamSize, float patience) {
|
|
||||||
if (beam_search == null) {
|
|
||||||
beam_search = new BeamSearchParams();
|
|
||||||
}
|
|
||||||
beam_search.beam_size = beamSize;
|
|
||||||
beam_search.patience = patience;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback for every newly generated text segment.
|
|
||||||
* WhisperNewSegmentCallback
|
|
||||||
*/
|
|
||||||
public Pointer new_segment_callback;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* User data for the new_segment_callback.
|
|
||||||
*/
|
|
||||||
public Pointer new_segment_callback_user_data;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback on each progress update.
|
|
||||||
* WhisperProgressCallback
|
|
||||||
*/
|
|
||||||
public Pointer progress_callback;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* User data for the progress_callback.
|
|
||||||
*/
|
|
||||||
public Pointer progress_callback_user_data;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback each time before the encoder starts.
|
|
||||||
* WhisperEncoderBeginCallback
|
|
||||||
*/
|
|
||||||
public Pointer encoder_begin_callback;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* User data for the encoder_begin_callback.
|
|
||||||
*/
|
|
||||||
public Pointer encoder_begin_callback_user_data;
|
|
||||||
|
|
||||||
/** Callback used to abort GGML computation */
|
|
||||||
public Pointer abort_callback;
|
|
||||||
|
|
||||||
/** User data for the abort_callback */
|
|
||||||
public Pointer abort_callback_user_data;
|
|
||||||
|
|
||||||
public void setAbortCallback(GgmlAbortCallback callback) {
|
|
||||||
abort_callback = CallbackReference.getFunctionPointer(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Callback by each decoder to filter obtained logits.
|
|
||||||
* WhisperLogitsFilterCallback
|
|
||||||
*/
|
|
||||||
public Pointer logits_filter_callback;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* User data for the logits_filter_callback.
|
|
||||||
*/
|
|
||||||
public Pointer logits_filter_callback_user_data;
|
|
||||||
|
|
||||||
|
|
||||||
public void setNewSegmentCallback(WhisperNewSegmentCallback callback) {
|
|
||||||
new_segment_callback = CallbackReference.getFunctionPointer(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProgressCallback(WhisperProgressCallback callback) {
|
|
||||||
progress_callback = CallbackReference.getFunctionPointer(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEncoderBeginCallbackeginCallbackCallback(WhisperEncoderBeginCallback callback) {
|
|
||||||
encoder_begin_callback = CallbackReference.getFunctionPointer(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLogitsFilterCallback(WhisperLogitsFilterCallback callback) {
|
|
||||||
logits_filter_callback = CallbackReference.getFunctionPointer(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Grammar stuff */
|
|
||||||
public Pointer grammar_rules;
|
|
||||||
public long n_grammar_rules;
|
|
||||||
public long i_start_rule;
|
|
||||||
public float grammar_penalty;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected List<String> getFieldOrder() {
|
|
||||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
|
|
||||||
"offset_ms", "duration_ms", "translate", "no_context",
|
|
||||||
"no_timestamps", "single_segment", "print_special",
|
|
||||||
"print_progress", "print_realtime", "print_timestamps",
|
|
||||||
"token_timestamps", "thold_pt", "thold_ptsum", "max_len",
|
|
||||||
"split_on_word", "max_tokens", "debug_mode", "audio_ctx",
|
|
||||||
"tdrz_enable", "suppress_regex", "initial_prompt",
|
|
||||||
"prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
|
||||||
"suppress_blank", "suppress_nst", "temperature",
|
|
||||||
"max_initial_ts", "length_penalty", "temperature_inc",
|
|
||||||
"entropy_thold", "logprob_thold", "no_speech_thold", "greedy",
|
|
||||||
"beam_search", "new_segment_callback", "new_segment_callback_user_data",
|
|
||||||
"progress_callback", "progress_callback_user_data",
|
|
||||||
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
|
||||||
"abort_callback", "abort_callback_user_data",
|
|
||||||
"logits_filter_callback", "logits_filter_callback_user_data",
|
|
||||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class ByValue extends WhisperFullParams implements Structure.ByValue {
|
|
||||||
public ByValue() { super(); }
|
|
||||||
public ByValue(Pointer p) { super(p); }
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,15 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
/**
 * Plain holder for Whisper model hyperparameters.
 *
 * Every field is package-private and initialised to a hard-coded default;
 * the class declares no constructors, accessors or other methods.
 * NOTE(review): the defaults look like the dimensions of one specific (small)
 * model - confirm which model they are meant to describe.
 */
public class WhisperHParams {
|
|
||||||
int n_vocab = 51864;
|
|
||||||
int n_audio_ctx = 1500;
|
|
||||||
int n_audio_state = 384;
|
|
||||||
int n_audio_head = 6;
|
|
||||||
int n_audio_layer = 4;
|
|
||||||
int n_text_ctx = 448;
|
|
||||||
int n_text_state = 384;
|
|
||||||
int n_text_head = 6;
|
|
||||||
int n_text_layer = 4;
|
|
||||||
int n_mels = 80;
|
|
||||||
int ftype = 1;
|
|
||||||
}
|
|
@ -1,10 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp.params;
|
|
||||||
|
|
||||||
/**
 * Available sampling strategies.
 *
 * The declaration order is significant: the tests compare {@code ordinal()}
 * with the integer {@code strategy} field of the default parameters.
 * NOTE(review): presumably the ordinals mirror the native enum values - confirm against whisper.h.
 */
|
|
||||||
public enum WhisperSamplingStrategy {
|
|
||||||
/** similar to OpenAI's GreedyDecoder */
|
|
||||||
WHISPER_SAMPLING_GREEDY,
|
|
||||||
|
|
||||||
/** similar to OpenAI's BeamSearchDecoder */
|
|
||||||
WHISPER_SAMPLING_BEAM_SEARCH
|
|
||||||
}
|
|
@ -1,144 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
|
||||||
import io.github.ggerganov.whispercpp.params.CBool;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
|
||||||
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import javax.sound.sampled.AudioInputStream;
|
|
||||||
import javax.sound.sampled.AudioSystem;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
class WhisperCppTest {
|
|
||||||
private static WhisperCpp whisper = new WhisperCpp();
|
|
||||||
private static boolean modelInitialised = false;
|
|
||||||
|
|
||||||
@BeforeAll
|
|
||||||
static void init() throws FileNotFoundException {
|
|
||||||
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
|
||||||
// or you can provide the absolute path to the model file.
|
|
||||||
//String modelName = "../../models/ggml-tiny.bin";
|
|
||||||
String modelName = "../../models/ggml-tiny.en.bin";
|
|
||||||
try {
|
|
||||||
whisper.initContext(modelName);
|
|
||||||
//whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
|
||||||
//whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
|
||||||
modelInitialised = true;
|
|
||||||
} catch (FileNotFoundException ex) {
|
|
||||||
System.out.println("Model " + modelName + " not found");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testGetDefaultFullParams_BeamSearch() {
|
|
||||||
// When
|
|
||||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
|
||||||
|
|
||||||
// Then
|
|
||||||
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal(), params.strategy);
|
|
||||||
assertNotEquals(0, params.n_threads);
|
|
||||||
assertEquals(16384, params.n_max_text_ctx);
|
|
||||||
assertFalse(params.translate);
|
|
||||||
assertEquals(0.01f, params.thold_pt);
|
|
||||||
assertEquals(5, params.beam_search.beam_size);
|
|
||||||
assertEquals(-1.0f, params.beam_search.patience);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testGetDefaultFullParams_Greedy() {
|
|
||||||
// When
|
|
||||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
|
||||||
|
|
||||||
// Then
|
|
||||||
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
|
|
||||||
assertNotEquals(0, params.n_threads);
|
|
||||||
assertEquals(16384, params.n_max_text_ctx);
|
|
||||||
assertEquals(5, params.greedy.best_of);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFullTranscribe() throws Exception {
|
|
||||||
if (!modelInitialised) {
|
|
||||||
System.out.println("Model not initialised, skipping test");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given
|
|
||||||
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
|
||||||
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
|
||||||
|
|
||||||
byte[] b = new byte[audioInputStream.available()];
|
|
||||||
float[] floats = new float[b.length / 2];
|
|
||||||
|
|
||||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
|
||||||
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
|
||||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
|
||||||
params.print_progress = CBool.FALSE;
|
|
||||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
|
||||||
|
|
||||||
|
|
||||||
try {
|
|
||||||
audioInputStream.read(b);
|
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
|
||||||
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
|
||||||
floats[j] = intSample / 32767.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
// When
|
|
||||||
String result = whisper.fullTranscribe(params, floats);
|
|
||||||
|
|
||||||
// Then
|
|
||||||
System.err.println(result);
|
|
||||||
assertEquals("And so my fellow Americans ask not what your country can do for you " +
|
|
||||||
"ask what you can do for your country.",
|
|
||||||
result.replace(",", ""));
|
|
||||||
} finally {
|
|
||||||
audioInputStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testFullTranscribeWithTime() throws Exception {
|
|
||||||
if (!modelInitialised) {
|
|
||||||
System.out.println("Model not initialised, skipping test");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given
|
|
||||||
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
|
||||||
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
|
||||||
|
|
||||||
byte[] b = new byte[audioInputStream.available()];
|
|
||||||
float[] floats = new float[b.length / 2];
|
|
||||||
|
|
||||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
|
||||||
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
|
||||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
|
||||||
params.print_progress = CBool.FALSE;
|
|
||||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
|
||||||
|
|
||||||
try {
|
|
||||||
audioInputStream.read(b);
|
|
||||||
|
|
||||||
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
|
||||||
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
|
||||||
floats[j] = intSample / 32767.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<WhisperSegment> segments = whisper.fullTranscribeWithTime(params, floats);
|
|
||||||
assertTrue(segments.size() > 0, "The size of segments should be greater than 0");
|
|
||||||
for (WhisperSegment segment : segments) {
|
|
||||||
System.out.println(segment);
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
audioInputStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,17 +0,0 @@
|
|||||||
package io.github.ggerganov.whispercpp;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
class WhisperJnaLibraryTest {
|
|
||||||
|
|
||||||
@Test
|
|
||||||
void testWhisperPrint_system_info() {
|
|
||||||
String systemInfo = WhisperCppJnaLibrary.instance.whisper_print_system_info();
|
|
||||||
// eg: "AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0
|
|
||||||
// | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | "
|
|
||||||
System.out.println("System info: " + systemInfo);
|
|
||||||
assertTrue(systemInfo.length() > 10);
|
|
||||||
}
|
|
||||||
}
|
|
@ -33,9 +33,6 @@ mkdir build-em && cd build-em
|
|||||||
emcmake cmake .. && make -j
|
emcmake cmake .. && make -j
|
||||||
|
|
||||||
# run test
|
# run test
|
||||||
node ../tests/test-whisper.js
|
|
||||||
|
|
||||||
# For Node.js versions prior to v16.4.0, experimental features need to be enabled:
|
|
||||||
node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
||||||
|
|
||||||
# publish npm package
|
# publish npm package
|
||||||
@ -44,7 +41,7 @@ make publish-npm
|
|||||||
|
|
||||||
## Sample run
|
## Sample run
|
||||||
|
|
||||||
```text
|
```java
|
||||||
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
||||||
|
|
||||||
whisper_model_load: loading model from 'whisper.bin'
|
whisper_model_load: loading model from 'whisper.bin'
|
||||||
@ -66,7 +63,7 @@ whisper_model_load: ggml ctx size = 140.60 MB
|
|||||||
whisper_model_load: memory size = 22.83 MB
|
whisper_model_load: memory size = 22.83 MB
|
||||||
whisper_model_load: model size = 140.54 MB
|
whisper_model_load: model size = 140.54 MB
|
||||||
|
|
||||||
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
||||||
|
|
||||||
operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
|
operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ struct whisper_context * g_context;
|
|||||||
EMSCRIPTEN_BINDINGS(whisper) {
|
EMSCRIPTEN_BINDINGS(whisper) {
|
||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
if (g_context == nullptr) {
|
if (g_context == nullptr) {
|
||||||
g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
g_context = whisper_init(path_model.c_str());
|
||||||
if (g_context != nullptr) {
|
if (g_context != nullptr) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1 +1 @@
|
|||||||
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var 
objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}}self.onmessage=handleMessage;
|
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "whisper.cpp",
|
"name": "whisper.cpp",
|
||||||
"version": "1.7.5",
|
"version": "1.0.4",
|
||||||
"description": "Whisper speech recognition",
|
"description": "Whisper speech recognition",
|
||||||
"main": "whisper.js",
|
"main": "whisper.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
File diff suppressed because one or more lines are too long
9
bindings/ruby/.gitignore
vendored
9
bindings/ruby/.gitignore
vendored
@ -1,9 +0,0 @@
|
|||||||
LICENSE
|
|
||||||
pkg/
|
|
||||||
lib/whisper.*
|
|
||||||
ext/examples/
|
|
||||||
ext/ggml/
|
|
||||||
ext/include/
|
|
||||||
ext/scripts/
|
|
||||||
ext/src/
|
|
||||||
test/fixtures/
|
|
@ -1,349 +0,0 @@
|
|||||||
whispercpp
|
|
||||||
==========
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
Ruby bindings for [whisper.cpp][], an interface to an automatic speech recognition model.
|
|
||||||
|
|
||||||
Installation
|
|
||||||
------------
|
|
||||||
|
|
||||||
Install the gem and add to the application's Gemfile by executing:
|
|
||||||
|
|
||||||
$ bundle add whispercpp
|
|
||||||
|
|
||||||
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
||||||
|
|
||||||
$ gem install whispercpp
|
|
||||||
|
|
||||||
You can pass build options for whisper.cpp, for instance:
|
|
||||||
|
|
||||||
$ bundle config build.whispercpp --enable-ggml-cuda
|
|
||||||
|
|
||||||
or,
|
|
||||||
|
|
||||||
$ gem install whispercpp -- --enable-ggml-cuda
|
|
||||||
|
|
||||||
See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options presented in the README to Ruby-style options, for example:
|
|
||||||
|
|
||||||
Boolean options:
|
|
||||||
|
|
||||||
* `-DGGML_BLAS=1` -> `--enable-ggml-blas`
|
|
||||||
* `-DWHISPER_COREML=OFF` -> `--disable-whisper-coreml`
|
|
||||||
|
|
||||||
Argument options:
|
|
||||||
|
|
||||||
* `-DGGML_CUDA_COMPRESSION_MODE=size` -> `--ggml-cuda-compression-mode=size`
|
|
||||||
|
|
||||||
Combination:
|
|
||||||
|
|
||||||
* `-DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"` -> `--enable-ggml-cuda --cmake-cuda-architectures="86"`
|
|
||||||
|
|
||||||
For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`), and convert it to kebab-case: `--enable-ggml-cuda`.
|
|
||||||
For options that require an argument, like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, convert it to kebab-case, and append `=` followed by the argument: `--cmake-cuda-architectures="86"`.
|
|
||||||
|
|
||||||
Usage
|
|
||||||
-----
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
require "whisper"
|
|
||||||
|
|
||||||
whisper = Whisper::Context.new("base")
|
|
||||||
|
|
||||||
params = Whisper::Params.new(
|
|
||||||
language: "en",
|
|
||||||
offset: 10_000,
|
|
||||||
duration: 60_000,
|
|
||||||
max_text_tokens: 300,
|
|
||||||
translate: true,
|
|
||||||
print_timestamps: false,
|
|
||||||
initial_prompt: "Initial prompt here."
|
|
||||||
)
|
|
||||||
|
|
||||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
|
||||||
puts whole_text
|
|
||||||
end
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### Preparing model ###
|
|
||||||
|
|
||||||
Some models are prepared up-front:
|
|
||||||
|
|
||||||
You can also use a shorthand name for a pre-converted model:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper = Whisper::Context.new("base.en")
|
|
||||||
```
|
|
||||||
|
|
||||||
You can see the list of prepared model names by `Whisper::Model.pre_converted_models.keys`:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
puts Whisper::Model.pre_converted_models.keys
|
|
||||||
# tiny
|
|
||||||
# tiny.en
|
|
||||||
# tiny-q5_1
|
|
||||||
# tiny.en-q5_1
|
|
||||||
# tiny-q8_0
|
|
||||||
# base
|
|
||||||
# base.en
|
|
||||||
# base-q5_1
|
|
||||||
# base.en-q5_1
|
|
||||||
# base-q8_0
|
|
||||||
# :
|
|
||||||
# :
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also retrieve each model:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
base_en = Whisper::Model.pre_converted_models["base.en"]
|
|
||||||
whisper = Whisper::Context.new(base_en)
|
|
||||||
```
|
|
||||||
|
|
||||||
The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
Whisper::Model.pre_converted_models["base"].clear_cache
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also use local model files you prepared:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper = Whisper::Context.new("path/to/your/model.bin")
|
|
||||||
```
|
|
||||||
|
|
||||||
Or, you can download model files:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper = Whisper::Context.new("https://example.net/uri/of/your/model.bin")
|
|
||||||
# Or
|
|
||||||
whisper = Whisper::Context.new(URI("https://example.net/uri/of/your/model.bin"))
|
|
||||||
```
|
|
||||||
|
|
||||||
See [models][] page for details.
|
|
||||||
|
|
||||||
### Preparing audio file ###
|
|
||||||
|
|
||||||
Currently, whisper.cpp accepts only 16-bit WAV files.
|
|
||||||
|
|
||||||
### Voice Activity Detection (VAD) ###
|
|
||||||
|
|
||||||
Support for Voice Activity Detection (VAD) can be enabled by setting `Whisper::Params`'s `vad` argument to `true` and specifying VAD model:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
Whisper::Params.new(
|
|
||||||
vad: true,
|
|
||||||
vad_model_path: "silero-v5.1.2",
|
|
||||||
# other arguments...
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically.
|
|
||||||
Currently, "silero-v5.1.2" is registered as a pre-converted model, like the ASR models. You can also specify the file path or URI of a model.
|
|
||||||
|
|
||||||
If you need to configure VAD behavior, pass params for that:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
Whisper::Params.new(
|
|
||||||
vad: true,
|
|
||||||
vad_model_path: "silero-v5.1.2",
|
|
||||||
vad_params: Whisper::VAD::Params.new(
|
|
||||||
threshold: 1.0, # defaults to 0.5
|
|
||||||
min_speech_duration_ms: 500, # defaults to 250
|
|
||||||
min_silence_duration_ms: 200, # defaults to 100
|
|
||||||
max_speech_duration_s: 30000, # default is FLT_MAX,
|
|
||||||
speech_pad_ms: 50, # defaults to 30
|
|
||||||
samples_overlap: 0.5 # defaults to 0.1
|
|
||||||
),
|
|
||||||
# other arguments...
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
For details on VAD, see [whisper.cpp's README](https://github.com/ggml-org/whisper.cpp?tab=readme-ov-file#voice-activity-detection-vad).
|
|
||||||
|
|
||||||
### Output ###
|
|
||||||
|
|
||||||
whispercpp supports SRT and WebVTT output:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
puts whisper.transcribe("path/to/audio.wav", Whisper::Params.new).to_webvtt
|
|
||||||
# =>
|
|
||||||
WEBVTT
|
|
||||||
|
|
||||||
1
|
|
||||||
00:00:00.000 --> 00:00:03.860
|
|
||||||
My thought I have nobody by a beauty and will as you poured.
|
|
||||||
|
|
||||||
2
|
|
||||||
00:00:03.860 --> 00:00:09.840
|
|
||||||
Mr. Rochester is sub in that so-don't find simplest, and devoted about, to let might in
|
|
||||||
|
|
||||||
3
|
|
||||||
00:00:09.840 --> 00:00:09.940
|
|
||||||
a
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
You may call `#to_srt`, too
|
|
||||||
|
|
||||||
|
|
||||||
API
|
|
||||||
---
|
|
||||||
|
|
||||||
### Transcription ###
|
|
||||||
|
|
||||||
By default, `Whisper::Context#transcribe` works in a single thread. You can make it work in parallel by passing `n_processors` option:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper.transcribe("path/to/audio.wav", params, n_processors: Etc.nprocessors)
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that transcription accuracy may occasionally be lower when running in parallel.
|
|
||||||
|
|
||||||
### Segments ###
|
|
||||||
|
|
||||||
Once `Whisper::Context#transcribe` has been called, you can retrieve the segments with `#each_segment`:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
def format_time(time_ms)
|
|
||||||
sec, decimal_part = time_ms.divmod(1000)
|
|
||||||
min, sec = sec.divmod(60)
|
|
||||||
hour, min = min.divmod(60)
|
|
||||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
|
||||||
end
|
|
||||||
|
|
||||||
whisper
|
|
||||||
.transcribe("path/to/audio.wav", params)
|
|
||||||
.each_segment.with_index do |segment, index|
|
|
||||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
|
||||||
nth: index + 1,
|
|
||||||
st: format_time(segment.start_time),
|
|
||||||
ed: format_time(segment.end_time),
|
|
||||||
text: segment.text
|
|
||||||
}
|
|
||||||
line << " (speaker turned)" if segment.speaker_turn_next?
|
|
||||||
puts line
|
|
||||||
end
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
You can also add a hook to params that is called on each new segment:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
# Add hook before calling #transcribe
|
|
||||||
params.on_new_segment do |segment|
|
|
||||||
line = "[%{st} --> %{ed}] %{text}" % {
|
|
||||||
st: format_time(segment.start_time),
|
|
||||||
ed: format_time(segment.end_time),
|
|
||||||
text: segment.text
|
|
||||||
}
|
|
||||||
line << " (speaker turned)" if segment.speaker_turn_next?
|
|
||||||
puts line
|
|
||||||
end
|
|
||||||
|
|
||||||
whisper.transcribe("path/to/audio.wav", params)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### Models ###
|
|
||||||
|
|
||||||
You can see model information:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
whisper = Whisper::Context.new("base")
|
|
||||||
model = whisper.model
|
|
||||||
|
|
||||||
model.n_vocab # => 51864
|
|
||||||
model.n_audio_ctx # => 1500
|
|
||||||
model.n_audio_state # => 512
|
|
||||||
model.n_audio_head # => 8
|
|
||||||
model.n_audio_layer # => 6
|
|
||||||
model.n_text_ctx # => 448
|
|
||||||
model.n_text_state # => 512
|
|
||||||
model.n_text_head # => 8
|
|
||||||
model.n_text_layer # => 6
|
|
||||||
model.n_mels # => 80
|
|
||||||
model.ftype # => 1
|
|
||||||
model.type # => "base"
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
### Logging ###
|
|
||||||
|
|
||||||
You can set log callback:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
prefix = "[MyApp] "
|
|
||||||
log_callback = ->(level, buffer, user_data) {
|
|
||||||
case level
|
|
||||||
when Whisper::LOG_LEVEL_NONE
|
|
||||||
puts "#{user_data}none: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_INFO
|
|
||||||
puts "#{user_data}info: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_WARN
|
|
||||||
puts "#{user_data}warn: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_ERROR
|
|
||||||
puts "#{user_data}error: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_DEBUG
|
|
||||||
puts "#{user_data}debug: #{buffer}"
|
|
||||||
when Whisper::LOG_LEVEL_CONT
|
|
||||||
puts "#{user_data}same to previous: #{buffer}"
|
|
||||||
end
|
|
||||||
}
|
|
||||||
Whisper.log_set log_callback, prefix
|
|
||||||
```
|
|
||||||
|
|
||||||
Using this feature, you can also suppress logging:
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
Whisper.log_set ->(level, buffer, user_data) {
|
|
||||||
# do nothing
|
|
||||||
}, nil
|
|
||||||
Whisper::Context.new("base")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Low-level API to transcribe ###
|
|
||||||
|
|
||||||
You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
|
|
||||||
|
|
||||||
```ruby
|
|
||||||
require "whisper"
|
|
||||||
require "wavefile"
|
|
||||||
|
|
||||||
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
|
|
||||||
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
|
|
||||||
|
|
||||||
whisper = Whisper::Context.new("base")
|
|
||||||
whisper
|
|
||||||
.full(Whisper::Params.new, samples)
|
|
||||||
.each_segment do |segment|
|
|
||||||
puts segment.text
|
|
||||||
end
|
|
||||||
```
|
|
||||||
|
|
||||||
The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
|
|
||||||
|
|
||||||
Development
|
|
||||||
-----------
|
|
||||||
|
|
||||||
% git clone https://github.com/ggml-org/whisper.cpp.git
|
|
||||||
% cd whisper.cpp/bindings/ruby
|
|
||||||
% rake test
|
|
||||||
|
|
||||||
The first call of `rake test` builds the extension and downloads a model for testing. After that, you can add tests in the `tests` directory and modify `ext/ruby_whisper.cpp`.
|
|
||||||
|
|
||||||
If something seems wrong on build, running `rake clean` solves some cases.
|
|
||||||
|
|
||||||
### Need help ###
|
|
||||||
|
|
||||||
* Windows support
|
|
||||||
* Refinement of C/C++ code, especially memory management
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
The same as [whisper.cpp][].
|
|
||||||
|
|
||||||
[whisper.cpp]: https://github.com/ggml-org/whisper.cpp
|
|
||||||
[models]: https://github.com/ggml-org/whisper.cpp/tree/master/models
|
|
@ -1,96 +0,0 @@
|
|||||||
require 'rake/clean'
|
|
||||||
require "bundler/gem_tasks"
|
|
||||||
require "rake/testtask"
|
|
||||||
require_relative "extsources"
|
|
||||||
|
|
||||||
SOURCES_DIR = "ext/sources"
|
|
||||||
|
|
||||||
SOURCES = FileList[]
|
|
||||||
|
|
||||||
EXTSOURCES.each do |src|
|
|
||||||
basename = src.pathmap("%f")
|
|
||||||
dest = basename == "LICENSE" ? basename
|
|
||||||
: src.pathmap("%{\\.\\./\\.\\.,#{SOURCES_DIR}}p")
|
|
||||||
.pathmap("%{\\.\\./javascript,#{SOURCES_DIR}/bindings/javascript}p")
|
|
||||||
dir = dest.pathmap("%d")
|
|
||||||
file src
|
|
||||||
directory dir
|
|
||||||
file dest => [src, dir] do |t|
|
|
||||||
cp t.source, t.name
|
|
||||||
end
|
|
||||||
SOURCES.include dest
|
|
||||||
end
|
|
||||||
|
|
||||||
CLEAN.include SOURCES
|
|
||||||
|
|
||||||
SRC = FileList["ext/*.{c,cpp,h}"]
|
|
||||||
|
|
||||||
task build: SOURCES
|
|
||||||
|
|
||||||
directory "pkg"
|
|
||||||
CLOBBER.include "pkg"
|
|
||||||
|
|
||||||
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
|
||||||
SO_FILE = File.join("ext", LIB_NAME)
|
|
||||||
LIB_FILE = File.join("lib", LIB_NAME)
|
|
||||||
|
|
||||||
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
|
|
||||||
chdir "ext" do
|
|
||||||
ruby "extconf.rb"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if File.exist? "ext/Makefile"
|
|
||||||
task :make_clean do
|
|
||||||
cd "ext" do
|
|
||||||
sh "make", "clean"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
task clean: :make_clean
|
|
||||||
task :make_distclean do
|
|
||||||
cd "ext" do
|
|
||||||
sh "make", "distclean"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
task clobber: :make_distclean
|
|
||||||
end
|
|
||||||
|
|
||||||
file SO_FILE => "ext/Makefile" do |t|
|
|
||||||
chdir "ext" do
|
|
||||||
sh "make"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
CLEAN.include SO_FILE
|
|
||||||
|
|
||||||
directory "lib"
|
|
||||||
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
|
||||||
copy t.source, t.name
|
|
||||||
end
|
|
||||||
CLEAN.include LIB_FILE
|
|
||||||
|
|
||||||
Rake::TestTask.new
|
|
||||||
|
|
||||||
TEST_FIXTURE_AUDIO = "test/fixtures/jfk.wav"
|
|
||||||
TEST_FIXTURE_AUDIO_SRC = File.expand_path(File.join(__dir__, "..", "..", "samples", "jfk.wav"))
|
|
||||||
TEST_FIXTURE_AUDIO_DIR = TEST_FIXTURE_AUDIO.pathmap("%d")
|
|
||||||
directory TEST_FIXTURE_AUDIO_DIR
|
|
||||||
# Provide the jfk.wav test fixture: link to the copy in the surrounding
# whisper.cpp checkout when it exists, otherwise download it.
if File.exist? TEST_FIXTURE_AUDIO_SRC
  file TEST_FIXTURE_AUDIO => [TEST_FIXTURE_AUDIO_SRC, TEST_FIXTURE_AUDIO_DIR] do |t|
    symlink t.source, t.name
  end
else
  require "open-uri"
  file TEST_FIXTURE_AUDIO => TEST_FIXTURE_AUDIO_DIR do |t|
    # WAV is binary data: binwrite avoids newline/encoding translation that
    # text-mode File.write performs on some platforms (e.g. Windows).
    File.binwrite t.name, URI("https://github.com/ggml-org/whisper.cpp/raw/refs/heads/master/samples/jfk.wav").read
  end
end
|
|
||||||
|
|
||||||
TEST_MEMORY_VIEW = "test/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
|
|
||||||
file TEST_MEMORY_VIEW => "test/jfk_reader/jfk_reader.c" do |t|
|
|
||||||
chdir "test/jfk_reader" do
|
|
||||||
ruby "extconf.rb"
|
|
||||||
sh "make"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
CLEAN.include TEST_MEMORY_VIEW
|
|
||||||
|
|
||||||
task test: [LIB_FILE, TEST_MEMORY_VIEW, TEST_FIXTURE_AUDIO]
|
|
9
bindings/ruby/ext/.gitignore
vendored
9
bindings/ruby/ext/.gitignore
vendored
@ -1,9 +0,0 @@
|
|||||||
Makefile
|
|
||||||
whisper.so
|
|
||||||
whisper.bundle
|
|
||||||
whisper.dll
|
|
||||||
*.o
|
|
||||||
*.a
|
|
||||||
sources/*
|
|
||||||
!sources/CMakeGraphVizOptions.cmake
|
|
||||||
mkmf.log
|
|
@ -1,73 +0,0 @@
|
|||||||
require "tsort"
|
|
||||||
|
|
||||||
# Determines which static libraries the extension has to link, and in which
# order, by letting CMake emit a Graphviz dependency graph of the sources and
# topologically sorting it.
class Dependencies
  include TSort

  # cmake   - path (or name) of the cmake executable to run.
  # options - object whose #to_s yields extra cmake arguments
  #           ("-D NAME=VALUE ..."); an empty string means no extra arguments.
  def initialize(cmake, options)
    @cmake = cmake
    @options = options
    @static_lib_shape = nil
    @nodes = {}
    @graph = Hash.new {|h, k| h[k] = []}

    generate_dot
    parse_dot
  end

  # Returns the static library file names ("lib<name>.a"), in the reverse of
  # TSort's order.
  def libs
    tsort.filter_map {|node|
      label, shape = @nodes[node]
      if shape == @static_lib_shape
        # Drop a trailing literal "\n(...)" annotation from the node label.
        label.gsub(/\\n\([^)]+\)/, '')
      else
        nil
      end
    }.reverse.collect {|lib| "lib#{lib}.a"}
  end

  # Space-separated form of #libs.
  def to_s
    libs.join(" ")
  end

  private

  def dot_path
    File.join(__dir__, "build", "whisper.cpp.dot")
  end

  # Runs cmake so that it writes the dependency graph to #dot_path.
  def generate_dot
    require "shellwords"

    args = ["-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF"]
    # system is called without a shell, so the option string must be split
    # into individual argv elements; passing it whole would hand cmake a
    # single argument containing every "-D NAME=VALUE" pair.
    args.concat(Shellwords.split(@options.to_s))
    system @cmake, *args, exception: true
  end

  # Reads the dot file and records the legend shape used for static
  # libraries, every node's label/shape, and every dependency edge.
  def parse_dot
    # File.foreach closes the file once iteration finishes.
    File.foreach(dot_path) do |line|
      case line
      when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
        @static_lib_shape = $~[:shape]
      when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
        node = $~[:node]
        label = $~[:label]
        shape = $~[:shape]
        @nodes[node] = [label, shape]
      when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
        depender = $~[:depender]
        dependee = $~[:dependee]
        @graph[depender] << dependee
      end
    end
  end

  # TSort hook: enumerates every known node.
  def tsort_each_node
    @nodes.each_key do |node|
      yield node
    end
  end

  # TSort hook: enumerates the nodes +node+ depends on.
  def tsort_each_child(node)
    @graph[node].each do |child|
      yield child
    end
  end
end
|
|
@ -1,22 +0,0 @@
|
|||||||
require "mkmf"
|
|
||||||
require_relative "options"
|
|
||||||
require_relative "dependencies"
|
|
||||||
|
|
||||||
cmake = find_executable("cmake") || abort
|
|
||||||
options = Options.new(cmake)
|
|
||||||
have_library("gomp") rescue nil
|
|
||||||
libs = Dependencies.new(cmake, options)
|
|
||||||
|
|
||||||
$INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples"
|
|
||||||
$LOCAL_LIBS << " #{libs}"
|
|
||||||
$cleanfiles << " build #{libs}"
|
|
||||||
|
|
||||||
create_makefile "whisper" do |conf|
|
|
||||||
conf << <<~EOF
|
|
||||||
$(TARGET_SO): #{libs}
|
|
||||||
#{libs}: cmake-targets
|
|
||||||
cmake-targets:
|
|
||||||
#{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
|
|
||||||
#{"\t"}#{cmake} --build build --config Release --target common whisper
|
|
||||||
EOF
|
|
||||||
end
|
|
@ -1,82 +0,0 @@
|
|||||||
# Discovers the CMake cache options of the bundled sources and lets each one
# be overridden at configure time through enable_config / arg_config
# (presumably mkmf's helpers; they are not defined in this class).
class Options
  # cmake - path (or name) of the cmake executable.
  def initialize(cmake="cmake")
    @cmake = cmake
    @options = {}

    configure
  end

  # Renders every option that has a non-nil configured value as
  # "-D NAME=VALUE", joined by spaces. Booleans become ON/OFF; other values
  # are shell-escaped.
  def to_s
    @options
      .reject {|name, (type, value)| value.nil?}
      .collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? "OFF" : value.shellescape}"}
      .join(" ")
  end

  # Returns (and memoizes) a hash of NAME => [TYPE, default] parsed from the
  # lines that follow "-- Cache values" in the output of `cmake -L`.
  # BOOL defaults are converted to true/false; other defaults stay strings.
  def cmake_options
    return @cmake_options if @cmake_options

    output = nil
    Dir.chdir __dir__ do
      output = `#{@cmake.shellescape} -S sources -B build -L`
    end
    @cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
      .filter_map {|line|
        option, value = line.chomp.split("=", 2)
        # A blank line splits into nothing; skip it instead of calling
        # #split on nil below.
        next if option.nil?

        name, type = option.split(":", 2)
        [
          name,
          [
            type,
            type == "BOOL" ? value == "ON" : value
          ]
        ]
      }.to_h
  end

  private

  # Reads the user's choice for every discovered option, then adds the
  # linker/preprocessor flags required by the enabled Apple frameworks.
  def configure
    cmake_options.each_pair do |name, (type, default_value)|
      option = option_name(name)
      value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
      @options[name] = [type, value]
    end

    configure_accelerate
    configure_metal
    configure_coreml
  end

  # See ggml/src/ggml-cpu/CMakeLists.txt
  def configure_accelerate
    if RUBY_PLATFORM.match?(/darwin/) && enabled?("GGML_ACCELERATE")
      $LDFLAGS << " -framework Accelerate"
    end
  end

  # See ggml/src/ggml-metal/CMakeLists.txt
  def configure_metal
    $LDFLAGS << " -framework Foundation -framework Metal -framework MetalKit" if enabled?("GGML_METAL")
  end

  # See src/CMakeLists.txt
  def configure_coreml
    if enabled?("WHISPER_COREML")
      $LDFLAGS << " -framework Foundation -framework CoreML"
      $CPPFLAGS << " -DRUBY_WHISPER_USE_COREML"
    end
  end

  # Converts a CMake option name such as GGML_METAL into its flag form
  # (ggml-metal).
  def option_name(name)
    name.downcase.gsub("_", "-")
  end

  # The configured value of +option+, falling back to the CMake default when
  # no value was configured (nil).
  def enabled?(option)
    if @options[option][1].nil?
      cmake_options[option][1]
    else
      @options[option][1]
    end
  end
end
|
|
@ -1,176 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include <ruby/memory_view.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
|
|
||||||
VALUE mWhisper;
|
|
||||||
VALUE mVAD;
|
|
||||||
VALUE cContext;
|
|
||||||
VALUE cParams;
|
|
||||||
VALUE cVADParams;
|
|
||||||
VALUE eError;
|
|
||||||
|
|
||||||
VALUE cSegment;
|
|
||||||
VALUE cModel;
|
|
||||||
|
|
||||||
ID id_to_s;
|
|
||||||
ID id_call;
|
|
||||||
ID id___method__;
|
|
||||||
ID id_to_enum;
|
|
||||||
ID id_length;
|
|
||||||
ID id_next;
|
|
||||||
ID id_new;
|
|
||||||
ID id_to_path;
|
|
||||||
ID id_URI;
|
|
||||||
ID id_pre_converted_models;
|
|
||||||
ID id_coreml_compiled_models;
|
|
||||||
ID id_cache;
|
|
||||||
ID id_n_processors;
|
|
||||||
|
|
||||||
static bool is_log_callback_finalized = false;
|
|
||||||
|
|
||||||
// High level API
|
|
||||||
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
|
|
||||||
|
|
||||||
extern void init_ruby_whisper_context(VALUE *mWhisper);
|
|
||||||
extern void init_ruby_whisper_params(VALUE *mWhisper);
|
|
||||||
extern void init_ruby_whisper_error(VALUE *mWhisper);
|
|
||||||
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
|
|
||||||
extern void init_ruby_whisper_model(VALUE *mWhisper);
|
|
||||||
extern void init_ruby_whisper_vad_params(VALUE *mVAD);
|
|
||||||
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* lang_max_id -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
|
|
||||||
return INT2NUM(whisper_lang_max_id());
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* lang_id(lang_name) -> Integer
|
|
||||||
*/
|
|
||||||
/* Looks up the numeric id of the language named by +lang+; raises ArgumentError when whisper reports -1. */
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
  const char * const name = StringValueCStr(lang);
  const int found = whisper_lang_id(name);
  if (found == -1) {
    rb_raise(rb_eArgError, "language not found: %s", name);
  }
  return INT2NUM(found);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* lang_str(lang_id) -> String
|
|
||||||
*/
|
|
||||||
/* Returns the language string for the numeric +id+; raises IndexError when whisper returns NULL for it. */
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
  const int requested = NUM2INT(id);
  const char * const name = whisper_lang_str(requested);
  if (name == NULL) {
    rb_raise(rb_eIndexError, "id %d outside of language id", requested);
  }
  return rb_str_new2(name);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* lang_str_full(lang_id) -> String
|
|
||||||
*/
|
|
||||||
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
|
||||||
const int lang_id = NUM2INT(id);
|
|
||||||
const char * str_full = whisper_lang_str_full(lang_id);
|
|
||||||
if (NULL == str_full) {
|
|
||||||
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
|
||||||
}
|
|
||||||
return rb_str_new2(str_full);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* system_info_str -> String
|
|
||||||
*/
|
|
||||||
static VALUE ruby_whisper_s_system_info_str(VALUE self) {
|
|
||||||
return rb_str_new2(whisper_print_system_info());
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
|
||||||
is_log_callback_finalized = true;
|
|
||||||
return Qnil;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
|
|
||||||
if (is_log_callback_finalized) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
|
|
||||||
VALUE udata = rb_iv_get(mWhisper, "user_data");
|
|
||||||
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
|
||||||
*/
|
|
||||||
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
|
||||||
VALUE old_callback = rb_iv_get(self, "log_callback");
|
|
||||||
if (!NIL_P(old_callback)) {
|
|
||||||
rb_undefine_finalizer(old_callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
rb_iv_set(self, "log_callback", log_callback);
|
|
||||||
rb_iv_set(self, "user_data", user_data);
|
|
||||||
|
|
||||||
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
|
||||||
rb_define_finalizer(log_callback, finalize_log_callback);
|
|
||||||
|
|
||||||
whisper_log_set(ruby_whisper_log_callback, NULL);
|
|
||||||
|
|
||||||
return Qnil;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Init_whisper() {
|
|
||||||
id_to_s = rb_intern("to_s");
|
|
||||||
id_call = rb_intern("call");
|
|
||||||
id___method__ = rb_intern("__method__");
|
|
||||||
id_to_enum = rb_intern("to_enum");
|
|
||||||
id_length = rb_intern("length");
|
|
||||||
id_next = rb_intern("next");
|
|
||||||
id_new = rb_intern("new");
|
|
||||||
id_to_path = rb_intern("to_path");
|
|
||||||
id_URI = rb_intern("URI");
|
|
||||||
id_pre_converted_models = rb_intern("pre_converted_models");
|
|
||||||
id_coreml_compiled_models = rb_intern("coreml_compiled_models");
|
|
||||||
id_cache = rb_intern("cache");
|
|
||||||
id_n_processors = rb_intern("n_processors");
|
|
||||||
|
|
||||||
mWhisper = rb_define_module("Whisper");
|
|
||||||
mVAD = rb_define_module_under(mWhisper, "VAD");
|
|
||||||
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
|
||||||
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
|
||||||
|
|
||||||
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
|
||||||
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
|
||||||
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
|
||||||
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
|
||||||
rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
|
|
||||||
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
|
||||||
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
|
||||||
|
|
||||||
init_ruby_whisper_context(&mWhisper);
|
|
||||||
init_ruby_whisper_params(&mWhisper);
|
|
||||||
init_ruby_whisper_error(&mWhisper);
|
|
||||||
init_ruby_whisper_segment(&mWhisper, &cContext);
|
|
||||||
init_ruby_whisper_model(&mWhisper);
|
|
||||||
init_ruby_whisper_vad_params(&mVAD);
|
|
||||||
|
|
||||||
rb_require("whisper/context");
|
|
||||||
rb_require("whisper/segment");
|
|
||||||
rb_require("whisper/model/uri");
|
|
||||||
}
|
|
@ -1,40 +0,0 @@
|
|||||||
#ifndef RUBY_WHISPER_H
|
|
||||||
#define RUBY_WHISPER_H
|
|
||||||
|
|
||||||
#include "whisper.h"
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
VALUE *context;
|
|
||||||
VALUE user_data;
|
|
||||||
VALUE callback;
|
|
||||||
VALUE callbacks;
|
|
||||||
} ruby_whisper_callback_container;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
struct whisper_context *context;
|
|
||||||
} ruby_whisper;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
struct whisper_full_params params;
|
|
||||||
bool diarize;
|
|
||||||
ruby_whisper_callback_container *new_segment_callback_container;
|
|
||||||
ruby_whisper_callback_container *progress_callback_container;
|
|
||||||
ruby_whisper_callback_container *encoder_begin_callback_container;
|
|
||||||
ruby_whisper_callback_container *abort_callback_container;
|
|
||||||
VALUE vad_params;
|
|
||||||
} ruby_whisper_params;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
struct whisper_vad_params params;
|
|
||||||
} ruby_whisper_vad_params;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
VALUE context;
|
|
||||||
int index;
|
|
||||||
} ruby_whisper_segment;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
VALUE context;
|
|
||||||
} ruby_whisper_model;
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,672 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include <ruby/memory_view.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
|
|
||||||
extern ID id_to_s;
|
|
||||||
extern ID id___method__;
|
|
||||||
extern ID id_to_enum;
|
|
||||||
extern ID id_length;
|
|
||||||
extern ID id_next;
|
|
||||||
extern ID id_new;
|
|
||||||
extern ID id_to_path;
|
|
||||||
extern ID id_URI;
|
|
||||||
extern ID id_pre_converted_models;
|
|
||||||
extern ID id_coreml_compiled_models;
|
|
||||||
extern ID id_cache;
|
|
||||||
extern ID id_n_processors;
|
|
||||||
|
|
||||||
extern VALUE cContext;
|
|
||||||
extern VALUE eError;
|
|
||||||
extern VALUE cModel;
|
|
||||||
|
|
||||||
extern const rb_data_type_t ruby_whisper_params_type;
|
|
||||||
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
|
||||||
extern VALUE rb_whisper_model_s_new(VALUE context);
|
|
||||||
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
|
||||||
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
|
|
||||||
|
|
||||||
ID transcribe_option_names[1];
|
|
||||||
|
|
||||||
static void
|
|
||||||
ruby_whisper_free(ruby_whisper *rw)
|
|
||||||
{
|
|
||||||
if (rw->context) {
|
|
||||||
whisper_free(rw->context);
|
|
||||||
rw->context = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
rb_whisper_mark(ruby_whisper *rw)
|
|
||||||
{
|
|
||||||
// call rb_gc_mark on any ruby references in rw
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
rb_whisper_free(void *p)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw = (ruby_whisper *)p;
|
|
||||||
ruby_whisper_free(rw);
|
|
||||||
free(rw);
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
ruby_whisper_memsize(const void *p)
|
|
||||||
{
|
|
||||||
const ruby_whisper *rw = (const ruby_whisper *)p;
|
|
||||||
size_t size = sizeof(rw);
|
|
||||||
if (!rw) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (rw->context) {
|
|
||||||
size += sizeof(rw->context);
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
const rb_data_type_t ruby_whisper_type = {
|
|
||||||
"ruby_whisper",
|
|
||||||
{0, rb_whisper_free, ruby_whisper_memsize,},
|
|
||||||
0, 0,
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_allocate(VALUE klass)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
VALUE obj = TypedData_Make_Struct(klass, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
rw->context = NULL;
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
|
|
||||||
VALUE
|
|
||||||
ruby_whisper_normalize_model_path(VALUE model_path)
|
|
||||||
{
|
|
||||||
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
|
|
||||||
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
|
|
||||||
if (!NIL_P(pre_converted_model)) {
|
|
||||||
model_path = pre_converted_model;
|
|
||||||
#ifdef RUBY_WHISPER_USE_COREML
|
|
||||||
VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
|
|
||||||
VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
|
|
||||||
if (!NIL_P(coreml_converted_model)) {
|
|
||||||
rb_funcall(coreml_converted_model, id_cache, 0);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
else if (TYPE(model_path) == T_STRING) {
|
|
||||||
const char * model_path_str = StringValueCStr(model_path);
|
|
||||||
if (strncmp("http://", model_path_str, 7) == 0 || strncmp("https://", model_path_str, 8) == 0) {
|
|
||||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
|
||||||
model_path = rb_class_new_instance(1, &model_path, uri_class);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (rb_obj_is_kind_of(model_path, rb_path2class("URI::HTTP"))) {
|
|
||||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
|
||||||
model_path = rb_class_new_instance(1, &model_path, uri_class);
|
|
||||||
}
|
|
||||||
if (rb_respond_to(model_path, id_to_path)) {
|
|
||||||
model_path = rb_funcall(model_path, id_to_path, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return model_path;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* new("base.en") -> Whisper::Context
|
|
||||||
* new("path/to/model.bin") -> Whisper::Context
|
|
||||||
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
VALUE whisper_model_file_path;
|
|
||||||
|
|
||||||
// TODO: we can support init from buffer here too maybe another ruby object to expose
|
|
||||||
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
|
|
||||||
whisper_model_file_path = ruby_whisper_normalize_model_path(whisper_model_file_path);
|
|
||||||
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
|
|
||||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
|
||||||
}
|
|
||||||
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
|
||||||
if (rw->context == NULL) {
|
|
||||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
|
||||||
}
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_vocab -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_vocab(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_audio_ctx -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_audio_ctx(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_audio_state -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_audio_state(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_audio_head -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_audio_head(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_audio_layer -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_audio_layer(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_text_ctx -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_text_ctx(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_text_state -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_text_state(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_text_head -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_text_head(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_text_layer -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_text_layer(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_n_mels -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_n_mels(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_ftype -> Integer
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_ftype(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_ftype(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model_type -> String
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_model_type(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
|
||||||
* Not thread safe for same context
|
|
||||||
* Uses the specified decoding strategy to obtain the text.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full(params, samples, n_samples) -> nil
|
|
||||||
* full(params, samples) -> nil
|
|
||||||
*
|
|
||||||
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
|
||||||
*/
|
|
||||||
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
|
||||||
{
|
|
||||||
if (argc < 2 || argc > 3) {
|
|
||||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
|
||||||
}
|
|
||||||
|
|
||||||
ruby_whisper *rw;
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
VALUE params = argv[0];
|
|
||||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
|
||||||
VALUE samples = argv[1];
|
|
||||||
int n_samples;
|
|
||||||
rb_memory_view_t view;
|
|
||||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
|
||||||
if (argc == 3) {
|
|
||||||
n_samples = NUM2INT(argv[2]);
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
if (RARRAY_LEN(samples) < n_samples) {
|
|
||||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Should check when samples.respond_to?(:length)?
|
|
||||||
} else {
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
|
||||||
rb_raise(rb_eArgError, "samples are too long");
|
|
||||||
}
|
|
||||||
n_samples = (int)RARRAY_LEN(samples);
|
|
||||||
} else if (memory_view_available_p) {
|
|
||||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
|
||||||
view.obj = Qnil;
|
|
||||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
|
||||||
}
|
|
||||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
|
||||||
if (n_samples_size > INT_MAX) {
|
|
||||||
rb_raise(rb_eArgError, "samples are too long");
|
|
||||||
}
|
|
||||||
n_samples = (int)n_samples_size;
|
|
||||||
} else if (rb_respond_to(samples, id_length)) {
|
|
||||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
|
||||||
} else {
|
|
||||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
|
||||||
if (memory_view_available_p) {
|
|
||||||
c_samples = (float *)view.data;
|
|
||||||
} else {
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
for (int i = 0; i < n_samples; i++) {
|
|
||||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// TODO: use rb_block_call
|
|
||||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
|
||||||
for (int i = 0; i < n_samples; i++) {
|
|
||||||
// TODO: check if iter is exhausted and raise ArgumentError appropriately
|
|
||||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
|
||||||
c_samples[i] = RFLOAT_VALUE(sample);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
prepare_transcription(rwp, &self);
|
|
||||||
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
|
|
||||||
if (0 == result) {
|
|
||||||
return self;
|
|
||||||
} else {
|
|
||||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
|
||||||
* Result is stored in the default state of the context
|
|
||||||
* Not thread safe if executed in parallel on the same context.
|
|
||||||
* It seems this approach can offer some speedup in some cases.
|
|
||||||
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_parallel(params, samples) -> nil
|
|
||||||
* full_parallel(params, samples, n_samples) -> nil
|
|
||||||
* full_parallel(params, samples, n_samples, n_processors) -> nil
|
|
||||||
* full_parallel(params, samples, nil, n_processors) -> nil
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
|
|
||||||
{
|
|
||||||
if (argc < 2 || argc > 4) {
|
|
||||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
|
||||||
}
|
|
||||||
|
|
||||||
ruby_whisper *rw;
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
VALUE params = argv[0];
|
|
||||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
|
||||||
VALUE samples = argv[1];
|
|
||||||
int n_samples;
|
|
||||||
int n_processors;
|
|
||||||
rb_memory_view_t view;
|
|
||||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
|
||||||
switch (argc) {
|
|
||||||
case 2:
|
|
||||||
n_processors = 1;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
n_processors = 1;
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
n_processors = NUM2INT(argv[3]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (argc >= 3 && !NIL_P(argv[2])) {
|
|
||||||
n_samples = NUM2INT(argv[2]);
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
if (RARRAY_LEN(samples) < n_samples) {
|
|
||||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Should check when samples.respond_to?(:length)?
|
|
||||||
} else if (memory_view_available_p) {
|
|
||||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
|
||||||
view.obj = Qnil;
|
|
||||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
|
||||||
}
|
|
||||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
|
||||||
if (n_samples_size > INT_MAX) {
|
|
||||||
rb_raise(rb_eArgError, "samples are too long");
|
|
||||||
}
|
|
||||||
n_samples = (int)n_samples_size;
|
|
||||||
} else {
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
|
||||||
rb_raise(rb_eArgError, "samples are too long");
|
|
||||||
}
|
|
||||||
n_samples = (int)RARRAY_LEN(samples);
|
|
||||||
} else if (rb_respond_to(samples, id_length)) {
|
|
||||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
|
||||||
} else {
|
|
||||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
|
||||||
if (memory_view_available_p) {
|
|
||||||
c_samples = (float *)view.data;
|
|
||||||
} else {
|
|
||||||
if (TYPE(samples) == T_ARRAY) {
|
|
||||||
for (int i = 0; i < n_samples; i++) {
|
|
||||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// FIXME: use rb_block_call
|
|
||||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
|
||||||
for (int i = 0; i < n_samples; i++) {
|
|
||||||
// TODO: check if iter is exhausted and raise ArgumentError
|
|
||||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
|
||||||
c_samples[i] = RFLOAT_VALUE(sample);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
prepare_transcription(rwp, &self);
|
|
||||||
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
|
|
||||||
if (0 == result) {
|
|
||||||
return self;
|
|
||||||
} else {
|
|
||||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Number of segments.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_n_segments -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_n_segments(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_full_n_segments(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_lang_id -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_lang_id(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_full_lang_id(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment)
|
|
||||||
{
|
|
||||||
const int c_i_segment = NUM2INT(i_segment);
|
|
||||||
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
|
|
||||||
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
|
|
||||||
}
|
|
||||||
return c_i_segment;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
|
||||||
*
|
|
||||||
* full_get_segment_t0(3) # => 1668 (16680 ms)
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_get_segment_t0(segment_index) -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
|
||||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
|
||||||
return LONG2NUM(t0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
|
||||||
*
|
|
||||||
* full_get_segment_t1(3) # => 1668 (16680 ms)
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_get_segment_t1(segment_index) -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
|
||||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
|
||||||
return LONG2NUM(t1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Whether the next segment indexed by +segment_index+ is predicated as a speaker turn.
|
|
||||||
*
|
|
||||||
* full_get_segment_speacker_turn_next(3) # => true
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_get_segment_speacker_turn_next(segment_index) -> bool
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
|
||||||
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
|
|
||||||
return speaker_turn_next ? Qtrue : Qfalse;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Text of a segment indexed by +segment_index+.
|
|
||||||
*
|
|
||||||
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* full_get_segment_text(segment_index) -> String
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
|
||||||
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
|
|
||||||
return rb_str_new2(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* full_get_segment_no_speech_prob(segment_index) -> Float
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
|
||||||
const float no_speech_prob = whisper_full_get_segment_no_speech_prob(rw->context, c_i_segment);
|
|
||||||
return DBL2NUM(no_speech_prob);
|
|
||||||
}
|
|
||||||
|
|
||||||
// High level API
|
|
||||||
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
|
||||||
{
|
|
||||||
return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Yields each Whisper::Segment:
|
|
||||||
*
|
|
||||||
* whisper.transcribe("path/to/audio.wav", params)
|
|
||||||
* whisper.each_segment do |segment|
|
|
||||||
* puts segment.text
|
|
||||||
* end
|
|
||||||
*
|
|
||||||
* Returns an Enumerator if no block given:
|
|
||||||
*
|
|
||||||
* whisper.transcribe("path/to/audio.wav", params)
|
|
||||||
* enum = whisper.each_segment
|
|
||||||
* enum.to_a # => [#<Whisper::Segment>, ...]
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* each_segment {|segment| ... }
|
|
||||||
* each_segment -> Enumerator
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_each_segment(VALUE self)
|
|
||||||
{
|
|
||||||
if (!rb_block_given_p()) {
|
|
||||||
const VALUE method_name = rb_funcall(self, id___method__, 0);
|
|
||||||
return rb_funcall(self, id_to_enum, 1, method_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
|
|
||||||
const int n_segments = whisper_full_n_segments(rw->context);
|
|
||||||
for (int i = 0; i < n_segments; ++i) {
|
|
||||||
rb_yield(rb_whisper_segment_s_new(self, i));
|
|
||||||
}
|
|
||||||
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* model -> Whisper::Model
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_get_model(VALUE self)
|
|
||||||
{
|
|
||||||
return rb_whisper_model_s_new(self);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
init_ruby_whisper_context(VALUE *mWhisper)
|
|
||||||
{
|
|
||||||
cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
|
|
||||||
|
|
||||||
transcribe_option_names[0] = id_n_processors;
|
|
||||||
|
|
||||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
|
||||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
|
||||||
|
|
||||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
|
||||||
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
|
||||||
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
|
||||||
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
|
||||||
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
|
||||||
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
|
||||||
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
|
||||||
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
|
||||||
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
|
||||||
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
|
||||||
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
|
||||||
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
|
||||||
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
|
||||||
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
|
||||||
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
|
||||||
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
|
||||||
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
|
|
||||||
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
|
|
||||||
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
|
|
||||||
rb_define_method(cContext, "full_get_segment_no_speech_prob", ruby_whisper_full_get_segment_no_speech_prob, 1);
|
|
||||||
rb_define_method(cContext, "full", ruby_whisper_full, -1);
|
|
||||||
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
|
|
||||||
|
|
||||||
// High level
|
|
||||||
rb_define_method(cContext, "full_get_segment", ruby_whisper_full_get_segment, 1);
|
|
||||||
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
|
|
||||||
|
|
||||||
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
|
||||||
}
|
|
@ -1,52 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
|
|
||||||
extern VALUE eError;
|
|
||||||
|
|
||||||
// Whisper::Error#initialize(code)
// Maps the integer error code to a fixed message, passes that message to the
// superclass initializer and stores the original code object in @code.
VALUE ruby_whisper_error_initialize(VALUE self, VALUE code)
{
  // Codes without a dedicated message fall back to this text.
  const char *description = "unknown error";

  switch (NUM2INT(code)) {
  case -2:
    description = "failed to compute log mel spectrogram";
    break;
  case -3:
    description = "failed to auto-detect language";
    break;
  case -4:
    description = "too many decoders requested";
    break;
  case -5:
    description = "audio_ctx is larger than the maximum allowed";
    break;
  case -6:
    description = "failed to encode";
    break;
  case -7:
    description = "whisper_kv_cache_init() failed for self-attention cache";
    break;
  case -8: // both decode failures share one message
  case -9:
    description = "failed to decode";
    break;
  default:
    break;
  }

  const VALUE message = rb_str_new2(description);
  rb_call_super(1, &message);
  rb_iv_set(self, "@code", code);

  return self;
}
|
|
||||||
|
|
||||||
void
|
|
||||||
init_ruby_whisper_error(VALUE *mWhisper)
|
|
||||||
{
|
|
||||||
eError = rb_define_class_under(*mWhisper, "Error", rb_eStandardError);
|
|
||||||
|
|
||||||
rb_define_attr(eError, "code", true, false);
|
|
||||||
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
|
|
||||||
}
|
|
@ -1,232 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
|
|
||||||
extern const rb_data_type_t ruby_whisper_type;
|
|
||||||
|
|
||||||
extern VALUE cModel;
|
|
||||||
|
|
||||||
static void rb_whisper_model_mark(void *p) {
|
|
||||||
ruby_whisper_model *rwm = (ruby_whisper_model *)p;
|
|
||||||
if (rwm->context) {
|
|
||||||
rb_gc_mark(rwm->context);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
ruby_whisper_model_memsize(const void *p)
|
|
||||||
{
|
|
||||||
const ruby_whisper_model *rwm = (const ruby_whisper_model *)p;
|
|
||||||
size_t size = sizeof(rwm);
|
|
||||||
if (!rwm) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const rb_data_type_t rb_whisper_model_type = {
|
|
||||||
"ruby_whisper_model",
|
|
||||||
{rb_whisper_model_mark, RUBY_DEFAULT_FREE, ruby_whisper_model_memsize,},
|
|
||||||
0, 0,
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
// Allocator for Whisper::Model: creates a new object of klass wrapping a ruby_whisper_model.
static VALUE ruby_whisper_model_allocate(VALUE klass) {
  ruby_whisper_model *model;
  const VALUE obj = TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, model);
  return obj;
}
|
|
||||||
|
|
||||||
// Creates a Whisper::Model whose context field refers to the given context object.
VALUE rb_whisper_model_s_new(VALUE context) {
  const VALUE obj = ruby_whisper_model_allocate(cModel);

  ruby_whisper_model *model;
  TypedData_Get_Struct(obj, ruby_whisper_model, &rb_whisper_model_type, model);
  model->context = context;

  return obj;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_vocab -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_vocab(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_audio_ctx -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_audio_ctx(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_audio_state -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_audio_state(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_audio_head -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_audio_head(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_audio_layer -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_audio_layer(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_text_ctx -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_text_ctx(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_text_state -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_text_state(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_text_head -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_text_head(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_text_layer -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_text_layer(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* n_mels -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_n_mels(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* ftype -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_ftype(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return INT2NUM(whisper_model_ftype(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* type -> String
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_model_type(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_model *rwm;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
init_ruby_whisper_model(VALUE *mWhisper)
|
|
||||||
{
|
|
||||||
cModel = rb_define_class_under(*mWhisper, "Model", rb_cObject);
|
|
||||||
|
|
||||||
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
|
||||||
rb_define_method(cModel, "n_vocab", ruby_whisper_model_n_vocab, 0);
|
|
||||||
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
|
||||||
rb_define_method(cModel, "n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
|
||||||
rb_define_method(cModel, "n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
|
||||||
rb_define_method(cModel, "n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
|
||||||
rb_define_method(cModel, "n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
|
||||||
rb_define_method(cModel, "n_text_state", ruby_whisper_model_n_text_state, 0);
|
|
||||||
rb_define_method(cModel, "n_text_head", ruby_whisper_model_n_text_head, 0);
|
|
||||||
rb_define_method(cModel, "n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
|
||||||
rb_define_method(cModel, "n_mels", ruby_whisper_model_n_mels, 0);
|
|
||||||
rb_define_method(cModel, "ftype", ruby_whisper_model_ftype, 0);
|
|
||||||
rb_define_method(cModel, "type", ruby_whisper_model_type, 0);
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
@ -1,220 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
|
|
||||||
#define N_KEY_NAMES 5
|
|
||||||
|
|
||||||
static VALUE sym_start_time;
|
|
||||||
static VALUE sym_end_time;
|
|
||||||
static VALUE sym_text;
|
|
||||||
static VALUE sym_no_speech_prob;
|
|
||||||
static VALUE sym_speaker_turn_next;
|
|
||||||
static VALUE key_names;
|
|
||||||
|
|
||||||
extern const rb_data_type_t ruby_whisper_type;
|
|
||||||
|
|
||||||
extern VALUE cSegment;
|
|
||||||
|
|
||||||
static void
|
|
||||||
rb_whisper_segment_mark(void *p)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws = (ruby_whisper_segment *)p;
|
|
||||||
rb_gc_mark(rws->context);
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
ruby_whisper_segment_memsize(const void *p)
|
|
||||||
{
|
|
||||||
const ruby_whisper_segment *rws = (const ruby_whisper_segment *)p;
|
|
||||||
size_t size = sizeof(rws);
|
|
||||||
if (!rws) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const rb_data_type_t ruby_whisper_segment_type = {
|
|
||||||
"ruby_whisper_segment",
|
|
||||||
{rb_whisper_segment_mark, RUBY_DEFAULT_FREE, ruby_whisper_segment_memsize,},
|
|
||||||
0, 0,
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
VALUE
|
|
||||||
ruby_whisper_segment_allocate(VALUE klass)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
return TypedData_Make_Struct(klass, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
}
|
|
||||||
|
|
||||||
VALUE
|
|
||||||
rb_whisper_segment_s_new(VALUE context, int index)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
|
||||||
TypedData_Get_Struct(segment, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
rws->context = context;
|
|
||||||
rws->index = index;
|
|
||||||
return segment;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Start time in milliseconds.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* start_time -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_get_start_time(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
|
||||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
|
||||||
return LONG2NUM(t0 * 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* End time in milliseconds.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* end_time -> Integer
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_get_end_time(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
|
||||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
|
||||||
return LONG2NUM(t1 * 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Whether the next segment is predicted as a speaker turn.
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* speaker_turn_next? -> bool
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* text -> String
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_get_text(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
|
||||||
return rb_str_new2(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* no_speech_prob -> Float
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_get_no_speech_prob(VALUE self)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* call-seq:
|
|
||||||
* deconstruct_keys(keys) -> hash
|
|
||||||
*
|
|
||||||
* Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next
|
|
||||||
*
|
|
||||||
* whisper.each_segment do |segment|
|
|
||||||
* segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:}
|
|
||||||
*
|
|
||||||
* puts "[#{start_time} --> #{end_time}] #{text} (no speech prob: #{no_speech_prob}#{speaker_turn_next ? ', speaker turns next' : ''})"
|
|
||||||
* end
|
|
||||||
*/
|
|
||||||
static VALUE
|
|
||||||
ruby_whisper_segment_deconstruct_keys(VALUE self, VALUE keys)
|
|
||||||
{
|
|
||||||
ruby_whisper_segment *rws;
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
|
||||||
ruby_whisper *rw;
|
|
||||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
|
|
||||||
VALUE hash = rb_hash_new();
|
|
||||||
long n_keys;
|
|
||||||
if (NIL_P(keys)) {
|
|
||||||
keys = key_names;
|
|
||||||
n_keys = N_KEY_NAMES;
|
|
||||||
} else {
|
|
||||||
n_keys = RARRAY_LEN(keys);
|
|
||||||
if (n_keys > N_KEY_NAMES) {
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < n_keys; i++) {
|
|
||||||
VALUE key = rb_ary_entry(keys, i);
|
|
||||||
if (key == sym_start_time) {
|
|
||||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_start_time(self));
|
|
||||||
}
|
|
||||||
if (key == sym_end_time) {
|
|
||||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_end_time(self));
|
|
||||||
}
|
|
||||||
if (key == sym_text) {
|
|
||||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_text(self));
|
|
||||||
}
|
|
||||||
if (key == sym_no_speech_prob) {
|
|
||||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_no_speech_prob(self));
|
|
||||||
}
|
|
||||||
if (key == sym_speaker_turn_next) {
|
|
||||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_speaker_turn_next(self));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return hash;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
|
|
||||||
{
|
|
||||||
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
|
|
||||||
|
|
||||||
sym_start_time = ID2SYM(rb_intern("start_time"));
|
|
||||||
sym_end_time = ID2SYM(rb_intern("end_time"));
|
|
||||||
sym_text = ID2SYM(rb_intern("text"));
|
|
||||||
sym_no_speech_prob = ID2SYM(rb_intern("no_speech_prob"));
|
|
||||||
sym_speaker_turn_next = ID2SYM(rb_intern("speaker_turn_next"));
|
|
||||||
key_names = rb_ary_new3(
|
|
||||||
N_KEY_NAMES,
|
|
||||||
sym_start_time,
|
|
||||||
sym_end_time,
|
|
||||||
sym_text,
|
|
||||||
sym_no_speech_prob,
|
|
||||||
sym_speaker_turn_next
|
|
||||||
);
|
|
||||||
|
|
||||||
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
|
||||||
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
|
||||||
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
|
||||||
rb_define_method(cSegment, "speaker_turn_next?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
|
||||||
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
|
||||||
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
|
|
||||||
rb_define_method(cSegment, "deconstruct_keys", ruby_whisper_segment_deconstruct_keys, 1);
|
|
||||||
}
|
|
@ -1,93 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
#include "common-whisper.h"
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern const rb_data_type_t ruby_whisper_type;
|
|
||||||
extern const rb_data_type_t ruby_whisper_params_type;
|
|
||||||
|
|
||||||
extern ID id_to_s;
|
|
||||||
extern ID id_call;
|
|
||||||
extern ID transcribe_option_names[1];
|
|
||||||
|
|
||||||
extern void
|
|
||||||
prepare_transcription(ruby_whisper_params * rwp, VALUE * self);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* transcribe a single file
|
|
||||||
* can emit to a block results
|
|
||||||
*
|
|
||||||
* params = Whisper::Params.new
|
|
||||||
* params.duration = 60_000
|
|
||||||
* whisper.transcribe "path/to/audio.wav", params do |text|
|
|
||||||
* puts text
|
|
||||||
* end
|
|
||||||
*
|
|
||||||
* call-seq:
|
|
||||||
* transcribe(path_to_audio, params) {|text| ...}
|
|
||||||
**/
|
|
||||||
VALUE
|
|
||||||
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
||||||
ruby_whisper *rw;
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
VALUE wave_file_path, blk, params, kws;
|
|
||||||
VALUE opts[1];
|
|
||||||
|
|
||||||
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "2:&", &wave_file_path, ¶ms, &kws, &blk);
|
|
||||||
rb_get_kwargs(kws, transcribe_option_names, 0, 1, opts);
|
|
||||||
|
|
||||||
int n_processors = opts[0] == Qundef ? 1 : NUM2INT(opts[0]);
|
|
||||||
|
|
||||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
|
||||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
|
||||||
|
|
||||||
if (!rb_respond_to(wave_file_path, id_to_s)) {
|
|
||||||
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string fname_inp = StringValueCStr(wave_file_path);
|
|
||||||
|
|
||||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
|
||||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
|
||||||
|
|
||||||
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
|
|
||||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
// Commented out because it is work in progress
|
|
||||||
// {
|
|
||||||
// static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
|
||||||
|
|
||||||
// rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
|
||||||
// bool is_aborted = *(bool*)user_data;
|
|
||||||
// return !is_aborted;
|
|
||||||
// };
|
|
||||||
// rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
|
||||||
// }
|
|
||||||
|
|
||||||
prepare_transcription(rwp, &self);
|
|
||||||
|
|
||||||
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), n_processors) != 0) {
|
|
||||||
fprintf(stderr, "failed to process audio\n");
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
if (NIL_P(blk)) {
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
const int n_segments = whisper_full_n_segments(rw->context);
|
|
||||||
VALUE output = rb_str_new2("");
|
|
||||||
for (int i = 0; i < n_segments; ++i) {
|
|
||||||
const char * text = whisper_full_get_segment_text(rw->context, i);
|
|
||||||
output = rb_str_concat(output, rb_str_new2(text));
|
|
||||||
}
|
|
||||||
rb_funcall(blk, id_call, 1, output);
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user