Mirror of https://github.com/ggerganov/whisper.cpp.git, synced 2025-06-24 09:10:57 +00:00

Compare commits (1 commit): master...cuda-cubla

Author | SHA1 | Date
---|---|---
 | c8b3bc6a0d | 
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build

ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git cmake libsdl2-dev wget git
    apt-get install -y build-essential git cmake

WORKDIR /app

@@ -21,8 +21,8 @@ COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV GGML_CUDA=1
ENV WHISPER_CUBLAS=1

RUN make base.en
RUN make

ENTRYPOINT ["/app/main"]
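The hunk above contrasts the old WHISPER_CUBLAS build flag with GGML_CUDA, which the CMakeLists.txt diff further down marks as its replacement, and builds the base.en target instead of a bare make. Assuming this file is the CUDA image that the Docker workflow below refers to as .devops/main-cuda.Dockerfile (an assumption, since the filename is not shown here), a local build might look like:

    docker build -f .devops/main-cuda.Dockerfile --build-arg CUDA_DOCKER_ARCH=all -t whisper-cpp:main-cuda .

CUDA_DOCKER_ARCH=all matches the default declared by the ARG above; a narrower compute capability could be passed instead to avoid the fat build.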
@@ -1,40 +0,0 @@

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.3.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build
WORKDIR /app

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}

RUN apt-get update && \
    apt-get install -y build-essential libsdl2-dev wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Ref: https://stackoverflow.com/a/53464012
ENV CUDA_MAIN_VERSION=12.3
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH

COPY .. .
# Enable cuBLAS
RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
ENV CUDA_MAIN_VERSION=12.3
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
@@ -1,28 +0,0 @@

ARG ONEAPI_VERSION=2025.1.1-0-devel-ubuntu24.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
WORKDIR /app

RUN apt-get update && \
    apt-get install -y build-essential libsdl2-dev wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY .. .
# Enable SYCL
ARG GGML_SYCL_F16=OFF
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    make base.en CMAKE_ARGS="-DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16}"

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
@@ -1,39 +0,0 @@

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.0.1
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build
WORKDIR /app

RUN apt-get update && \
    apt-get install -y build-essential libsdl2-dev wget cmake git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*

COPY .. .
# Enable muBLAS
RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"

RUN find /app/build -name "*.o" -delete && \
    find /app/build -name "*.a" -delete && \
    rm -rf /app/build/CMakeFiles && \
    rm -rf /app/build/cmake_install.cmake && \
    rm -rf /app/build/_deps

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg wget cmake git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* /tmp/* /var/tmp/*

COPY --from=build /app /app
RUN du -sh /app/*
RUN find /app -type f -size +100M
ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
@@ -1,20 +0,0 @@

FROM ubuntu:22.04 AS build
WORKDIR /app

RUN apt-get update && \
    apt-get install -y build-essential wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY .. .
RUN make base.en

FROM ubuntu:22.04 AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENV PATH=/app/build/bin:$PATH
ENTRYPOINT [ "bash", "-c" ]
@@ -1,3 +0,0 @@

build*/
.github/
.devops/
10  .github/workflows/bindings-go.yml  (vendored)

@@ -10,13 +10,13 @@ on:
      - whisper.h

jobs:
  ubuntu-22:
    runs-on: ubuntu-22.04
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/setup-go@v5
      - uses: actions/setup-go@v3
        with:
          go-version: '^1.23'
      - uses: actions/checkout@v4
          go-version: '^1.19'
      - uses: actions/checkout@v1
      - run: |
          cd bindings/go
          make test
25  .github/workflows/bindings-ruby.yml  (vendored)

@@ -1,21 +1,22 @@

name: Bindings Tests (Ruby)

on:
  push:
    branches:
      - master
    paths:
      - bindings/ruby/**
      - whisper.h
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - bindings/ruby/**
      - whisper.h

jobs:
  ubuntu-22:
    runs-on: ubuntu-22.04
    defaults:
      run:
        working-directory: bindings/ruby
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: '3.2'
      - uses: actions/checkout@v4
      - run: rake test
          ruby-version: '3.0'
      - uses: actions/checkout@v1
      - run: |
          cd bindings/ruby/ext
          ruby extconf.rb && make
1167  .github/workflows/build.yml  (vendored)

File diff suppressed because it is too large
63  .github/workflows/docker.yml  (vendored)

@@ -1,63 +0,0 @@

name: Publish Docker image

on:
  pull_request:
  push:
    branches:
      - master

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    if: github.event.pull_request.draft == false

    runs-on: ubuntu-22.04
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        config:
          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
          - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
          #TODO: the cuda image keeps failing - disable for now
          # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
          #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }

    steps:
      - name: Check out the repo
        uses: actions/checkout@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0-28

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image (versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}

      - name: Build and push Docker image (tagged)
        uses: docker/build-push-action@v4
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
          file: ${{ matrix.config.dockerfile }}
91  .github/workflows/examples-wasm.yml  (vendored)

@@ -1,91 +0,0 @@

name: Examples WASM
on:
  push:
    branches: ["master"]

  workflow_dispatch:

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  deploy-wasm-github-pages:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Pages
        uses: actions/configure-pages@v4

      - name: Setup emsdk
        uses: mymindstorm/setup-emsdk@v14

      - name: Build WASM Examples
        # Enable for real build later in whisper.cpp
        run: |
          mkdir -p build-em && cd build-em
          emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
          make -j

      - name: Create staging directory
        run: mkdir -p staging

      - name: Create .nojekyll file in staging directory
        run: touch staging/.nojekyll

      - name: Copy application files
        run: |
          build_dir=build-em/bin

          ls ${build_dir}

          # command.wasm
          target_dir=staging/command.wasm
          mkdir -p ${target_dir}
          cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
          cp ${build_dir}/libcommand.js ${target_dir}

          # bench.wasm
          target_dir=staging/bench.wasm
          mkdir -p ${target_dir}
          cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
          cp ${build_dir}/libbench.js ${target_dir}

          # stream.wasm
          target_dir=staging/stream.wasm
          mkdir -p ${target_dir}
          cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
          cp ${build_dir}/libstream.js ${target_dir}

          # whisper.wasm (this will be the main example page)
          target_dir=staging
          mkdir -p ${target_dir}
          cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
          cp ${build_dir}/libmain.js ${target_dir}

          # Copy Cross-Origin Isolation service worker
          cp -v examples/coi-serviceworker.js staging/

      - name: List files in staging directory (for debugging)
        run: |
          echo "Files in staging directory:"
          find staging -type f | sort

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./staging

      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
8  .github/workflows/examples.yml  (vendored)

@@ -10,8 +10,8 @@ on:
      - whisper.h

jobs:
  addon_node-ubuntu-22:
    runs-on: ubuntu-22.04
  addon_node-ubuntu-latest:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [ 16.x, 18.x ]

@@ -22,7 +22,7 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential git
          sudo apt-get install build-essential
          sudo apt-get install cmake
          sudo apt-get install libsdl2-dev

@@ -37,7 +37,7 @@ jobs:
        run: npm install

      - name: Compile addon.node
        run: npx cmake-js compile -T addon.node -B Release
        run: npx cmake-js compile -T whisper-addon -B Release

      - name: Download test model
        run: |
26  .gitignore  (vendored)

@@ -1,29 +1,29 @@

*.o
*.a
*.d
.cache/
.coreml/
.test/
.venv/
.vs/
.vscode/
.DS_Store
.vimspector.json
/CMakeSettings.json
/talk-llama.dSYM/

build/
build-*/
build_*/
build-coreml/
build-em/
build-debug/
build-release/
build-rwdi/
build-static/
build-cublas/
build-no-accel/
build-sanitize-addr/
build-sanitize-thread/

# SPM
.build/
.swiftpm
*.metallib

ggml-metal-embed.metal
ggml-metal-embed.metal.tmp

/main
/stream
/command

@@ -50,8 +50,6 @@ extra/bench-gg.txt
models/*.mlmodel
models/*.mlmodelc
models/*.mlpackage
models/*-encoder-openvino.xml
models/*-encoder-openvino-cache/
bindings/java/.gradle/
bindings/java/.idea/
.idea/

@@ -60,6 +58,4 @@ benchmark_results.csv
cmake-build-debug/
.cxx/
.gradle/
local.properties
.log
.exe
local.properties
3  .gitmodules  (vendored, new file)

@@ -0,0 +1,3 @@

[submodule "bindings/ios"]
    path = bindings/ios
    url = https://github.com/ggerganov/whisper.spm
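The compared branch adds bindings/ios as a git submodule pointing at the whisper.spm repository. A checkout of that branch would therefore typically need the submodule fetched, for example with:

    git submodule update --init bindings/ios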
510  AUTHORS

@@ -1,510 +0,0 @@
# date: Tue Feb 4 13:03:35 EET 2025
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0/0 <zero@imaskeleton.me>
|
||||
0cc4m <picard12@live.de>
|
||||
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
||||
65a <10104049+65a@users.noreply.github.com>
|
||||
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
||||
Aaron Taylor <aaron@exphat.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
||||
Adam Jones <domdomegg+git@gmail.com>
|
||||
Adrien Gallouët <adrien@gallouet.fr>
|
||||
Adrien Gallouët <angt@huggingface.co>
|
||||
AfryMask <AfryMask@163.com>
|
||||
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
AidanBeltonS <aidan.belton@codeplay.com>
|
||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||
Akash Mahajan <akash7190@gmail.com>
|
||||
Akash Mahajan <akashmjn@stanford.edu>
|
||||
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
||||
Alan <unknown>
|
||||
Albert Jin <albert.jin@gmail.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
||||
Alex Evgrashin <aevgrashin@yandex.ru>
|
||||
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
||||
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
||||
Alexandru Mariuti <alex@mariuti.com>
|
||||
Alexey Kharlamov <alexey@kharlamov.biz>
|
||||
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
||||
Ali Alameh <ali.alameh@isae.edu.lb>
|
||||
Alter <0x7c48@gmail.com>
|
||||
Ananta Bastola <anantarajbastola@gmail.com>
|
||||
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
||||
Andreas Lubbe <git@lubbe.org>
|
||||
Andreu Huguet <andreuhuguet@gmail.com>
|
||||
Andrew Huynh <a5thuynh@gmail.com>
|
||||
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||
Andrew S <andrews54757@gmail.com>
|
||||
Andy Maloney <asmaloney@gmail.com>
|
||||
Anton Kostin <masguit42@users.noreply.github.com>
|
||||
Artyom Mezin <psycho.fading@gmail.com>
|
||||
Asad Memon <asad.lionpk@gmail.com>
|
||||
Ashraful Islam <ashraful.meche@gmail.com>
|
||||
AsukaMinato <asukaminato@nyan.eu.org>
|
||||
AustinMroz <austinmroz@utexas.edu>
|
||||
Avik Sengupta <avik@sengupta.net>
|
||||
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
||||
Baffin Lee <baffinlee@gmail.com>
|
||||
Ben Ashbaugh <ben.ashbaugh@intel.com>
|
||||
Ben Nortier <bjnortier@gmail.com>
|
||||
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
||||
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
||||
Binozo <70137898+Binozo@users.noreply.github.com>
|
||||
Bo-Yi Wu <appleboy.tw@gmail.com>
|
||||
Boris Bliznioukov <blib@mail.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
||||
Brian Murray <brian@bmurray.ca>
|
||||
CRD716 <crd716@gmail.com>
|
||||
Canis Lupus <Canis-UK@users.noreply.github.com>
|
||||
Carlos Zoido <mrgalleta@gmail.com>
|
||||
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||
CarterLi999 <664681047@qq.com>
|
||||
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
||||
Changyeon Kim <cyzero.kim@samsung.com>
|
||||
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||
Charles Xu <charles.xu@arm.com>
|
||||
Chen Xi <xi2.chen@intel.com>
|
||||
Chen Xi <xixichen08@foxmail.com>
|
||||
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
||||
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
||||
Chidi Williams <williamschidi1@gmail.com>
|
||||
Chris Elrod <elrodc@gmail.com>
|
||||
Christian <12550267+iceychris@users.noreply.github.com>
|
||||
Christian Kastner <ckk@kvr.at>
|
||||
Clifford Heath <clifford.heath@gmail.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Colin <github@whoisc.cc>
|
||||
Conrad Kramer <conrad@conradkramer.com>
|
||||
Corey Earwood <iamcgn+github@gmail.com>
|
||||
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
||||
DAN™ <dranger003@gmail.com>
|
||||
DGdev91 <DGdev91@users.noreply.github.com>
|
||||
Damian Czaja <trojan295@protonmail.com>
|
||||
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
||||
Dan Johansson <dan.johansson@arm.com>
|
||||
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
|
||||
Daniel Ziegenberg <daniel@ziegenberg.at>
|
||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||
Dave <dave-fl@users.noreply.github.com>
|
||||
Dave Airlie <airlied@gmail.com>
|
||||
Dave Airlie <airlied@redhat.com>
|
||||
Daven Sanassy <daven@vochlea.co.uk>
|
||||
David <dnhkng@gmail.com>
|
||||
David Thorpe <djt@mutablelogic.com>
|
||||
DavidKorczynski <david@adalogics.com>
|
||||
Davidson Francis <davidsondfgl@gmail.com>
|
||||
Dener Stassun <denerstassun@gmail.com>
|
||||
Dibakar Gope <dibakar.gope@arm.com>
|
||||
Didzis Gosko <didzis@users.noreply.github.com>
|
||||
Diego Devesa <slarengh@gmail.com>
|
||||
Digipom <admin@digipom.com>
|
||||
Dimo <dimo@ieee.org>
|
||||
Djip007 <3705339+Djip007@users.noreply.github.com>
|
||||
Djip007 <djip.perois@free.fr>
|
||||
Dody Suria Wijaya <dodysw@gmail.com>
|
||||
Dou Xinpeng <15529241576@163.com>
|
||||
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
||||
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||
Duncan McConnell <ddmcconnell4@gmail.com>
|
||||
Egor Egorov <me@egorfine.com>
|
||||
Elkana Bardugo <ttv200@gmail.com>
|
||||
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
||||
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||
Eric Curtin <ericcurtin17@gmail.com>
|
||||
Eric Swanson <eswanson@alloscomp.com>
|
||||
Eric Tendian <erictendian@gmail.com>
|
||||
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||
Evan Jones <evan.q.jones@gmail.com>
|
||||
Evan Martin <evan.martin@gmail.com>
|
||||
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
||||
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
||||
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
||||
Fangjun Kuang <csukuangfj@gmail.com>
|
||||
Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
||||
Frankie Robertson <frankier@users.noreply.github.com>
|
||||
Gang Chen <goncha@gmail.com>
|
||||
Gavin Cai <gavin1818@hotmail.com>
|
||||
George Hindle <george@georgehindle.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gilad S <7817232+giladgd@users.noreply.github.com>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
||||
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||
Gleicon Moraes <gleicon@gmail.com>
|
||||
Gregor Jasny <gjasny@googlemail.com>
|
||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Hang <bebound@gmail.com>
|
||||
Haus1 <haus.xda@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
HimariO <dsfhe49854@gmail.com>
|
||||
Hong Bo PENG <penghb@cn.ibm.com>
|
||||
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
||||
Hugo <hugo@whynothugo.nl>
|
||||
Ian Bicking <ian@ianbicking.org>
|
||||
Ian Bull <irbull@eclipsesource.com>
|
||||
Ihar Hrachyshka <ihrachys@redhat.com>
|
||||
Ikko Ashimine <eltociear@gmail.com>
|
||||
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
||||
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
||||
Ivan <nekotekina@gmail.com>
|
||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||
Ivan Gorin <ivangorin21@gmail.com>
|
||||
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
|
||||
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||
JacobLinCool <jacoblincool@gmail.com>
|
||||
Jakub Ráček <blizzcz@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jay Binks <jaybinks@gmail.com>
|
||||
Jayant <jayantyadav202@gmail.com>
|
||||
Jeff Bolz <jbolz@nvidia.com>
|
||||
Jeroen Mostert <jeroen.mostert@cm.com>
|
||||
Jhen-Jie Hong <developer@jhen.me>
|
||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jo Liss <joliss42@gmail.com>
|
||||
Joe Todd <joe.todd@codeplay.com>
|
||||
Johan <jr.raffin@gmail.com>
|
||||
Johannes Gäßler <johannesg@5d6.de>
|
||||
John Balis <phobossystems@gmail.com>
|
||||
JohnnyB <jboero@users.noreply.github.com>
|
||||
Jonathan Soo <jcsoo@agora.com>
|
||||
Jonno <1160532+razodactyl@users.noreply.github.com>
|
||||
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
||||
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
||||
Josh Bleecher Snyder <josharian@gmail.com>
|
||||
Josscii <jossciiweiyi@gmail.com>
|
||||
Judd <foldl@users.noreply.github.com>
|
||||
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
||||
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
||||
Junil Kim <logyourself@gmail.com>
|
||||
Justina Cho <justcho5@gmail.com>
|
||||
Justine Tunney <jtunney@gmail.com>
|
||||
Justine Tunney <jtunney@mozilla.com>
|
||||
KITAITI Makoto <KitaitiMakoto@gmail.com>
|
||||
KP Kaiser <kirk@zothcorp.com>
|
||||
Kamilake <exjang0@gmail.com>
|
||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
||||
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||
Kendrick Taylor <kendrick@circuitsix.com>
|
||||
Kevin Brothaler <admin@digipom.com>
|
||||
Kevin Gibbons <bakkot@gmail.com>
|
||||
Konosuke Sakai <konosuke@konosuke.work>
|
||||
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||
Kreijstal <rainb@tfwno.gf>
|
||||
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||
LBlue <153975653+lbluep@users.noreply.github.com>
|
||||
Larry Battle <larry.battle.tech@gmail.com>
|
||||
Laytan Laats <laytanlaats@hotmail.com>
|
||||
Leo Moll <leo.moll@yeasoft.com>
|
||||
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
||||
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
||||
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
||||
Luis Herrera <herrera-luis@users.noreply.github.com>
|
||||
Lukas Rist <glaslos@gmail.com>
|
||||
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
||||
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
||||
Ma Mingfei <mingfei.ma@intel.com>
|
||||
Maciek <maciek.mab122@gmail.com>
|
||||
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
||||
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
||||
Mark Karpelès <MagicalTux@users.noreply.github.com>
|
||||
Mark Zhuang <zhuangqiubin@gmail.com>
|
||||
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||
Martin Delille <martin@delille.org>
|
||||
Martin Warnaar <martinwarnaar@gmail.com>
|
||||
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
||||
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
||||
Mathieu Baudier <mbaudier@argeo.org>
|
||||
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||
Matija Pevec <mightymatth@users.noreply.github.com>
|
||||
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
||||
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
||||
Meng, Hengyu <hengyu.meng@intel.com>
|
||||
Mengqing Cao <cmq0113@163.com>
|
||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||
Michael Rienstra <mrienstra@gmail.com>
|
||||
Mikhail Grigorev <sleuthhound@gmail.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohit Agarwal <mohit@sdf.org>
|
||||
Molly Sophia <mollysophia379@gmail.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
NETZkultur GmbH <mulholland@netzkultur.de>
|
||||
Natsu <chino@hotococoa.moe>
|
||||
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||
Neuman Vong <neuman.vong@gmail.com>
|
||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
||||
Nicholas Albion <nalbion@yahoo.com>
|
||||
Nico Bosshard <nico@bosshome.ch>
|
||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
||||
Niels Mayer <Niels.Mayer@gmail.com>
|
||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
||||
Nikolaj Olsson <nikse.dk@gmail.com>
|
||||
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
||||
Oleg Sidorov <me@whitebox.io>
|
||||
Oleg Sidorov <oleg@sidorov.nl>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
Ondrej Kokes <ondrej.kokes@gmail.com>
|
||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||
PAB <pierreantoine.bannier@gmail.com>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pedro Probst <pprobst@insiberia.net>
|
||||
Peng <hzp1024@qq.com>
|
||||
Peter <peter277@users.noreply.github.com>
|
||||
Philipp Zabel <philipp.zabel@gmail.com>
|
||||
Philippe Normand <phil@base-art.net>
|
||||
Philippe Normand <philn@igalia.com>
|
||||
Plamen Minev <pacominev@gmail.com>
|
||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
||||
R0CKSTAR <yeahdongcn@gmail.com>
|
||||
Radoslav Gerganov <rgerganov@gmail.com>
|
||||
Radosław Gryta <radek.gryta@gmail.com>
|
||||
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
|
||||
Raiya Araki <83504221+rai62@users.noreply.github.com>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
||||
RelatedTitle <r3latedtitle@gmail.com>
|
||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Rich Jones <miserlou@gmail.com>
|
||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
||||
Robin <robin.xw@hotmail.com>
|
||||
Roddur Dasgupta <roddurd@gmail.com>
|
||||
Roland Rabien <figbug@gmail.com>
|
||||
Romain Biessy <romain.biessy@codeplay.com>
|
||||
Ronsor <ronsor@ronsor.pw>
|
||||
Rotem Dan <rotemdan@gmail.com>
|
||||
Ryan Hitchman <hitchmanr@gmail.com>
|
||||
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
||||
RyanChang <ftes90015@gmail.com>
|
||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||
SXX <sxx1136965276@gmail.com>
|
||||
Sacha Arbonel <sacha.arbonel@hotmail.fr>
|
||||
Salman Faroz <stsfaroz@gmail.com>
|
||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
||||
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
||||
Sam Pullara <spullara@gmail.com>
|
||||
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
|
||||
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
||||
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
Sergio López <slp@redhat.com>
|
||||
Sergio López <slp@sinrega.org>
|
||||
Shanshan Shen <467638484@qq.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shupei Fan <dymarkfan@outlook.com>
|
||||
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||
Simon Moisselin <simon.moisstoll@gmail.com>
|
||||
Sindre Sorhus <sindresorhus@gmail.com>
|
||||
Slava Primenko <primenko.s@gmail.com>
|
||||
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
||||
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
|
||||
Stefan Sydow <s.sydow@heinlein-video.de>
|
||||
Stefan Sydow <stefan@sydow.email>
|
||||
Syahmi Azhar <prsyahmi@gmail.com>
|
||||
Syed Jafri <syedjafri97@gmail.com>
|
||||
Sơn Phan Trung <phantrungson17@gmail.com>
|
||||
Taisei Mima <bhbstar.me@gmail.com>
|
||||
Takeshi Inoue <inoue.takeshi@gmail.com>
|
||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||
Taras Glek <taras@thegp.com>
|
||||
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
||||
Thamster <Thamster@users.noreply.github.com>
|
||||
Thijs Raymakers <thijs@raymakers.nl>
|
||||
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
||||
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
||||
Tienshiao Ma <tienshiao@tienshiao.org>
|
||||
Tim Miller <drasticactions@users.noreply.github.com>
|
||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||
Tobrun <tobrun.van.nuland@gmail.com>
|
||||
Todd <taf2@users.noreply.github.com>
|
||||
Toliver <teejae@gmail.com>
|
||||
Tong Li <31761981+litongjava@users.noreply.github.com>
|
||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
||||
Topping1 <78745143+Topping1@users.noreply.github.com>
|
||||
Travis Cline <travis.cline@gmail.com>
|
||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
|
||||
Vadim Peretokin <vperetokin@hey.com>
|
||||
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
||||
Vin Misra <vinith@alum.mit.edu>
|
||||
Vulcan <93451215+trholding@users.noreply.github.com>
|
||||
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
||||
William Tambellini <william.tambellini@gmail.com>
|
||||
William Tambellini <wtambellini@sdl.com>
|
||||
Wilson Silva <wilson.dsigns@gmail.com>
|
||||
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
||||
Xuan Son Nguyen <thichthat@gmail.com>
|
||||
Yajing Tang <phillis@google.com>
|
||||
Yang Shen <aplshenyang@gmail.com>
|
||||
Yunès <jean.baptiste.yunes@free.fr>
|
||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
||||
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
|
||||
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
||||
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
||||
Zollner <24618122+Zolliner@users.noreply.github.com>
|
||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
||||
agray3 <agray3@users.noreply.github.com>
|
||||
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
||||
aldorof <aldorof@users.noreply.github.com>
|
||||
alonfaraj <alonfaraj@gmail.com>
|
||||
amd-dwang <dong.wang@amd.com>
|
||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
||||
andypayne <apayne@gmail.com>
|
||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||
arizhih <40765267+arizhih@users.noreply.github.com>
|
||||
automaticcat <daogiatuank54@gmail.com>
|
||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||
be-next <jerome.ramette@gmail.com>
|
||||
bert hubert <bert@hubertnet.nl>
|
||||
billyct <billy_allen@126.com>
|
||||
bmwl <brian.marshall@tolko.com>
|
||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||
bocytko <bocytko+github@gmail.com>
|
||||
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
||||
boolemancer <boolemancer@gmail.com>
|
||||
bradmit <151883577+bradmit@users.noreply.github.com>
|
||||
brunofaustino <b.fa.amorim@gmail.com>
|
||||
bssrdf <merlintiger@hotmail.com>
|
||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||
cdosoftei <ciprian.dosoftei@gmail.com>
|
||||
clach04 <Chris.Clark@actian.com>
|
||||
compilade <113953597+compilade@users.noreply.github.com>
|
||||
compilade <git@compilade.net>
|
||||
conradg <conradjgodfrey@gmail.com>
|
||||
crummyh <elijah@crums.us>
|
||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||
denersc <denerstassun@gmail.com>
|
||||
dscripka <dscripka@users.noreply.github.com>
|
||||
duthils <duthils@duthils.net>
|
||||
ecneladis <ecneladis@users.noreply.github.com>
|
||||
faker <nspyia2002@gmail.com>
|
||||
fitzsim <fitzsim@fitzsim.org>
|
||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
genevera (she/her) <genevera@users.noreply.github.com>
|
||||
geniusnut <geniusnut@gmail.com>
|
||||
gilbertgong <gilbert.gong@gmail.com>
|
||||
gn64 <yukikaze.jp@gmail.com>
|
||||
goldwaving <77494627+goldwaving@users.noreply.github.com>
|
||||
greeshmay <greeshmay@gmail.com>
|
||||
haopeng <657407891@qq.com>
|
||||
hipudding <huafengchun@gmail.com>
|
||||
hsinhoyeh <yhh92u@gmail.com>
|
||||
hydai <z54981220@gmail.com>
|
||||
iamthad <thadeus.j.fleming@gmail.com>
|
||||
issixx <46835150+issixx@users.noreply.github.com>
|
||||
james wolf <contractorwolf@hotmail.com>
|
||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||
jettoblack <jettoblack@gmail.com>
|
||||
jiez <373447296@qq.com>
|
||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
||||
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
||||
jwijffels <jwijffels@bnosac.be>
|
||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||
kamranjon <kamranjon@gmail.com>
|
||||
katsu560 <katsu560oo-@docomo.ne.jp>
|
||||
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
||||
keyehzy <msamuel@aluno.puc-rio.br>
|
||||
kunnis <kunnis@users.noreply.github.com>
|
||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||
leejet <leejet714@gmail.com>
|
||||
leo-pony <nengjunma@outlook.com>
|
||||
lhez <quic_lih@quicinc.com>
|
||||
litong <31761981+litongjava@users.noreply.github.com>
|
||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||
lnyan <lkwq007@gmail.com>
|
||||
luoyu-intel <yu.luo@intel.com>
|
||||
m.bell <m.bell@techsmith.com>
|
||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||
mashizora <30516315+mashizora@users.noreply.github.com>
|
||||
matt23654 <matthew.webber@protonmail.com>
|
||||
matteo <matteogeniaccio@yahoo.it>
|
||||
mgrachten <maarten@grachten.eu>
|
||||
mkiol <mkiol@users.noreply.github.com>
|
||||
mky_coder <47767389+mkycoder@users.noreply.github.com>
|
||||
novag <7754358+novag@users.noreply.github.com>
|
||||
pajowu <pajowu@pajowu.de>
|
||||
pengxin99 <pengxin.yuan@intel.com>
|
||||
petterreinholdtsen <pere-github@hungry.com>
|
||||
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
||||
rlapray <lapray.romain@gmail.com>
|
||||
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
||||
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
||||
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
||||
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
||||
slaren <slarengh@gmail.com>
|
||||
slashlib <slashlib@users.noreply.github.com>
|
||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
||||
st-gr <38470677+st-gr@users.noreply.github.com>
|
||||
stduhpf <stephduh@live.fr>
|
||||
stormofice <58337328+stormofice@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thefinaldegree <thefinaldegree@gmail.com>
|
||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||
toboil-features <160222185+toboil-features@users.noreply.github.com>
|
||||
trixirt <trix@redhat.com>
|
||||
ulatekh <ulatekh@yahoo.com>
|
||||
undef <undefdev@gmail.com>
|
||||
uvos <devnull@uvos.xyz>
|
||||
uvos <philipp@uvos.xyz>
|
||||
valVk <valVk@users.noreply.github.com>
|
||||
venkr <venkateshrameshkumar+1@gmail.com>
|
||||
vicalloy <zbirder@gmail.com>
|
||||
wangshuai09 <391746016@qq.com>
|
||||
woachk <24752637+woachk@users.noreply.github.com>
|
||||
xctan <axunlei@gmail.com>
|
||||
xdrudis <xavierdrudis@yahoo.es>
|
||||
yuri@FreeBSD <yuri@FreeBSD>
|
||||
zhangjixiong <code.zjx@gmail.com>
|
||||
zhentaoyu <zhentao.yu@intel.com>
|
||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||
zhouwg <zhouwg2000@gmail.com>
|
||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
||||
布客飞龙 <562826179@qq.com>
|
||||
Артём Земляк <azemlyak@smart-consulting.ru>
|
684  CMakeLists.txt

@@ -1,31 +1,21 @@
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||
project("whisper.cpp" C CXX)
|
||||
project("whisper.cpp" VERSION 1.7.5)
|
||||
include(CheckIncludeFileCXX)
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
|
||||
set(SOVERSION 1)
|
||||
|
||||
#set(CMAKE_WARN_DEPRECATED YES)
|
||||
set(CMAKE_WARN_UNUSED_CLI YES)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
project(whisper.cpp VERSION 1.5.1)
|
||||
|
||||
# Add path to modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||
set(WHISPER_STANDALONE ON)
|
||||
|
||||
include(git-vars)
|
||||
include(GitVars)
|
||||
include(BuildTypes)
|
||||
|
||||
# configure project version
|
||||
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
||||
endif()
|
||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||
else()
|
||||
set(WHISPER_STANDALONE OFF)
|
||||
@@ -35,16 +25,6 @@ if (EMSCRIPTEN)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
|
||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||
|
||||
# TODO: without these, we get the following error:
|
||||
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s TOTAL_STACK=5242880")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
|
||||
else()
|
||||
if (MINGW)
|
||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||
@@ -53,199 +33,533 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
# options
|
||||
|
||||
#
|
||||
# option list
|
||||
#
|
||||
if (APPLE)
|
||||
set(WHISPER_METAL_DEFAULT ON)
|
||||
else()
|
||||
set(WHISPER_METAL_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||
|
||||
# debug
|
||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||
|
||||
# build
|
||||
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||
option(WHISPER_USE_SYSTEM_GGML "whisper: use system-installed GGML library" OFF)
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||
|
||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||
|
||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||
|
||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
||||
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
||||
option(WHISPER_NO_F16C "whisper: disable F16c" OFF)
|
||||
|
||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||
|
||||
if (APPLE)
|
||||
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
|
||||
option(WHISPER_METAL "whisper: use Metal" ${WHISPER_METAL_DEFAULT})
|
||||
option(WHISPER_METAL_NDEBUG "whisper: disable Metal debugging" OFF)
|
||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||
else()
|
||||
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
|
||||
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
|
||||
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
|
||||
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
|
||||
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
|
||||
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
|
||||
endif()
|
||||
|
||||
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
||||
|
||||
# sanitizers
|
||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||
|
||||
# extra artifacts
|
||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
||||
|
||||
# 3rd party libs
|
||||
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||
endif()
|
||||
|
||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||
|
||||
# Required for relocatable CMake package
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||
|
||||
# override ggml options
|
||||
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
||||
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
||||
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
||||
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
||||
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
||||
|
||||
# transition helpers
|
||||
function (whisper_option_depr TYPE OLD NEW)
|
||||
if (${OLD})
|
||||
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
||||
set(${NEW} ON)
|
||||
if (NOT MSVC)
|
||||
if (WHISPER_SANITIZE_THREAD)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
||||
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
||||
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
||||
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
||||
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
||||
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
||||
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
||||
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||
whisper_option_depr(WARNING WHISPER_CCACHE GGML_CCACHE)
|
||||
if (WHISPER_SANITIZE_ADDRESS)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
if (GGML_CUDA AND NOT MSVC)
|
||||
#GGML_CUDA enabled, add the necessary compile options -Wno-deprecated-gpu-targets
|
||||
add_compile_options(-Wno-deprecated-gpu-targets)
|
||||
if (WHISPER_SANITIZE_UNDEFINED)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#
|
||||
# build the library
|
||||
#
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||
|
||||
if (NOT TARGET ggml)
|
||||
if (WHISPER_USE_SYSTEM_GGML)
|
||||
find_package(ggml REQUIRED)
|
||||
if (NOT ggml_FOUND)
|
||||
message(FATAL_ERROR "System-installed GGML library not found.")
|
||||
# dependencies
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
# on APPLE
|
||||
if (APPLE)
|
||||
# include Accelerate framework
|
||||
if (NOT WHISPER_NO_ACCELERATE)
|
||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
||||
|
||||
if (ACCELERATE_FRAMEWORK)
|
||||
message(STATUS "Accelerate framework found")
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
||||
else()
|
||||
message(FATAL_ERROR "Accelerate framework not found")
|
||||
endif()
|
||||
add_library(ggml ALIAS ggml::ggml)
|
||||
endif()
|
||||
|
||||
if (WHISPER_METAL)
|
||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||
|
||||
if (METAL_FRAMEWORK)
|
||||
message(STATUS "Metal framework found")
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS}
|
||||
${FOUNDATION_LIBRARY}
|
||||
${METAL_FRAMEWORK}
|
||||
${METALKIT_FRAMEWORK}
|
||||
)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_METAL)
|
||||
|
||||
if (WHISPER_METAL_NDEBUG)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_METAL_NDEBUG)
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "Metal framework not found")
|
||||
endif()
|
||||
|
||||
set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
|
||||
|
||||
# copy ggml-metal.metal to bin directory
|
||||
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
|
||||
endif()
|
||||
|
||||
if (WHISPER_COREML)
|
||||
find_library(FOUNDATION_FRAMEWORK Foundation)
|
||||
find_library(COREML_FRAMEWORK CoreML)
|
||||
|
||||
if (COREML_FRAMEWORK)
|
||||
message(STATUS "CoreML framework found")
|
||||
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
|
||||
else()
|
||||
message(FATAL_ERROR "CoreML framework not found")
|
||||
endif()
|
||||
|
||||
if (WHISPER_COREML_ALLOW_FALLBACK)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_COREML_ALLOW_FALLBACK)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WHISPER_OPENBLAS)
|
||||
set(WHISPER_BLAS_VENDOR "OpenBLAS")
|
||||
set(WHISPER_BLAS ON)
|
||||
endif()
|
||||
|
||||
if (WHISPER_BLAS)
|
||||
if (WIN32)
|
||||
if(DEFINED ENV{OPENBLAS_PATH})
|
||||
set(BLAS_LIBRARIES $ENV{OPENBLAS_PATH}/lib/libopenblas.dll.a)
|
||||
message(STATUS "Libraries ${BLAS_LIBRARIES}")
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
||||
include_directories($ENV{OPENBLAS_PATH}/include)
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
||||
else ()
|
||||
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH not defined.")
|
||||
endif ()
|
||||
else ()
|
||||
set(BLA_STATIC 1)
|
||||
set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
|
||||
set(BLA_SIZEOF_INTEGER 8)
|
||||
set(BLA_PREFER_PKGCONFIG 1)
|
||||
find_package(BLAS)
|
||||
|
||||
if(BLAS_FOUND)
|
||||
message(STATUS "BLAS compatible library found")
|
||||
message(STATUS "Libraries ${BLAS_LIBRARIES}")
|
||||
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas /usr/local/include/openblas $ENV{BLAS_HOME}/include)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
||||
include_directories(${BLAS_INCLUDE_DIRS})
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
||||
else()
|
||||
message(FATAL_ERROR "BLAS library was not found")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (WHISPER_CUBLAS)
|
||||
cmake_minimum_required(VERSION 3.17)
|
||||
|
||||
find_package(CUDAToolkit)
|
||||
|
||||
if (CUDAToolkit_FOUND)
|
||||
message(STATUS "cuBLAS found")
|
||||
|
||||
enable_language(CUDA)
|
||||
|
||||
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
|
||||
|
||||
add_compile_definitions(GGML_USE_CUBLAS)
|
||||
|
||||
if (WHISPER_STATIC)
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
|
||||
else()
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
|
||||
endif()
|
||||
|
||||
else()
|
||||
add_subdirectory(ggml)
|
||||
if(WIN32)
|
||||
# The following adds a _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR macro and is a workaround for
|
||||
# the Windows C++ standard library which does not support constexpr mutexes.
|
||||
# From the release notes: https://github.com/microsoft/STL/wiki/Changelog
|
||||
# Disable constexpr mutex constructor on Windows
|
||||
# Fixed mutex's constructor to be constexpr. #3824 #4000 #4339
|
||||
# Note: Programs that aren't following the documented restrictions on binary compatibility may encounter
|
||||
# null dereferences in mutex machinery. You must follow this rule:
|
||||
# When you mix binaries built by different supported versions of the toolset, the Redistributable version
|
||||
# must be at least as new as the latest toolset used by any app component.
|
||||
# You can define _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR as an escape hatch.
|
||||
#
|
||||
# Specifically to whisper.cpp this would cause a crash when using the Java bindings,
# resulting in an Invalid memory access error.
|
||||
target_compile_definitions(ggml-base PRIVATE _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
|
||||
message(FATAL_ERROR "cuBLAS not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if (WHISPER_HIPBLAS)
|
||||
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
|
||||
if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
|
||||
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
|
||||
endif()
|
||||
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
|
||||
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
|
||||
endif()
|
||||
|
||||
find_package(hip)
|
||||
find_package(hipblas)
|
||||
find_package(rocblas)
|
||||
|
||||
if (${hipblas_FOUND} AND ${hip_FOUND})
|
||||
message(STATUS "HIP and hipBLAS found")
|
||||
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
|
||||
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
|
||||
set_property(TARGET ggml-rocm PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
|
||||
|
||||
if (WHISPER_STATIC)
|
||||
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
||||
endif()
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ggml-rocm)
|
||||
else()
|
||||
message(FATAL_ERROR "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WHISPER_CLBLAST)
|
||||
find_package(CLBlast)
|
||||
if (CLBlast_FOUND)
|
||||
message(STATUS "CLBlast found")
|
||||
|
||||
set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h)
|
||||
|
||||
add_compile_definitions(GGML_USE_CLBLAST)
|
||||
|
||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} clblast)
|
||||
else()
|
||||
message(FATAL_ERROR "CLBlast not found")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if( WHISPER_OPENVINO )
|
||||
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
|
||||
endif()
|
||||
|
||||
# compiler flags
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
|
||||
endif ()
|
||||
|
||||
if (WHISPER_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
||||
-Wall \
|
||||
-Wextra \
|
||||
-Wpedantic \
|
||||
-Wshadow \
|
||||
-Wcast-qual \
|
||||
-Wstrict-prototypes \
|
||||
-Wpointer-arith \
|
||||
-Wno-unused-function \
|
||||
")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
||||
-Wall \
|
||||
-Wextra \
|
||||
-Wpedantic \
|
||||
-Wcast-qual \
|
||||
")
|
||||
else()
|
||||
# todo : msvc
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT MSVC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
message(STATUS "ARM detected")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
||||
message(STATUS "PowerPC detected")
|
||||
else()
|
||||
message(STATUS "x86 detected")
|
||||
if (MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /utf-8")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8")
|
||||
if(NOT WHISPER_NO_AVX2)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
|
||||
else()
|
||||
if(NOT WHISPER_NO_AVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
if (EMSCRIPTEN)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
else()
|
||||
if(NOT WHISPER_NO_AVX)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
||||
endif()
|
||||
if(NOT WHISPER_NO_AVX2)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
|
||||
endif()
|
||||
if(NOT WHISPER_NO_FMA)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
|
||||
endif()
|
||||
if(NOT WHISPER_NO_F16C)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||
endif()
|
||||
add_subdirectory(src)
|
||||
|
||||
#
|
||||
# install
|
||||
# POSIX conformance
|
||||
#
|
||||
|
||||
# clock_gettime came in POSIX.1b (1993)
|
||||
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
|
||||
# posix_memalign came in POSIX.1-2001 / SUSv3
|
||||
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
|
||||
add_compile_definitions(_XOPEN_SOURCE=600)
|
||||
|
||||
# Somehow in OpenBSD whenever POSIX conformance is specified
|
||||
# some string functions rely on locale_t availability,
|
||||
# which was introduced in POSIX.1-2008, forcing us to go higher
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
||||
remove_definitions(-D_XOPEN_SOURCE=600)
|
||||
add_compile_definitions(_XOPEN_SOURCE=700)
|
||||
endif()
|
||||
|
||||
# Data types, macros and functions related to controlling CPU affinity
|
||||
# are available on Linux through GNU extensions in libc
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
add_compile_definitions(_GNU_SOURCE)
|
||||
endif()
|
||||
|
||||
# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
|
||||
# and on macOS its availability depends on enabling Darwin extensions
|
||||
# similarly on DragonFly, enabling BSD extensions is necessary
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
add_compile_definitions(_DARWIN_C_SOURCE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "DragonFly")
|
||||
add_compile_definitions(_DARWIN_C_SOURCE)
|
||||
endif()
|
||||
|
||||
# alloca is a non-standard interface that is not visible on BSDs when
|
||||
# POSIX conformance is specified, but not all of them provide a clean way
|
||||
# to enable it in such cases
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
add_compile_definitions(__BSD_VISIBLE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
|
||||
add_compile_definitions(_NETBSD_SOURCE)
|
||||
endif()
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
||||
add_compile_definitions(_BSD_SOURCE)
|
||||
endif()
|
||||
|
||||
if (WHISPER_PERF)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
||||
endif()
|
||||
|
||||
#
|
||||
# whisper.coreml - Core ML support
|
||||
#
|
||||
|
||||
if (WHISPER_COREML)
|
||||
set(TARGET whisper.coreml)
|
||||
|
||||
add_library(${TARGET}
|
||||
coreml/whisper-encoder.h
|
||||
coreml/whisper-encoder.mm
|
||||
coreml/whisper-encoder-impl.h
|
||||
coreml/whisper-encoder-impl.m
|
||||
)
|
||||
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
.
|
||||
)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
|
||||
|
||||
set_target_properties(${TARGET} PROPERTIES
|
||||
COMPILE_FLAGS "-fobjc-arc"
|
||||
)
|
||||
endif()
|
||||
|
||||
if (WHISPER_OPENVINO)
|
||||
set(TARGET whisper.openvino)
|
||||
|
||||
add_library(${TARGET} OBJECT
|
||||
openvino/whisper-openvino-encoder.h
|
||||
openvino/whisper-openvino-encoder.cpp
|
||||
)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
.
|
||||
)
|
||||
|
||||
set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE openvino::runtime)
|
||||
endif()
|
||||
|
||||
#
|
||||
# whisper - this is the main library of the project
|
||||
#
|
||||
|
||||
set(TARGET whisper)
|
||||
|
||||
add_library(${TARGET}
|
||||
ggml.h
|
||||
ggml.c
|
||||
ggml-alloc.h
|
||||
ggml-alloc.c
|
||||
ggml-backend.h
|
||||
ggml-backend.c
|
||||
ggml-quants.h
|
||||
ggml-quants.c
|
||||
${GGML_SOURCES_METAL}
|
||||
${GGML_SOURCES_CUDA}
|
||||
${GGML_SOURCES_OPENCL}
|
||||
whisper.h
|
||||
whisper.cpp
|
||||
)
|
||||
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
.
|
||||
)
|
||||
|
||||
if (WHISPER_COREML)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper.coreml)
|
||||
endif()
|
||||
|
||||
if (WHISPER_OPENVINO)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper.openvino)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
else()
|
||||
target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${TARGET} PUBLIC
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
|
||||
target_compile_definitions(${TARGET} PUBLIC
|
||||
WHISPER_SHARED
|
||||
GGML_SHARED
|
||||
)
|
||||
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
WHISPER_BUILD
|
||||
GGML_BUILD
|
||||
)
|
||||
|
||||
if (WHISPER_METAL)
|
||||
# TODO: I think this should make ggml-metal.m "see" the ggml-metal.metal file from the "bin" directory
|
||||
# but for some reason it does not work here like it does in llama.cpp
|
||||
set_target_properties(${TARGET} PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_SOURCES_CUDA)
|
||||
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
|
||||
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
|
||||
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
|
||||
endif()
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(${TARGET} PUBLIC
|
||||
${WHISPER_EXTRA_FLAGS}
|
||||
)
|
||||
|
||||
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
||||
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
||||
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
||||
install(TARGETS ${TARGET}
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib/static
|
||||
RUNTIME DESTINATION bin
|
||||
RESOURCE DESTINATION bin
|
||||
PUBLIC_HEADER DESTINATION include
|
||||
)
|
||||
|
||||
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||
#
|
||||
# bindings
|
||||
#
|
||||
|
||||
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||
|
||||
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
||||
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
||||
|
||||
configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
||||
PATH_VARS
|
||||
WHISPER_INCLUDE_INSTALL_DIR
|
||||
WHISPER_LIB_INSTALL_DIR
|
||||
WHISPER_BIN_INSTALL_DIR )
|
||||
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
VERSION ${WHISPER_INSTALL_VERSION}
|
||||
COMPATIBILITY SameMajorVersion)
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||
|
||||
configure_file(cmake/whisper.pc.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
@ONLY)
|
||||
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||
DESTINATION lib/pkgconfig)
|
||||
add_subdirectory(bindings)
|
||||
|
||||
#
|
||||
# programs, examples and tests
|
||||
#
|
||||
|
||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
include(CTest)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
endif ()
|
||||
|
||||
if (WHISPER_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
set(MSVC_WARNING_FLAGS
|
||||
/wd4101 # Unreferenced local variable
|
||||
/wd4005 # Macro redefinition
|
||||
/wd4065 # switch statement contains 'default' but no 'case' labels
|
||||
/wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
|
||||
/wd4244 # Conversion from one type to another type, possible loss of data
|
||||
/wd4805 # Unsafe mix of types in an operation
|
||||
/wd4305 # Truncation from 'type1' to 'type2' (often double to float)
|
||||
/wd4996 # Function or variable may be unsafe/deprecated
|
||||
)
|
||||
function(disable_msvc_warnings target_name)
|
||||
if(TARGET ${target_name})
|
||||
target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if (WHISPER_BUILD_EXAMPLES)
|
||||
disable_msvc_warnings(whisper)
|
||||
disable_msvc_warnings(common)
|
||||
disable_msvc_warnings(common-sdl)
|
||||
disable_msvc_warnings(lsp)
|
||||
disable_msvc_warnings(wchess-core)
|
||||
disable_msvc_warnings(whisper-command)
|
||||
disable_msvc_warnings(whisper-cli)
|
||||
disable_msvc_warnings(whisper-server)
|
||||
disable_msvc_warnings(whisper-stream)
|
||||
disable_msvc_warnings(whisper-talk-llama)
|
||||
disable_msvc_warnings(whisper-bench)
|
||||
disable_msvc_warnings(quantize)
|
||||
disable_msvc_warnings(vad-speech-segments)
|
||||
endif()
|
||||
endif()
|
||||
|
2
LICENSE
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 The ggml authors
|
||||
Copyright (c) 2023 Georgi Gerganov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
414
Makefile
@ -1,12 +1,386 @@
|
||||
default: main bench quantize server
|
||||
|
||||
ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifndef UNAME_P
|
||||
UNAME_P := $(shell uname -p)
|
||||
endif
|
||||
|
||||
ifndef UNAME_M
|
||||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
ifndef NVCC_VERSION
|
||||
ifeq ($(call,$(shell which nvcc))$(.SHELLSTATUS),0)
|
||||
NVCC_VERSION := $(shell nvcc --version | egrep -o "V[0-9]+.[0-9]+.[0-9]+" | cut -c2-)
|
||||
endif
|
||||
endif
|
||||
|
||||
CCV := $(shell $(CC) --version | head -n 1)
|
||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||
|
||||
# Mac OS + Arm can report x86_64
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
ifneq ($(UNAME_P),arm)
|
||||
SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
|
||||
ifeq ($(SYSCTL_M),1)
|
||||
# UNAME_P := arm
|
||||
# UNAME_M := arm64
|
||||
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# Compile flags
|
||||
#
|
||||
|
||||
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
||||
LDFLAGS =
|
||||
|
||||
# clock_gettime came in POSIX.1b (1993)
|
||||
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
|
||||
# posix_memalign came in POSIX.1-2001 / SUSv3
|
||||
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
|
||||
CFLAGS += -D_XOPEN_SOURCE=600
|
||||
CXXFLAGS += -D_XOPEN_SOURCE=600
|
||||
|
||||
# Somehow in OpenBSD whenever POSIX conformance is specified
|
||||
# some string functions rely on locale_t availability,
|
||||
# which was introduced in POSIX.1-2008, forcing us to go higher
|
||||
ifeq ($(UNAME_S),OpenBSD)
|
||||
CFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
|
||||
CXXFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
|
||||
endif
|
||||
|
||||
# Data types, macros and functions related to controlling CPU affinity
|
||||
# are available on Linux through GNU extensions in libc
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
CFLAGS += -D_GNU_SOURCE
|
||||
CXXFLAGS += -D_GNU_SOURCE
|
||||
endif
|
||||
|
||||
# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
|
||||
# and on macOS its availability depends on enabling Darwin extensions
|
||||
# similarly on DragonFly, enabling BSD extensions is necessary
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -D_DARWIN_C_SOURCE
|
||||
CXXFLAGS += -D_DARWIN_C_SOURCE
|
||||
endif
|
||||
ifeq ($(UNAME_S),DragonFly)
|
||||
CFLAGS += -D__BSD_VISIBLE
|
||||
CXXFLAGS += -D__BSD_VISIBLE
|
||||
endif
|
||||
|
||||
# alloca is a non-standard interface that is not visible on BSDs when
|
||||
# POSIX conformance is specified, but not all of them provide a clean way
|
||||
# to enable it in such cases
|
||||
ifeq ($(UNAME_S),FreeBSD)
|
||||
CFLAGS += -D__BSD_VISIBLE
|
||||
CXXFLAGS += -D__BSD_VISIBLE
|
||||
endif
|
||||
ifeq ($(UNAME_S),NetBSD)
|
||||
CFLAGS += -D_NETBSD_SOURCE
|
||||
CXXFLAGS += -D_NETBSD_SOURCE
|
||||
endif
|
||||
ifeq ($(UNAME_S),OpenBSD)
|
||||
CFLAGS += -D_BSD_SOURCE
|
||||
CXXFLAGS += -D_BSD_SOURCE
|
||||
endif
|
||||
|
||||
# OS specific
|
||||
# TODO: support Windows
|
||||
ifeq ($(filter $(UNAME_S),Linux Darwin DragonFly FreeBSD NetBSD OpenBSD Haiku),$(UNAME_S))
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
endif
|
||||
|
||||
# Architecture specific
|
||||
# TODO: probably these flags need to be tweaked on some architectures
|
||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CPUINFO_CMD := sysctl machdep.cpu.features machdep.cpu.leaf7_features
|
||||
else ifeq ($(UNAME_S),Linux)
|
||||
CPUINFO_CMD := cat /proc/cpuinfo
|
||||
else ifneq (,$(filter MINGW32_NT% MINGW64_NT%,$(UNAME_S)))
|
||||
CPUINFO_CMD := cat /proc/cpuinfo
|
||||
else ifneq (,$(filter DragonFly FreeBSD,$(UNAME_S)))
|
||||
CPUINFO_CMD := grep Features /var/run/dmesg.boot
|
||||
else ifeq ($(UNAME_S),Haiku)
|
||||
CPUINFO_CMD := sysinfo -cpu
|
||||
endif
|
||||
|
||||
ifdef CPUINFO_CMD
|
||||
AVX_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX|AVX1.0')
|
||||
ifneq (,$(AVX_M))
|
||||
CFLAGS += -mavx
|
||||
CXXFLAGS += -mavx
|
||||
endif
|
||||
|
||||
AVX2_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX2')
|
||||
ifneq (,$(AVX2_M))
|
||||
CFLAGS += -mavx2
|
||||
CXXFLAGS += -mavx2
|
||||
endif
|
||||
|
||||
FMA_M := $(shell $(CPUINFO_CMD) | grep -iw 'FMA')
|
||||
ifneq (,$(FMA_M))
|
||||
CFLAGS += -mfma
|
||||
CXXFLAGS += -mfma
|
||||
endif
|
||||
|
||||
F16C_M := $(shell $(CPUINFO_CMD) | grep -iw 'F16C')
|
||||
ifneq (,$(F16C_M))
|
||||
CFLAGS += -mf16c
|
||||
CXXFLAGS += -mf16c
|
||||
endif
|
||||
|
||||
SSE3_M := $(shell $(CPUINFO_CMD) | grep -iwE 'PNI|SSE3')
|
||||
ifneq (,$(SSE3_M))
|
||||
CFLAGS += -msse3
|
||||
CXXFLAGS += -msse3
|
||||
endif
|
||||
|
||||
SSSE3_M := $(shell $(CPUINFO_CMD) | grep -iw 'SSSE3')
|
||||
ifneq (,$(SSSE3_M))
|
||||
CFLAGS += -mssse3
|
||||
CXXFLAGS += -mssse3
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
||||
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
||||
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
||||
CFLAGS += -mpower9-vector
|
||||
endif
|
||||
# Require c++23's std::byteswap for big-endian support.
|
||||
ifeq ($(UNAME_M),ppc64)
|
||||
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef WHISPER_NO_ACCELERATE
|
||||
# Mac M1 - include Accelerate framework
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -DGGML_USE_ACCELERATE
|
||||
LDFLAGS += -framework Accelerate
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef WHISPER_COREML
|
||||
CXXFLAGS += -DWHISPER_USE_COREML
|
||||
LDFLAGS += -framework Foundation -framework CoreML
|
||||
|
||||
ifdef WHISPER_COREML_ALLOW_FALLBACK
|
||||
CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef WHISPER_NO_METAL
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
WHISPER_METAL := 1
|
||||
|
||||
CFLAGS += -DGGML_USE_METAL
|
||||
CXXFLAGS += -DGGML_USE_METAL
|
||||
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef WHISPER_OPENBLAS
|
||||
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
|
||||
LDFLAGS += -lopenblas
|
||||
endif
|
||||
|
||||
ifdef WHISPER_CUBLAS
|
||||
ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
|
||||
CUDA_ARCH_FLAG=native
|
||||
else
|
||||
CUDA_ARCH_FLAG=all
|
||||
endif
|
||||
|
||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
||||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
|
||||
WHISPER_OBJ += ggml-cuda.o
|
||||
NVCC = nvcc
|
||||
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
|
||||
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
endif
|
||||
|
||||
ifdef WHISPER_HIPBLAS
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
HIPCC ?= $(ROCM_PATH)/bin/hipcc
|
||||
GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
|
||||
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
|
||||
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
|
||||
LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
|
||||
LDFLAGS += -lhipblas -lamdhip64 -lrocblas
|
||||
HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
|
||||
WHISPER_OBJ += ggml-cuda.o
|
||||
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||
endif
|
||||
|
||||
ifdef WHISPER_CLBLAST
|
||||
CFLAGS += -DGGML_USE_CLBLAST
|
||||
CXXFLAGS += -DGGML_USE_CLBLAST
|
||||
LDFLAGS += -lclblast
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
LDFLAGS += -framework OpenCL
|
||||
else
|
||||
LDFLAGS += -lOpenCL
|
||||
endif
|
||||
WHISPER_OBJ += ggml-opencl.o
|
||||
|
||||
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
endif
|
||||
|
||||
ifdef WHISPER_GPROF
|
||||
CFLAGS += -pg
|
||||
CXXFLAGS += -pg
|
||||
endif
|
||||
|
||||
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
||||
CFLAGS += -mcpu=native
|
||||
CXXFLAGS += -mcpu=native
|
||||
endif
|
||||
|
||||
ifneq ($(filter armv6%,$(UNAME_M)),)
|
||||
# 32-bit Raspberry Pi 1, 2, 3
|
||||
CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
|
||||
endif
|
||||
|
||||
ifneq ($(filter armv7%,$(UNAME_M)),)
|
||||
# 32-bit ARM, for example on Armbian or possibly Raspbian
|
||||
#CFLAGS += -mfpu=neon -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
#CXXFLAGS += -mfpu=neon -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
|
||||
# 64-bit ARM on 32-bit OS, use these (TODO: auto-detect 64-bit)
|
||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
endif
|
||||
|
||||
ifneq ($(filter armv8%,$(UNAME_M)),)
|
||||
# Raspberry Pi 4
|
||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
|
||||
endif
|
||||
|
||||
#
|
||||
# Print build information
|
||||
#
|
||||
|
||||
$(info I whisper.cpp build info: )
|
||||
$(info I UNAME_S: $(UNAME_S))
|
||||
$(info I UNAME_P: $(UNAME_P))
|
||||
$(info I UNAME_M: $(UNAME_M))
|
||||
$(info I CFLAGS: $(CFLAGS))
|
||||
$(info I CXXFLAGS: $(CXXFLAGS))
|
||||
$(info I LDFLAGS: $(LDFLAGS))
|
||||
$(info I CC: $(CCV))
|
||||
$(info I CXX: $(CXXV))
|
||||
$(info )
|
||||
|
||||
#
|
||||
# Build library
|
||||
#
|
||||
|
||||
ggml.o: ggml.c ggml.h ggml-cuda.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o
|
||||
|
||||
whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
ifndef WHISPER_COREML
|
||||
WHISPER_OBJ += whisper.o
|
||||
else
|
||||
whisper-encoder.o: coreml/whisper-encoder.mm coreml/whisper-encoder.h
|
||||
$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder.mm -o whisper-encoder.o
|
||||
|
||||
whisper-encoder-impl.o: coreml/whisper-encoder-impl.m coreml/whisper-encoder-impl.h
|
||||
$(CXX) -O3 -I . -fobjc-arc -c coreml/whisper-encoder-impl.m -o whisper-encoder-impl.o
|
||||
|
||||
WHISPER_OBJ += whisper.o whisper-encoder.o whisper-encoder-impl.o
|
||||
endif
|
||||
|
||||
ifdef WHISPER_METAL
|
||||
ggml-metal.o: ggml-metal.m ggml-metal.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
WHISPER_OBJ += ggml-metal.o
|
||||
endif
|
||||
|
||||
libwhisper.a: $(WHISPER_OBJ)
|
||||
$(AR) rcs libwhisper.a $(WHISPER_OBJ)
|
||||
|
||||
libwhisper.so: $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)
|
||||
|
||||
clean:
|
||||
rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so
|
||||
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
|
||||
CC_SDL=`sdl2-config --cflags --libs`
|
||||
|
||||
SRC_COMMON = examples/common.cpp examples/common-ggml.cpp
|
||||
SRC_COMMON_SDL = examples/common-sdl.cpp
|
||||
|
||||
main: examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o main $(LDFLAGS)
|
||||
./main -h
|
||||
|
||||
bench: examples/bench/bench.cpp $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp $(WHISPER_OBJ) -o bench $(LDFLAGS)
|
||||
|
||||
quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
|
||||
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
|
||||
|
||||
server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
|
||||
|
||||
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
command: examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
|
||||
$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o talk-llama $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
#
|
||||
# Audio samples
|
||||
#
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
cmake -B build $(CMAKE_ARGS)
|
||||
cmake --build build --config Release
|
||||
|
||||
# download a few audio samples into folder "./samples":
|
||||
.PHONY: samples
|
||||
samples:
|
||||
@ -18,6 +392,17 @@ samples:
|
||||
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
||||
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
||||
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
|
||||
@echo "Converting to 16-bit WAV ..."
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
||||
@rm samples/*.ogg
|
||||
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
|
||||
@rm samples/mm1.wav
|
||||
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
|
||||
@rm samples/a13.mp3
|
||||
@ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
|
||||
@rm samples/diffusion2023-07-03.flac
|
||||
|
||||
#
|
||||
# Models
|
||||
@ -37,22 +422,27 @@ samples:
|
||||
.PHONY: large-v1
|
||||
.PHONY: large-v2
|
||||
.PHONY: large-v3
|
||||
.PHONY: large-v3-turbo
|
||||
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
|
||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
|
||||
bash ./models/download-ggml-model.sh $@
|
||||
cmake -B build $(CMAKE_ARGS)
|
||||
cmake --build build --config Release
|
||||
@echo ""
|
||||
@echo "==============================================="
|
||||
@echo "Running $@ on all samples in ./samples ..."
|
||||
@echo "==============================================="
|
||||
@echo ""
|
||||
@for f in samples/*.{flac,mp3,ogg,wav}; do \
|
||||
@for f in samples/*.wav; do \
|
||||
echo "----------------------------------------------" ; \
|
||||
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
||||
echo "----------------------------------------------" ; \
|
||||
echo "----------------------------------------------" ; \
|
||||
echo "" ; \
|
||||
./build/bin/whisper-cli -m models/ggml-$@.bin -f $$f ; \
|
||||
./main -m models/ggml-$@.bin -f $$f ; \
|
||||
echo "" ; \
|
||||
done
|
||||
|
||||
#
|
||||
# Tests
|
||||
#
|
||||
|
||||
.PHONY: tests
|
||||
tests:
|
||||
bash ./tests/run-tests.sh $(word 2, $(MAKECMDGOALS))
|
||||
|
77
Package.swift
Normal file
@ -0,0 +1,77 @@
|
||||
// swift-tools-version:5.5
|
||||
|
||||
import PackageDescription
|
||||
|
||||
#if arch(arm) || arch(arm64)
|
||||
let platforms: [SupportedPlatform]? = [
|
||||
.macOS(.v12),
|
||||
.iOS(.v14),
|
||||
.watchOS(.v4),
|
||||
.tvOS(.v14)
|
||||
]
|
||||
let exclude: [String] = []
|
||||
let resources: [Resource] = [
|
||||
.process("ggml-metal.metal")
|
||||
]
|
||||
let additionalSources: [String] = ["ggml-metal.m"]
|
||||
let additionalSettings: [CSetting] = [
|
||||
.unsafeFlags(["-fno-objc-arc"]),
|
||||
.define("GGML_USE_METAL")
|
||||
]
|
||||
#else
|
||||
let platforms: [SupportedPlatform]? = nil
|
||||
let exclude: [String] = ["ggml-metal.metal"]
|
||||
let resources: [Resource] = []
|
||||
let additionalSources: [String] = []
|
||||
let additionalSettings: [CSetting] = []
|
||||
#endif
|
||||
|
||||
let package = Package(
|
||||
name: "whisper",
|
||||
platforms: platforms,
|
||||
products: [
|
||||
.library(name: "whisper", targets: ["whisper"]),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "whisper",
|
||||
path: ".",
|
||||
exclude: exclude + [
|
||||
"bindings",
|
||||
"cmake",
|
||||
"coreml",
|
||||
"examples",
|
||||
"extra",
|
||||
"models",
|
||||
"samples",
|
||||
"tests",
|
||||
"CMakeLists.txt",
|
||||
"ggml-cuda.cu",
|
||||
"ggml-cuda.h",
|
||||
"Makefile"
|
||||
],
|
||||
sources: [
|
||||
"ggml.c",
|
||||
"whisper.cpp",
|
||||
"ggml-alloc.c",
|
||||
"ggml-backend.c",
|
||||
"ggml-quants.c"
|
||||
] + additionalSources,
|
||||
resources: resources,
|
||||
publicHeadersPath: "spm-headers",
|
||||
cSettings: [
|
||||
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
||||
.define("GGML_USE_ACCELERATE")
|
||||
// NOTE: NEW_LAPACK will require iOS version 16.4+
|
||||
// We should consider adding this in the future when we drop support for iOS 14
|
||||
// (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
||||
// .define("ACCELERATE_NEW_LAPACK"),
|
||||
// .define("ACCELERATE_LAPACK_ILP64")
|
||||
] + additionalSettings,
|
||||
linkerSettings: [
|
||||
.linkedFramework("Accelerate")
|
||||
]
|
||||
)
|
||||
],
|
||||
cxxLanguageStandard: .cxx11
|
||||
)
|
249
README_sycl.md
@ -1,249 +0,0 @@
|
||||
# whisper.cpp for SYCL
|
||||
|
||||
[Background](#background)
|
||||
|
||||
[OS](#os)
|
||||
|
||||
[Intel GPU](#intel-gpu)
|
||||
|
||||
[Linux](#linux)
|
||||
|
||||
[Environment Variable](#environment-variable)
|
||||
|
||||
[Known Issue](#known-issue)
|
||||
|
||||
[Todo](#todo)
|
||||
|
||||
## Background
|
||||
|
||||
SYCL is a higher-level programming model that improves programming productivity on various hardware accelerators such as CPUs, GPUs, and FPGAs. It is a single-source, embedded, domain-specific language based on pure C++17.
|
||||
|
||||
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
||||
|
||||
Intel uses SYCL as a direct programming language to support CPUs, GPUs, and FPGAs.
|
||||
|
||||
To avoid re-inventing the wheel, this code follows the other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate the code to SYCL.
|
||||
|
||||
whisper.cpp for SYCL is used to support Intel GPUs.
|
||||
|
||||
For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
|
||||
|
||||
## OS
|
||||
|
||||
|OS|Status|Verified|
|
||||
|-|-|-|
|
||||
|Linux|Support|Ubuntu 22.04|
|
||||
|Windows|Ongoing| |
|
||||
|
||||
|
||||
## Intel GPU
|
||||
|
||||
|Intel GPU| Status | Verified Model|
|
||||
|-|-|-|
|
||||
|Intel Data Center Max Series| Support| Max 1550|
|
||||
|Intel Data Center Flex Series| Support| Flex 170|
|
||||
|Intel Arc Series| Support| Arc 770|
|
||||
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
||||
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
||||
|
||||
|
||||
## Linux
|
||||
|
||||
### Setup Environment
|
||||
|
||||
1. Install Intel GPU driver.
|
||||
|
||||
a. Install the Intel GPU driver by following the official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
||||
|
||||
Note: for iGPU, please install the client GPU driver.
|
||||
|
||||
b. Add your user to the video and render groups.
|
||||
|
||||
```
|
||||
sudo usermod -aG render username
|
||||
sudo usermod -aG video username
|
||||
```
|
||||
|
||||
Note: log in again for the group membership to take effect.
|
||||
|
||||
c. Check
|
||||
|
||||
```
|
||||
sudo apt install clinfo
|
||||
sudo clinfo -l
|
||||
```
|
||||
|
||||
Output (example):
|
||||
|
||||
```
|
||||
Platform #0: Intel(R) OpenCL Graphics
|
||||
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
||||
|
||||
|
||||
Platform #0: Intel(R) OpenCL HD Graphics
|
||||
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
||||
```
|
||||
|
||||
2. Install Intel® oneAPI Base toolkit.
|
||||
|
||||
|
||||
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
||||
|
||||
We recommend installing to the default folder: **/opt/intel/oneapi**.
|
||||
|
||||
The following guide uses the default folder as an example. If you installed to a different folder, adjust the paths accordingly.
|
||||
|
||||
b. Check
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
sycl-ls
|
||||
```
|
||||
|
||||
There should be one or more Level-Zero devices listed, such as **[ext_oneapi_level_zero:gpu:0]**.
|
||||
|
||||
Output (example):
|
||||
```
|
||||
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
||||
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
||||
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
||||
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
||||
|
||||
```
|
||||
|
||||
3. Build locally:
|
||||
|
||||
```
|
||||
mkdir -p build
|
||||
cd build
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
#for FP16
|
||||
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
||||
|
||||
#for FP32
|
||||
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
#build example/main only
|
||||
#cmake --build . --config Release --target main
|
||||
|
||||
#build all binary
|
||||
cmake --build . --config Release -v
|
||||
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
./examples/sycl/build.sh
|
||||
```
|
||||
|
||||
Note:
|
||||
|
||||
- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.
|
||||
|
||||
### Run
|
||||
|
||||
1. Put the model file in the **models** folder.
|
||||
|
||||
2. Enable the oneAPI runtime environment:
|
||||
|
||||
```
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
```
|
||||
|
||||
3. List the device IDs
|
||||
|
||||
Run it without parameters:
|
||||
|
||||
```
|
||||
./build/bin/ls-sycl-device
|
||||
|
||||
or
|
||||
|
||||
./build/bin/main
|
||||
```
|
||||
|
||||
Check the IDs in the startup log, for example:
|
||||
|
||||
```
|
||||
found 4 SYCL devices:
|
||||
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||
|
||||
```
|
||||
|
||||
|Attribute|Note|
|
||||
|-|-|
|
||||
|compute capability 1.3|Level-Zero runtime, recommended|
|
||||
|compute capability 3.0|OpenCL runtime, slower than Level-Zero in most cases|
|
||||
|
||||
4. Set device ID and execute whisper.cpp
|
||||
|
||||
Set device ID = 0 with **GGML_SYCL_DEVICE=0**:
|
||||
|
||||
```
|
||||
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||
```
|
||||
or run by script:
|
||||
|
||||
```
|
||||
./examples/sycl/run_whisper.sh
|
||||
```
|
||||
|
||||
|
||||
|
||||
5. Check the device ID in output
|
||||
|
||||
For example:
|
||||
```
|
||||
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
```
|
||||
|
||||
|
||||
## Environment Variable
|
||||
|
||||
#### Build
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
||||
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
||||
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
||||
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
||||
|
||||
#### Running
|
||||
|
||||
|
||||
|Name|Value|Function|
|
||||
|-|-|-|
|
||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device ID to use. Check the available device IDs in the output of a default run|
|
||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable debug logging via the GGML_SYCL_DEBUG macro|
|
||||
|
||||
## Known Issue
|
||||
|
||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||
|
||||
The oneAPI runtime environment has not been enabled.
|
||||
|
||||
Install the oneAPI Base Toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.
|
||||
|
||||
|
||||
- Hang during startup
|
||||
|
||||
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems the memcpy can misbehave and block.
|
||||
|
||||
Solution: add **--no-mmap**.
|
||||
|
||||
## Todo
|
||||
|
||||
- Support building on Windows.
|
||||
|
||||
- Support multiple cards.
|
@ -11,16 +11,11 @@ UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
||||
BUILD_DIR := build_go
|
||||
BUILD_DIR := build
|
||||
MODELS_DIR := models
|
||||
EXAMPLES_DIR := $(wildcard examples/*)
|
||||
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
||||
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
|
||||
|
||||
ifeq ($(GGML_CUDA),1)
|
||||
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
||||
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
|
||||
endif
|
||||
INCLUDE_PATH := $(abspath ../..)
|
||||
LIBRARY_PATH := $(abspath ../..)
|
||||
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
||||
@ -29,10 +24,8 @@ endif
|
||||
all: clean whisper examples
|
||||
|
||||
whisper: mkdir
|
||||
cmake -S ../.. -B ../../${BUILD_DIR} \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
cmake --build ../../${BUILD_DIR} --target whisper
|
||||
@echo Build whisper
|
||||
@${MAKE} -C ../.. libwhisper.a
|
||||
|
||||
test: model-small whisper modtidy
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
|
@ -31,7 +31,7 @@ func main() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := context.Process(samples, nil, nil, nil); err != nil {
|
||||
if err := context.Process(samples, nil, nil); err != nil {
|
||||
return err
|
||||
}
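For context, the hunk above drops the progress-callback argument from `Context.Process` on this branch, while master keeps the four-argument form. Below is a minimal sketch (not part of the diff) of how the master-side signature is typically driven with explicit callbacks; the `transcribe` helper and the model path are hypothetical, and the callback types are assumed to be the `EncoderBeginCallback`, `SegmentCallback`, and `ProgressCallback` shown later in the `pkg/whisper` changes.

```go
package main

import (
	"fmt"
	"log"

	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
)

// Sketch only: run the four-argument Process with segment and progress
// callbacks. "samples" is assumed to already hold 16 kHz mono float32 audio.
func transcribe(model whisper.Model, samples []float32) error {
	ctx, err := model.NewContext()
	if err != nil {
		return err
	}
	return ctx.Process(samples,
		nil, // encoder-begin callback: nil means never abort encoding
		func(segment whisper.Segment) { // called once per decoded segment
			fmt.Printf("[%v -> %v] %s\n", segment.Start, segment.End, segment.Text)
		},
		func(progress int) { // called with a progress percentage
			fmt.Printf("progress: %d%%\n", progress)
		},
	)
}

func main() {
	model, err := whisper.New("models/ggml-base.en.bin") // assumed model path
	if err != nil {
		log.Fatal(err)
	}
	defer model.Close()
	// samples would normally come from decoding a WAV file; omitted here.
	if err := transcribe(model, nil); err != nil {
		log.Fatal(err)
	}
}
```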
|
||||
|
||||
@ -51,7 +51,7 @@ func main() {
|
||||
In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ggml-org/whisper.cpp.git
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp/bindings/go
|
||||
make test
|
||||
```
|
||||
@ -62,12 +62,6 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
|
||||
make examples
|
||||
```
|
||||
|
||||
To build with CUDA support, add `GGML_CUDA=1`:
|
||||
|
||||
```bash
|
||||
GGML_CUDA=1 make examples
|
||||
```
|
||||
|
||||
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
||||
|
||||
```bash
|
||||
@ -98,7 +92,7 @@ The API Documentation:
|
||||
|
||||
Getting help:
|
||||
|
||||
* Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312)
|
||||
* Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||
|
||||
## License
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
github.com/ggml-org/whisper.cpp/bindings/go
|
||||
github.com/ggerganov/whisper.cpp/bindings/go
|
||||
provides speech-to-text service bindings for the Go programming language.
|
||||
*/
|
||||
package whisper
|
||||
|
@ -9,23 +9,22 @@ import (
|
||||
// ContextForSignal returns a context object which is cancelled when a signal
|
||||
// is received. It returns nil if no signal parameter is provided
|
||||
func ContextForSignal(signals ...os.Signal) context.Context {
|
||||
if len(signals) == 0 {
|
||||
return nil
|
||||
}
|
||||
if len(signals) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ch := make(chan os.Signal)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
// Send message on channel when signal received
|
||||
signal.Notify(ch, signals...)
|
||||
// Send message on channel when signal received
|
||||
signal.Notify(ch, signals...)
|
||||
|
||||
// When any signal is received, call cancel
|
||||
go func() {
|
||||
<-ch
|
||||
cancel()
|
||||
}()
|
||||
// When any signal received, call cancel
|
||||
go func() {
|
||||
<-ch
|
||||
cancel()
|
||||
}()
|
||||
|
||||
// Return success
|
||||
return ctx
|
||||
// Return success
|
||||
return ctx
|
||||
}
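For reference, a minimal usage sketch (not part of the diff) of the `ContextForSignal` helper shown above: it is used to cancel long-running work, such as the model download loop, when the user sends SIGINT or SIGTERM. The loop body below is a stand-in, not code from the repository.

```go
package main

import (
	"fmt"
	"os"
	"syscall"
	"time"
)

// Sketch only: ContextForSignal is the helper defined in this package above;
// the loop stands in for the example's download work.
func run() error {
	ctx := ContextForSignal(os.Interrupt, syscall.SIGTERM)
	if ctx == nil {
		return fmt.Errorf("no signals provided")
	}
	for i := 0; i < 10; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err() // interrupted by SIGINT/SIGTERM
		case <-time.After(time.Second):
			fmt.Println("still working...", i)
		}
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```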
|
||||
|
||||
|
@ -9,7 +9,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
@ -18,27 +17,14 @@ import (
|
||||
// CONSTANTS
|
||||
|
||||
const (
|
||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
||||
srcExt = ".bin" // Filename extension
|
||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||
)
|
||||
|
||||
var (
|
||||
// The models which will be downloaded, if no model is specified as an argument
|
||||
modelNames = []string{
|
||||
"tiny", "tiny-q5_1", "tiny-q8_0",
|
||||
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
|
||||
"base", "base-q5_1", "base-q8_0",
|
||||
"base.en", "base.en-q5_1", "base.en-q8_0",
|
||||
"small", "small-q5_1", "small-q8_0",
|
||||
"small.en", "small.en-q5_1", "small.en-q8_0",
|
||||
"medium", "medium-q5_0", "medium-q8_0",
|
||||
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
|
||||
"large-v1",
|
||||
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
|
||||
"large-v3", "large-v3-q5_0",
|
||||
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
|
||||
}
|
||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
|
||||
)
|
||||
|
||||
var (
|
||||
@ -58,25 +44,7 @@ var (
|
||||
func main() {
|
||||
flag.Usage = func() {
|
||||
name := filepath.Base(flag.CommandLine.Name())
|
||||
fmt.Fprintf(flag.CommandLine.Output(), `
|
||||
Usage: %s [options] [<model>...]
|
||||
|
||||
Options:
|
||||
-out string Specify the output folder where models will be saved.
|
||||
Default: Current working directory.
|
||||
-timeout duration Set the maximum duration for downloading a model.
|
||||
Example: 10m, 1h (default: 30m0s).
|
||||
-quiet Suppress all output except errors.
|
||||
|
||||
Examples:
|
||||
1. Download a specific model:
|
||||
%s -out ./models tiny-q8_0
|
||||
|
||||
2. Download all models:
|
||||
%s -out ./models
|
||||
|
||||
`, name, name, name)
|
||||
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
flag.Parse()
|
||||
@ -146,87 +114,23 @@ func GetOut() (string, error) {
|
||||
// GetModels returns the list of models to download
|
||||
func GetModels() []string {
|
||||
if flag.NArg() == 0 {
|
||||
fmt.Println("No model specified.")
|
||||
fmt.Println("Preparing to download all models...")
|
||||
|
||||
// Calculate total download size
|
||||
fmt.Println("Calculating total download size...")
|
||||
totalSize, err := CalculateTotalDownloadSize(modelNames)
|
||||
if err != nil {
|
||||
fmt.Println("Error calculating download sizes:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
|
||||
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
|
||||
fmt.Println("Would you like to download all models? (y/N)")
|
||||
|
||||
// Prompt for user input
|
||||
var response string
|
||||
fmt.Scanln(&response)
|
||||
if response != "y" && response != "Y" {
|
||||
fmt.Println("Aborting. Specify a model to download.")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
return modelNames // Return all models if confirmed
|
||||
return modelNames
|
||||
} else {
|
||||
return flag.Args()
|
||||
}
|
||||
return flag.Args() // Return specific models if arguments are provided
|
||||
}
|
||||
|
||||
func CalculateTotalDownloadSize(models []string) (int64, error) {
|
||||
var totalSize int64
|
||||
client := http.Client{}
|
||||
|
||||
for _, model := range models {
|
||||
modelURL, err := URLForModel(model)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Issue a HEAD request to get the file size
|
||||
req, err := http.NewRequest("HEAD", modelURL, nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
|
||||
continue
|
||||
}
|
||||
|
||||
size := resp.ContentLength
|
||||
totalSize += size
|
||||
}
|
||||
return totalSize, nil
|
||||
}
|
||||
|
||||
// URLForModel returns the URL for the given model on huggingface.co
|
||||
func URLForModel(model string) (string, error) {
|
||||
// Ensure "ggml-" prefix is added only once
|
||||
if !strings.HasPrefix(model, "ggml-") {
|
||||
model = "ggml-" + model
|
||||
}
|
||||
|
||||
// Ensure ".bin" extension is added only once
|
||||
if filepath.Ext(model) != srcExt {
|
||||
model += srcExt
|
||||
}
|
||||
|
||||
// Parse the base URL
|
||||
url, err := url.Parse(srcUrl)
|
||||
if err != nil {
|
||||
return "", err
|
||||
} else {
|
||||
url.Path = filepath.Join(url.Path, model)
|
||||
}
|
||||
|
||||
// Ensure no trailing slash in the base URL
|
||||
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
|
||||
return url.String(), nil
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,10 @@ func (flags *Flags) GetOut() string {
|
||||
return strings.ToLower(flags.Lookup("out").Value.String())
|
||||
}
|
||||
|
||||
func (flags *Flags) IsSpeedup() bool {
|
||||
return flags.Lookup("speedup").Value.String() == "true"
|
||||
}
|
||||
|
||||
func (flags *Flags) IsTokens() bool {
|
||||
return flags.Lookup("tokens").Value.String() == "true"
|
||||
}
|
||||
@ -107,6 +111,10 @@ func (flags *Flags) SetParams(context whisper.Context) error {
|
||||
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
||||
context.SetDuration(duration)
|
||||
}
|
||||
if flags.IsSpeedup() {
|
||||
fmt.Fprintf(flags.Output(), "Setting speedup to true\n")
|
||||
context.SetSpeedup(true)
|
||||
}
|
||||
if threads := flags.GetThreads(); threads != 0 {
|
||||
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
||||
context.SetThreads(threads)
|
||||
@ -138,6 +146,7 @@ func registerFlags(flag *Flags) {
|
||||
flag.Duration("offset", 0, "Time offset")
|
||||
flag.Duration("duration", 0, "Duration of audio to process")
|
||||
flag.Uint("threads", 0, "Number of threads to use")
|
||||
flag.Bool("speedup", false, "Enable speedup")
|
||||
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
||||
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
||||
flag.Float64("word-thold", 0, "Maximum segment score")
|
||||
|
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
||||
// Process the data
|
||||
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
||||
context.ResetTimings()
|
||||
if err := context.Process(data, nil, cb, nil); err != nil {
|
||||
if err := context.Process(data, cb, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
module github.com/ggerganov/whisper.cpp/bindings/go
|
||||
|
||||
go 1.23
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/stretchr/testify v1.8.1
|
||||
)
|
||||
|
||||
require (
|
||||
|
@ -1,3 +1,4 @@
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||
@ -8,9 +9,15 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
@ -47,6 +47,10 @@ func (p *Params) SetPrintTimestamps(v bool) {
|
||||
p.print_timestamps = toBool(v)
|
||||
}
|
||||
|
||||
func (p *Params) SetSpeedup(v bool) {
|
||||
p.speed_up = toBool(v)
|
||||
}
|
||||
|
||||
// Set language id
|
||||
func (p *Params) SetLanguage(lang int) error {
|
||||
if lang == -1 {
|
||||
@ -119,33 +123,6 @@ func (p *Params) SetAudioCtx(n int) {
|
||||
p.audio_ctx = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetMaxContext(n int) {
|
||||
p.n_max_text_ctx = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetBeamSize(n int) {
|
||||
p.beam_search.beam_size = C.int(n)
|
||||
}
|
||||
|
||||
func (p *Params) SetEntropyThold(t float32) {
|
||||
p.entropy_thold = C.float(t)
|
||||
}
|
||||
|
||||
func (p *Params) SetTemperature(t float32) {
|
||||
p.temperature = C.float(t)
|
||||
}
|
||||
|
||||
// Sets the fallback temperature incrementation
|
||||
// Pass -1.0 to disable this feature
|
||||
func (p *Params) SetTemperatureFallback(t float32) {
|
||||
p.temperature_inc = C.float(t)
|
||||
}
|
||||
|
||||
// Set initial prompt
|
||||
func (p *Params) SetInitialPrompt(prompt string) {
|
||||
p.initial_prompt = C.CString(prompt)
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// PRIVATE METHODS
|
||||
|
||||
@ -170,11 +147,6 @@ func (p *Params) String() string {
|
||||
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
||||
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
||||
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
||||
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
||||
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
||||
str += fmt.Sprintf(" temperature=%f", p.temperature)
|
||||
str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
|
||||
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
||||
if p.translate {
|
||||
str += " translate"
|
||||
}
|
||||
@ -199,6 +171,9 @@ func (p *Params) String() string {
|
||||
if p.token_timestamps {
|
||||
str += " token_timestamps"
|
||||
}
|
||||
if p.speed_up {
|
||||
str += " speed_up"
|
||||
}
|
||||
|
||||
return str + ">"
|
||||
}
|
||||
|
@ -71,15 +71,16 @@ func (context *context) Language() string {
|
||||
return whisper.Whisper_lang_str(context.params.Language())
|
||||
}
|
||||
|
||||
func (context *context) DetectedLanguage() string {
|
||||
return whisper.Whisper_lang_str(context.model.ctx.Whisper_full_lang_id())
|
||||
}
|
||||
|
||||
// Set translate flag
|
||||
func (context *context) SetTranslate(v bool) {
|
||||
context.params.SetTranslate(v)
|
||||
}
|
||||
|
||||
// Set speedup flag
|
||||
func (context *context) SetSpeedup(v bool) {
|
||||
context.params.SetSpeedup(v)
|
||||
}
|
||||
|
||||
func (context *context) SetSplitOnWord(v bool) {
|
||||
context.params.SetSplitOnWord(v)
|
||||
}
|
||||
@ -129,37 +130,6 @@ func (context *context) SetAudioCtx(n uint) {
|
||||
context.params.SetAudioCtx(int(n))
|
||||
}
|
||||
|
||||
// Set maximum number of text context tokens to store
|
||||
func (context *context) SetMaxContext(n int) {
|
||||
context.params.SetMaxContext(n)
|
||||
}
|
||||
|
||||
// Set Beam Size
|
||||
func (context *context) SetBeamSize(n int) {
|
||||
context.params.SetBeamSize(n)
|
||||
}
|
||||
|
||||
// Set Entropy threshold
|
||||
func (context *context) SetEntropyThold(t float32) {
|
||||
context.params.SetEntropyThold(t)
|
||||
}
|
||||
|
||||
// Set Temperature
|
||||
func (context *context) SetTemperature(t float32) {
|
||||
context.params.SetTemperature(t)
|
||||
}
|
||||
|
||||
// Set the fallback temperature incrementation
|
||||
// Pass -1.0 to disable this feature
|
||||
func (context *context) SetTemperatureFallback(t float32) {
|
||||
context.params.SetTemperatureFallback(t)
|
||||
}
|
||||
|
||||
// Set initial prompt
|
||||
func (context *context) SetInitialPrompt(prompt string) {
|
||||
context.params.SetInitialPrompt(prompt)
|
||||
}
|
||||
|
||||
// ResetTimings resets the model timings. Should be called before processing
|
||||
func (context *context) ResetTimings() {
|
||||
context.model.ctx.Whisper_reset_timings()
|
||||
@ -193,7 +163,6 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
|
||||
// Process new sample data and return any errors
|
||||
func (context *context) Process(
|
||||
data []float32,
|
||||
callEncoderBegin EncoderBeginCallback,
|
||||
callNewSegment SegmentCallback,
|
||||
callProgress ProgressCallback,
|
||||
) error {
|
||||
@ -208,20 +177,7 @@ func (context *context) Process(
|
||||
// We don't do parallel processing at the moment
|
||||
processors := 0
|
||||
if processors > 1 {
|
||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, callEncoderBegin,
|
||||
func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err := context.model.ctx.Whisper_full(context.params, data, callEncoderBegin,
|
||||
func(new int) {
|
||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
@ -229,11 +185,22 @@ func (context *context) Process(
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}, func(progress int) {
|
||||
if callProgress != nil {
|
||||
callProgress(progress)
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
||||
if callNewSegment != nil {
|
||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||
s0 := num_segments - new
|
||||
for i := s0; i < num_segments; i++ {
|
||||
callNewSegment(toSegment(context.model.ctx, i))
|
||||
}
|
||||
}
|
||||
}, func(progress int) {
|
||||
if callProgress != nil {
|
||||
callProgress(progress)
|
||||
}
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -4,121 +4,52 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-audio/wav"
|
||||
// Packages
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
assert "github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSetLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
const (
|
||||
ModelPath = "../../models/ggml-tiny.bin"
|
||||
SamplePath = "../../samples/jfk.wav"
|
||||
)
|
||||
|
||||
func Test_Whisper_000(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, model not found:", ModelPath)
|
||||
}
|
||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||
}
|
||||
|
||||
// Load model
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
assert.NoError(model.Close())
|
||||
|
||||
t.Log("languages=", model.Languages())
|
||||
}
|
||||
|
||||
func Test_Whisper_001(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, model not found:", ModelPath)
|
||||
}
|
||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
||||
}
|
||||
|
||||
// Load model
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
// Get context for decoding
|
||||
ctx, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
assert.NotNil(ctx)
|
||||
|
||||
// This returns an error since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
err = context.SetLanguage("en")
|
||||
assert.Error(err)
|
||||
}
|
||||
|
||||
func TestContextModelIsMultilingual(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
isMultilingual := context.IsMultilingual()
|
||||
|
||||
// This returns false since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
assert.False(isMultilingual)
|
||||
}
|
||||
|
||||
func TestLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
// This always returns en since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
expectedLanguage := "en"
|
||||
actualLanguage := context.Language()
|
||||
assert.Equal(expectedLanguage, actualLanguage)
|
||||
}
|
||||
|
||||
func TestProcess(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
fh, err := os.Open(SamplePath)
|
||||
assert.NoError(err)
|
||||
defer fh.Close()
|
||||
|
||||
// Decode the WAV file - load the full buffer
|
||||
dec := wav.NewDecoder(fh)
|
||||
buf, err := dec.FullPCMBuffer()
|
||||
assert.NoError(err)
|
||||
assert.Equal(uint16(1), dec.NumChans)
|
||||
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
err = context.Process(data, nil, nil, nil)
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestDetectedLanguage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
fh, err := os.Open(SamplePath)
|
||||
assert.NoError(err)
|
||||
defer fh.Close()
|
||||
|
||||
// Decode the WAV file - load the full buffer
|
||||
dec := wav.NewDecoder(fh)
|
||||
buf, err := dec.FullPCMBuffer()
|
||||
assert.NoError(err)
|
||||
assert.Equal(uint16(1), dec.NumChans)
|
||||
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
|
||||
err = context.Process(data, nil, nil, nil)
|
||||
assert.NoError(err)
|
||||
|
||||
expectedLanguage := "en"
|
||||
actualLanguage := context.DetectedLanguage()
|
||||
assert.Equal(expectedLanguage, actualLanguage)
|
||||
}
|
||||
|
@ -16,10 +16,6 @@ type SegmentCallback func(Segment)
|
||||
// processing. It is called during the Process function
|
||||
type ProgressCallback func(int)
|
||||
|
||||
// EncoderBeginCallback is the callback function for checking if we want to
|
||||
// continue processing. It is called during the Process function
|
||||
type EncoderBeginCallback func() bool
|
||||
|
||||
// Model is the interface to a whisper model. Create a new model with the
|
||||
// function whisper.New(string)
|
||||
type Model interface {
|
||||
@ -35,35 +31,29 @@ type Model interface {
|
||||
Languages() []string
|
||||
}
|
||||
|
||||
// Context is the speech recognition context.
|
||||
// Context is the speach recognition context.
|
||||
type Context interface {
|
||||
SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
|
||||
SetTranslate(bool) // Set translate flag
|
||||
IsMultilingual() bool // Return true if the model is multilingual.
|
||||
Language() string // Get language
|
||||
DetectedLanguage() string // Get detected language
|
||||
|
||||
SetOffset(time.Duration) // Set offset
|
||||
SetDuration(time.Duration) // Set duration
|
||||
SetThreads(uint) // Set number of threads to use
|
||||
SetSplitOnWord(bool) // Set split on word flag
|
||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
||||
SetTokenTimestamps(bool) // Set token timestamps flag
|
||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||
SetAudioCtx(uint) // Set audio encoder context
|
||||
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
||||
SetBeamSize(n int) // Set Beam Size
|
||||
SetEntropyThold(t float32) // Set Entropy threshold
|
||||
SetInitialPrompt(prompt string) // Set initial prompt
|
||||
SetTemperature(t float32) // Set temperature
|
||||
SetTemperatureFallback(t float32) // Set temperature incrementation
|
||||
SetOffset(time.Duration) // Set offset
|
||||
SetDuration(time.Duration) // Set duration
|
||||
SetThreads(uint) // Set number of threads to use
|
||||
SetSpeedup(bool) // Set speedup flag
|
||||
SetSplitOnWord(bool) // Set split on word flag
|
||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
||||
SetTokenTimestamps(bool) // Set token timestamps flag
|
||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||
SetAudioCtx(uint) // Set audio encoder context
|
||||
|
||||
// Process mono audio data and return any errors.
|
||||
// If defined, newly generated segments are passed to the
|
||||
// callback function during processing.
|
||||
Process([]float32, EncoderBeginCallback, SegmentCallback, ProgressCallback) error
|
||||
Process([]float32, SegmentCallback, ProgressCallback) error
|
||||
|
||||
// After process is called, return segments until the end of the stream
|
||||
// is reached, when io.EOF is returned.
|
||||
|
@ -1,91 +0,0 @@
|
||||
package whisper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
assert "github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
t.Run("valid model path", func(t *testing.T) {
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
})
|
||||
|
||||
t.Run("invalid model path", func(t *testing.T) {
|
||||
invalidModelPath := "invalid-model-path.bin"
|
||||
model, err := whisper.New(invalidModelPath)
|
||||
assert.Error(err)
|
||||
assert.Nil(model)
|
||||
})
|
||||
}
|
||||
|
||||
func TestClose(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
|
||||
err = model.Close()
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
func TestNewContext(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
context, err := model.NewContext()
|
||||
assert.NoError(err)
|
||||
assert.NotNil(context)
|
||||
}
|
||||
|
||||
func TestIsMultilingual(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
isMultilingual := model.IsMultilingual()
|
||||
|
||||
// This returns false since
|
||||
// the model 'models/ggml-small.en.bin'
|
||||
// that is loaded is not multilingual
|
||||
assert.False(isMultilingual)
|
||||
}
|
||||
|
||||
func TestLanguages(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
model, err := whisper.New(ModelPath)
|
||||
assert.NoError(err)
|
||||
assert.NotNil(model)
|
||||
defer model.Close()
|
||||
|
||||
expectedLanguages := []string{
|
||||
"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
|
||||
"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
|
||||
"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
|
||||
"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
|
||||
"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
|
||||
"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
|
||||
"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
|
||||
"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
|
||||
"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
|
||||
}
|
||||
|
||||
actualLanguages := model.Languages()
|
||||
|
||||
assert.Equal(expectedLanguages, actualLanguages)
|
||||
}
|
@ -1,6 +0,0 @@
package whisper_test

const (
	ModelPath  = "../../models/ggml-small.en.bin"
	SamplePath = "../../samples/jfk.wav"
)
@ -9,8 +9,8 @@ import (
// CGO

/*
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
#cgo LDFLAGS: -lwhisper -lm -lstdc++
#cgo darwin LDFLAGS: -framework Accelerate
#include <whisper.h>
#include <stdlib.h>

Submodule bindings/ios added at db6b353a38
@ -31,10 +31,10 @@ public class Example {
            var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
            // custom configuration if required
            whisperParams.temperature_inc = 0f;

            var samples = readAudio(); // divide each value by 32767.0f
            whisper.fullTranscribe(whisperParams, samples);

            int segmentCount = whisper.getTextSegmentCount(context);
            for (int i = 0; i < segmentCount; i++) {
                String text = whisper.getTextSegment(context, i);
@ -52,7 +52,7 @@ public class Example {
In order to build, you need to have JDK 8 or higher installed. Run the tests with:

```bash
git clone https://github.com/ggml-org/whisper.cpp.git
git clone https://github.com/ggerganov/whisper.cpp.git
cd whisper.cpp/bindings/java

./gradlew build
@ -67,5 +67,5 @@ copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32

## License

The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.

@ -25,43 +25,25 @@ sourceSets {
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperDynlib', Copy) {
|
||||
from '../../build/src'
|
||||
include 'libwhisper.dylib'
|
||||
into 'build/generated/resources/main'
|
||||
from '../../build'
|
||||
include 'libwhisper.dynlib'
|
||||
into 'build/generated/resources/main/darwin'
|
||||
}
|
||||
|
||||
tasks.register('copyLibwhisperSo', Copy) {
|
||||
from '../../build/src'
|
||||
from '../../build'
|
||||
include 'libwhisper.so'
|
||||
into 'build/generated/resources/main'
|
||||
into 'build/generated/resources/main/linux-x86-64'
|
||||
}
|
||||
|
||||
tasks.register('copyWhisperDLL', Copy) {
|
||||
from '../../build/bin/Release'
|
||||
tasks.register('copyWhisperDll', Copy) {
|
||||
from '../../build/Release'
|
||||
include 'whisper.dll'
|
||||
into 'build/generated/resources/main'
|
||||
}
|
||||
|
||||
tasks.register('copyGGML_BASE_DLL', Copy) {
|
||||
from '../../build/bin/Release'
|
||||
include 'ggml-base.dll'
|
||||
into 'build/generated/resources/main'
|
||||
}
|
||||
|
||||
tasks.register('copyGGML_DLL', Copy) {
|
||||
from '../../build/bin/Release'
|
||||
include 'ggml.dll'
|
||||
into 'build/generated/resources/main'
|
||||
}
|
||||
|
||||
tasks.register('copyGGML_CPU_DLL', Copy) {
|
||||
from '../../build/bin/Release'
|
||||
include 'ggml-cpu.dll'
|
||||
into 'build/generated/resources/main'
|
||||
into 'build/generated/resources/main/windows-x86-64'
|
||||
}
|
||||
|
||||
tasks.register('copyLibs') {
|
||||
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDLL, copyGGML_BASE_DLL, copyGGML_DLL, copyGGML_CPU_DLL
|
||||
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
|
||||
}
|
||||
|
||||
test {
|
||||
@ -73,12 +55,7 @@ java {
|
||||
withJavadocJar()
|
||||
}
|
||||
|
||||
sourcesJar() {
|
||||
dependsOn copyLibs
|
||||
}
|
||||
|
||||
jar {
|
||||
dependsOn copyLibs
|
||||
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
||||
}
|
||||
|
||||
@ -90,9 +67,6 @@ tasks.withType(Test) {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
test.dependsOn copyLibs
|
||||
processResources.dependsOn copyLibs
|
||||
|
||||
dependencies {
|
||||
implementation "net.java.dev.jna:jna:5.13.0"
|
||||
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
||||
|
bindings/java/gradlew (vendored): Executable file → Normal file
@ -1,24 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
/**
|
||||
* Presets for alignment heads in DTW token timestamps
|
||||
*/
|
||||
public class WhisperConstants {
|
||||
// Alignment heads presets
|
||||
public static final int WHISPER_AHEADS_NONE = 0;
|
||||
public static final int WHISPER_AHEADS_TINY_EN = 1;
|
||||
public static final int WHISPER_AHEADS_TINY = 2;
|
||||
public static final int WHISPER_AHEADS_BASE_EN = 3;
|
||||
public static final int WHISPER_AHEADS_BASE = 4;
|
||||
public static final int WHISPER_AHEADS_SMALL_EN = 5;
|
||||
public static final int WHISPER_AHEADS_SMALL = 6;
|
||||
public static final int WHISPER_AHEADS_MEDIUM_EN = 7;
|
||||
public static final int WHISPER_AHEADS_MEDIUM = 8;
|
||||
public static final int WHISPER_AHEADS_LARGE_V1 = 9;
|
||||
public static final int WHISPER_AHEADS_LARGE_V2 = 10;
|
||||
public static final int WHISPER_AHEADS_LARGE_V3 = 11;
|
||||
public static final int WHISPER_AHEADS_LARGE_V3_TURBO = 12;
|
||||
public static final int WHISPER_AHEADS_CUSTOM = 13;
|
||||
public static final int WHISPER_AHEADS_N_TOP_MOST = 14;
|
||||
public static final int WHISPER_AHEADS_COUNT = 15;
|
||||
}
|
@ -1,9 +1,7 @@
|
||||
package io.github.ggerganov.whispercpp;
|
||||
|
||||
import com.sun.jna.NativeLong;
|
||||
import com.sun.jna.Structure;
|
||||
import com.sun.jna.ptr.PointerByReference;
|
||||
import com.sun.jna.Pointer;
|
||||
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
||||
import io.github.ggerganov.whispercpp.WhisperModel;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
@ -11,26 +9,33 @@ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperContext extends Structure {
|
||||
public NativeLong t_load_us;
|
||||
public NativeLong t_start_us;
|
||||
int t_load_us = 0;
|
||||
int t_start_us = 0;
|
||||
|
||||
/** weight type (FP32 / FP16 / QX) */
|
||||
public GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||
GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||
/** intermediate type (FP32 or FP16) */
|
||||
public GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||
GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||
|
||||
public WhisperContextParams.ByValue params;
|
||||
|
||||
public Pointer model;
|
||||
public Pointer vocab;
|
||||
public Pointer state;
|
||||
// WhisperModel model;
|
||||
public PointerByReference model;
|
||||
// whisper_vocab vocab;
|
||||
// whisper_state * state = nullptr;
|
||||
public PointerByReference vocab;
|
||||
public PointerByReference state;
|
||||
|
||||
/** populated by whisper_init_from_file_with_params() */
|
||||
public Pointer path_model;
|
||||
String path_model;
|
||||
WhisperContextParams params;
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return List.of("t_load_us", "t_start_us", "wtype", "itype",
|
||||
"params", "model", "vocab", "state", "path_model");
|
||||
}
|
||||
// public static class ByReference extends WhisperContext implements Structure.ByReference {
|
||||
// }
|
||||
//
|
||||
// public static class ByValue extends WhisperContext implements Structure.ByValue {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// protected List<String> getFieldOrder() {
|
||||
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
|
||||
// }
|
||||
}
|
||||
|
@ -43,11 +43,11 @@ public class WhisperCpp implements AutoCloseable {
|
||||
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||
* @param params - params to use when initialising the context
|
||||
*/
|
||||
public void initContext(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
||||
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
initContextImpl(modelPath, params);
|
||||
}
|
||||
|
||||
private void initContextImpl(String modelPath, WhisperContextParams.ByValue params) throws FileNotFoundException {
|
||||
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||
if (ctx != null) {
|
||||
lib.whisper_free(ctx);
|
||||
}
|
||||
@ -69,13 +69,15 @@ public class WhisperCpp implements AutoCloseable {
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Returns a ByValue instance to ensure proper parameter passing to native code.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
* - call `whisper_free_context_params()`
|
||||
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||
*/
|
||||
public WhisperContextParams.ByValue getContextDefaultParams() {
|
||||
WhisperContextParams.ByValue valueParams = new WhisperContextParams.ByValue(
|
||||
lib.whisper_context_default_params_by_ref());
|
||||
valueParams.read();
|
||||
return valueParams;
|
||||
public WhisperContextParams getContextDefaultParams() {
|
||||
paramsPointer = lib.whisper_context_default_params_by_ref();
|
||||
WhisperContextParams params = new WhisperContextParams(paramsPointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
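For reference, here is a minimal sketch of the allocate/read/free pattern described in the Javadoc above, written against the Pointer-based API on the right-hand side of this hunk. The package names, model path, and the choice of `Native.free` as the cleanup call are assumptions based on the snippets shown elsewhere in this diff.

```java
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import io.github.ggerganov.whispercpp.WhisperCppJnaLibrary;
import io.github.ggerganov.whispercpp.params.WhisperContextParams;

public class ContextParamsSketch {
    public static void main(String[] args) {
        WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;

        // whisper_context_default_params_by_ref() allocates natively; we must free it ourselves.
        Pointer paramsPointer = lib.whisper_context_default_params_by_ref();
        try {
            WhisperContextParams params = new WhisperContextParams(paramsPointer);
            params.read();          // copy the native struct into the Java fields
            params.useGpu(false);   // illustrative tweak before creating the context

            Pointer ctx = lib.whisper_init_from_file_with_params("ggml-base.en.bin", params);
            if (ctx == null) {
                throw new IllegalStateException("Failed to load model");
            }
            lib.whisper_free(ctx);
        } finally {
            // One of the two documented cleanup options from the Javadoc above
            Native.free(Pointer.nativeValue(paramsPointer));
        }
    }
}
```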
|
||||
|
||||
/**
|
||||
@ -86,7 +88,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
*
|
||||
* @param strategy - GREEDY
|
||||
*/
|
||||
public WhisperFullParams.ByValue getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||
Pointer pointer;
|
||||
|
||||
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
||||
@ -102,7 +104,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
pointer = beamParamsPointer;
|
||||
}
|
||||
|
||||
WhisperFullParams.ByValue params = new WhisperFullParams.ByValue(pointer);
|
||||
WhisperFullParams params = new WhisperFullParams(pointer);
|
||||
params.read();
|
||||
return params;
|
||||
}
|
||||
@ -136,21 +138,15 @@ public class WhisperCpp implements AutoCloseable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
public String fullTranscribe(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
|
||||
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
|
||||
/*
|
||||
WhisperFullParams.ByValue valueParams = new WhisperFullParams.ByValue(
|
||||
lib.whisper_full_default_params_by_ref(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal()));
|
||||
valueParams.read();
|
||||
*/
|
||||
|
||||
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||
throw new IOException("Failed to process audio");
|
||||
}
|
||||
@ -167,16 +163,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
|
||||
return str.toString().trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Full transcribe with time list.
|
||||
*
|
||||
* @param whisperParams the whisper params
|
||||
* @param audioData the audio data
|
||||
* @return the list
|
||||
* @throws IOException the io exception
|
||||
*/
|
||||
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams.ByValue whisperParams, float[] audioData) throws IOException {
|
||||
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||
if (ctx == null) {
|
||||
throw new IllegalStateException("Model not initialised");
|
||||
}
|
||||
@ -188,6 +175,7 @@ public class WhisperCpp implements AutoCloseable {
|
||||
int nSegments = lib.whisper_full_n_segments(ctx);
|
||||
List<WhisperSegment> segments= new ArrayList<>(nSegments);
|
||||
|
||||
|
||||
for (int i = 0; i < nSegments; i++) {
|
||||
long t0 = lib.whisper_full_get_segment_t0(ctx, i);
|
||||
String text = lib.whisper_full_get_segment_text(ctx, i);
|
||||
|
@ -9,7 +9,6 @@ import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||
|
||||
public interface WhisperCppJnaLibrary extends Library {
|
||||
|
||||
WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
|
||||
|
||||
String whisper_print_system_info();
|
||||
@ -21,7 +20,7 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file(String path_model);
|
||||
|
||||
|
||||
/**
|
||||
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||
* Because this function allocates memory for the params, the caller must call either:
|
||||
@ -39,7 +38,7 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
* @param params Pointer to whisper_context_params
|
||||
* @return Whisper context on success, null on failure
|
||||
*/
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams.ByValue params);
|
||||
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
|
||||
|
||||
/**
|
||||
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
||||
@ -181,12 +180,12 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
/**
|
||||
* @return the id of the specified language, returns -1 if not found.
|
||||
* Examples:
|
||||
* "de" -> 2
|
||||
* "german" -> 2
|
||||
* "de" -> 2
|
||||
* "german" -> 2
|
||||
*/
|
||||
int whisper_lang_id(String lang);
|
||||
|
||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
||||
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found */
|
||||
String whisper_lang_str(int id);
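A tiny sketch of the lookup round-trip described in the comments above, assuming the `WhisperCppJnaLibrary.instance` singleton declared at the top of this interface:

```java
import io.github.ggerganov.whispercpp.WhisperCppJnaLibrary;

public class LangLookupSketch {
    public static void main(String[] args) {
        WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
        int id = lib.whisper_lang_id("de");      // 2, or -1 if the language is not found
        String code = lib.whisper_lang_str(id);  // "de"
        System.out.println(id + " -> " + code);
    }
}
```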
|
||||
|
||||
/**
|
||||
@ -269,21 +268,20 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
void whisper_free_params(Pointer params);
|
||||
|
||||
/**
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*/
|
||||
int whisper_full(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples);
|
||||
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
|
||||
public int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams.ByValue params, float[] samples, int n_samples);
|
||||
//int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||
|
||||
// Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||
// Result is stored in the default state of the context
|
||||
// Not thread safe if executed in parallel on the same context.
|
||||
// It seems this approach can offer some speedup in some cases.
|
||||
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams.ByValue params, final float[] samples, int n_samples, int n_processors);
|
||||
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
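As a hedged illustration of the chunked processing described in the comment above, the helper below calls `whisper_full_parallel` directly through the JNA interface. It assumes the samples are 16 kHz mono PCM in the range [-1, 1] and that the context and params were initialised as in the earlier snippets.

```java
import com.sun.jna.Pointer;
import io.github.ggerganov.whispercpp.WhisperCppJnaLibrary;
import io.github.ggerganov.whispercpp.params.WhisperFullParams;

public class ParallelTranscribeSketch {
    // Splits the audio across nProcessors chunks; per the comment above,
    // accuracy can degrade near the chunk boundaries.
    static void transcribeInParallel(WhisperCppJnaLibrary lib, Pointer ctx,
                                     WhisperFullParams params, float[] samples, int nProcessors) {
        if (lib.whisper_full_parallel(ctx, params, samples, samples.length, nProcessors) != 0) {
            throw new RuntimeException("whisper_full_parallel failed");
        }
        int nSegments = lib.whisper_full_n_segments(ctx);
        for (int i = 0; i < nSegments; i++) {
            System.out.println(lib.whisper_full_get_segment_text(ctx, i));
        }
    }
}
```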
|
||||
|
||||
/**
|
||||
* Number of generated text segments.
|
||||
@ -306,6 +304,14 @@ public interface WhisperCppJnaLibrary extends Library {
|
||||
/** Language id associated with the provided state */
|
||||
int whisper_full_lang_id_from_state(Pointer state);
|
||||
|
||||
/**
|
||||
* Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
|
||||
* The resulting spectrogram is stored inside the default state of the provided whisper context.
|
||||
* @return 0 on success
|
||||
*/
|
||||
int whisper_pcm_to_mel_phase_vocoder(Pointer ctx, final float[] samples, int n_samples, int n_threads);
|
||||
|
||||
int whisper_pcm_to_mel_phase_vocoder_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);
|
||||
|
||||
/** Get the start time of the specified segment. */
|
||||
long whisper_full_get_segment_t0(Pointer ctx, int i_segment);
|
||||
|
@ -1,17 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.callbacks;
|
||||
|
||||
import com.sun.jna.Callback;
|
||||
|
||||
/**
|
||||
* Callback for aborting GGML computation
|
||||
* Maps to the C typedef: bool (*ggml_abort_callback)(void * data)
|
||||
*/
|
||||
public interface GgmlAbortCallback extends Callback {
|
||||
/**
|
||||
* Return true to abort the computation, false to continue
|
||||
*
|
||||
* @param data User data passed to the callback
|
||||
* @return true to abort, false to continue
|
||||
*/
|
||||
boolean invoke(com.sun.jna.Pointer data);
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperAhead extends Structure {
|
||||
|
||||
public int n_text_layer;
|
||||
|
||||
public int n_head;
|
||||
|
||||
public WhisperAhead() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperAhead(int textLayer, int head) {
|
||||
super();
|
||||
this.n_text_layer = textLayer;
|
||||
this.n_head = head;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("n_text_layer", "n_head");
|
||||
}
|
||||
|
||||
public static class ByReference extends WhisperAhead implements Structure.ByReference {}
|
||||
|
||||
public static class ByValue extends WhisperAhead implements Structure.ByValue {}
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
import com.sun.jna.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class WhisperAheads extends Structure {
|
||||
public NativeLong n_heads;
|
||||
|
||||
public Pointer heads;
|
||||
|
||||
public WhisperAheads() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create alignment heads from an array of WhisperAhead objects
|
||||
*/
|
||||
public void setHeads(WhisperAhead[] aheadsArray) {
|
||||
this.n_heads = new NativeLong(aheadsArray.length);
|
||||
|
||||
int structSize = aheadsArray[0].size();
|
||||
Memory mem = new Memory(structSize * aheadsArray.length);
|
||||
|
||||
for (int i = 0; i < aheadsArray.length; i++) {
|
||||
aheadsArray[i].write();
|
||||
byte[] buffer = aheadsArray[i].getPointer().getByteArray(0, structSize);
|
||||
mem.write(i * structSize, buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
this.heads = mem;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("n_heads", "heads");
|
||||
}
|
||||
|
||||
public static class ByReference extends WhisperAheads implements Structure.ByReference {}
|
||||
|
||||
public static class ByValue extends WhisperAheads implements Structure.ByValue {}
|
||||
}
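A small sketch of how the `setHeads` helper above might be used to build a custom alignment-head set; the layer/head pairs are illustrative values, not a recommended configuration.

```java
import io.github.ggerganov.whispercpp.params.WhisperAhead;
import io.github.ggerganov.whispercpp.params.WhisperAheads;

public class AheadsSketch {
    public static WhisperAheads.ByValue customAheads() {
        WhisperAhead[] pairs = new WhisperAhead[] {
            new WhisperAhead(2, 0),  // (n_text_layer, n_head) — illustrative values only
            new WhisperAhead(3, 1)
        };
        WhisperAheads.ByValue aheads = new WhisperAheads.ByValue();
        aheads.setHeads(pairs);      // packs the structs into one contiguous native block
        return aheads;
    }
}
```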
|
@ -1,5 +1,7 @@
|
||||
package io.github.ggerganov.whispercpp.params;
|
||||
|
||||
import com.sun.jna.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
@ -9,73 +11,21 @@ import java.util.List;
|
||||
* whisper_context_default_params()
|
||||
*/
|
||||
public class WhisperContextParams extends Structure {
|
||||
|
||||
public WhisperContextParams(Pointer p) {
|
||||
super(p);
|
||||
}
|
||||
|
||||
public WhisperContextParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
/** Use GPU for inference (default = true) */
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public CBool use_gpu;
|
||||
|
||||
/** Use flash attention (default = false) */
|
||||
public CBool flash_attn;
|
||||
|
||||
/** CUDA device to use (default = 0) */
|
||||
public int gpu_device;
|
||||
|
||||
/** [EXPERIMENTAL] Enable token-level timestamps with DTW (default = false) */
|
||||
public CBool dtw_token_timestamps;
|
||||
|
||||
/** [EXPERIMENTAL] Alignment heads preset for DTW */
|
||||
public int dtw_aheads_preset;
|
||||
|
||||
/** Number of top layers to use for DTW when using WHISPER_AHEADS_N_TOP_MOST preset */
|
||||
public int dtw_n_top;
|
||||
|
||||
public WhisperAheads.ByValue dtw_aheads;
|
||||
|
||||
/** DTW memory size (internal use) */
|
||||
public NativeLong dtw_mem_size;
|
||||
|
||||
/** Use GPU for inference */
|
||||
/** Use GPU for inference Number (default = true) */
|
||||
public void useGpu(boolean enable) {
|
||||
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Use flash attention */
|
||||
public void useFlashAttn(boolean enable) {
|
||||
flash_attn = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Enable DTW token-level timestamps */
|
||||
public void enableDtwTokenTimestamps(boolean enable) {
|
||||
dtw_token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Set DTW alignment heads preset */
|
||||
public void setDtwAheadsPreset(int preset) {
|
||||
dtw_aheads_preset = preset;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList(
|
||||
"use_gpu",
|
||||
"flash_attn",
|
||||
"gpu_device",
|
||||
"dtw_token_timestamps",
|
||||
"dtw_aheads_preset",
|
||||
"dtw_n_top",
|
||||
"dtw_aheads",
|
||||
"dtw_mem_size"
|
||||
);
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperContextParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
return Arrays.asList("use_gpu");
|
||||
}
|
||||
}
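A hedged sketch tying the master-side context params above to the `WhisperConstants` presets shown earlier in this diff; the field and preset names are taken from those snippets, and the chosen values are illustrative.

```java
import io.github.ggerganov.whispercpp.WhisperConstants;
import io.github.ggerganov.whispercpp.params.WhisperContextParams;

public class DtwParamsSketch {
    public static WhisperContextParams.ByValue withDtw(WhisperContextParams.ByValue params) {
        params.useGpu(true);
        params.enableDtwTokenTimestamps(true);  // experimental, per the Javadoc above
        params.setDtwAheadsPreset(WhisperConstants.WHISPER_AHEADS_BASE_EN);
        return params;
    }
}
```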
|
||||
|
@ -5,7 +5,6 @@ import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
|
||||
import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
@ -17,12 +16,10 @@ import java.util.List;
|
||||
*/
|
||||
public class WhisperFullParams extends Structure {
|
||||
|
||||
public WhisperFullParams() {
|
||||
super();
|
||||
}
|
||||
|
||||
public WhisperFullParams(Pointer p) {
|
||||
super(p);
|
||||
// super(p, ALIGN_MSVC);
|
||||
// super(p, ALIGN_GNUC);
|
||||
}
|
||||
|
||||
/** Sampling strategy for whisper_full() function. */
|
||||
@ -72,10 +69,10 @@ public class WhisperFullParams extends Structure {
|
||||
single_segment = single ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public CBool print_special;
|
||||
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||
public void printSpecial(boolean enable) {
|
||||
print_special = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
@ -132,12 +129,12 @@ public class WhisperFullParams extends Structure {
|
||||
/** Maximum tokens per segment (0, default = no limit) */
|
||||
public int max_tokens;
|
||||
|
||||
/** [EXPERIMENTAL] Enable debug mode for extra info */
|
||||
public CBool debug_mode;
|
||||
/** Flag to speed up the audio by 2x using Phase Vocoder. (default = false) */
|
||||
public CBool speed_up;
|
||||
|
||||
/** Enable debug mode */
|
||||
public void enableDebugMode(boolean enable) {
|
||||
debug_mode = enable ? CBool.TRUE : CBool.FALSE;
|
||||
/** Flag to speed up the audio by 2x using Phase Vocoder. (default = false) */
|
||||
public void speedUp(boolean enable) {
|
||||
speed_up = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Overwrite the audio context size (0 = use default). */
|
||||
@ -151,9 +148,6 @@ public class WhisperFullParams extends Structure {
|
||||
tdrz_enable = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Regular expression matching tokens to suppress. */
|
||||
public String suppress_regex;
|
||||
|
||||
/** Tokens to provide to the whisper decoder as an initial prompt.
|
||||
* These are prepended to any existing text context from a previous call. */
|
||||
public String initial_prompt;
|
||||
@ -192,11 +186,11 @@ public class WhisperFullParams extends Structure {
|
||||
}
|
||||
|
||||
/** Flag to suppress non-speech tokens. */
|
||||
public CBool suppress_nst;
|
||||
public CBool suppress_non_speech_tokens;
|
||||
|
||||
/** Flag to suppress non-speech tokens. */
|
||||
public void suppressNonSpeechTokens(boolean enable) {
|
||||
suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
|
||||
suppress_non_speech_tokens = enable ? CBool.TRUE : CBool.FALSE;
|
||||
}
|
||||
|
||||
/** Initial decoding temperature. */
|
||||
@ -285,16 +279,6 @@ public class WhisperFullParams extends Structure {
|
||||
*/
|
||||
public Pointer encoder_begin_callback_user_data;
|
||||
|
||||
/** Callback used to abort GGML computation */
|
||||
public Pointer abort_callback;
|
||||
|
||||
/** User data for the abort_callback */
|
||||
public Pointer abort_callback_user_data;
|
||||
|
||||
public void setAbortCallback(GgmlAbortCallback callback) {
|
||||
abort_callback = CallbackReference.getFunctionPointer(callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback by each decoder to filter obtained logits.
|
||||
* WhisperLogitsFilterCallback
|
||||
@ -331,28 +315,17 @@ public class WhisperFullParams extends Structure {
|
||||
|
||||
@Override
|
||||
protected List<String> getFieldOrder() {
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx",
|
||||
"offset_ms", "duration_ms", "translate", "no_context",
|
||||
"no_timestamps", "single_segment", "print_special",
|
||||
"print_progress", "print_realtime", "print_timestamps",
|
||||
"token_timestamps", "thold_pt", "thold_ptsum", "max_len",
|
||||
"split_on_word", "max_tokens", "debug_mode", "audio_ctx",
|
||||
"tdrz_enable", "suppress_regex", "initial_prompt",
|
||||
"prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||
"suppress_blank", "suppress_nst", "temperature",
|
||||
"max_initial_ts", "length_penalty", "temperature_inc",
|
||||
"entropy_thold", "logprob_thold", "no_speech_thold", "greedy",
|
||||
"beam_search", "new_segment_callback", "new_segment_callback_user_data",
|
||||
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
|
||||
"no_context", "single_segment", "no_timestamps",
|
||||
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
|
||||
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "speed_up", "audio_ctx",
|
||||
"tdrz_enable", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||
"suppress_blank", "suppress_non_speech_tokens", "temperature", "max_initial_ts", "length_penalty",
|
||||
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
|
||||
"new_segment_callback", "new_segment_callback_user_data",
|
||||
"progress_callback", "progress_callback_user_data",
|
||||
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
||||
"abort_callback", "abort_callback_user_data",
|
||||
"logits_filter_callback", "logits_filter_callback_user_data",
|
||||
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
||||
}
|
||||
|
||||
public static class ByValue extends WhisperFullParams implements Structure.ByValue {
|
||||
public ByValue() { super(); }
|
||||
public ByValue(Pointer p) { super(p); }
|
||||
}
|
||||
|
||||
}
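A minimal sketch of wiring up the abort callback removed in this diff, using the `setAbortCallback` helper and the `GgmlAbortCallback` interface shown above; the cancellation flag is a hypothetical application-side detail.

```java
import com.sun.jna.Pointer;
import io.github.ggerganov.whispercpp.callbacks.GgmlAbortCallback;
import io.github.ggerganov.whispercpp.params.WhisperFullParams;

public class AbortCallbackSketch {
    // Hypothetical flag set by the application when the user cancels a transcription.
    static volatile boolean cancelRequested = false;

    public static void attach(WhisperFullParams params) {
        GgmlAbortCallback abort = new GgmlAbortCallback() {
            @Override
            public boolean invoke(Pointer data) {
                return cancelRequested;  // returning true aborts the GGML computation
            }
        };
        params.setAbortCallback(abort);
    }
}
```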
|
||||
|
@ -76,7 +76,7 @@ class WhisperCppTest {
|
||||
float[] floats = new float[b.length / 2];
|
||||
|
||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||
params.print_progress = CBool.FALSE;
|
||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||
@ -118,7 +118,7 @@ class WhisperCppTest {
|
||||
float[] floats = new float[b.length / 2];
|
||||
|
||||
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||
WhisperFullParams.ByValue params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||
params.print_progress = CBool.FALSE;
|
||||
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||
|
@ -33,9 +33,6 @@ mkdir build-em && cd build-em
emcmake cmake .. && make -j

# run test
node ../tests/test-whisper.js

# For Node.js versions prior to v16.4.0, experimental features need to be enabled:
node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js

# publish npm package
@ -44,7 +41,7 @@ make publish-npm

## Sample run

```text
```java
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js

whisper_model_load: loading model from 'whisper.bin'
@ -66,7 +63,7 @@ whisper_model_load: ggml ctx size = 140.60 MB
whisper_model_load: memory size = 22.83 MB
whisper_model_load: model size = 140.54 MB

system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |

operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...

@ -1,6 +1,6 @@
{
  "name": "whisper.cpp",
  "version": "1.7.5",
  "version": "1.5.1",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
File diff suppressed because one or more lines are too long
bindings/ruby/.gitignore (vendored)
@ -1,9 +0,0 @@
|
||||
LICENSE
|
||||
pkg/
|
||||
lib/whisper.*
|
||||
ext/examples/
|
||||
ext/ggml/
|
||||
ext/include/
|
||||
ext/scripts/
|
||||
ext/src/
|
||||
test/fixtures/
|
@ -1,349 +0,0 @@
|
||||
whispercpp
|
||||
==========
|
||||
|
||||

|
||||
|
||||
Ruby bindings for [whisper.cpp][], an interface of automatic speech recognition model.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
Install the gem and add to the application's Gemfile by executing:
|
||||
|
||||
$ bundle add whispercpp
|
||||
|
||||
If bundler is not being used to manage dependencies, install the gem by executing:
|
||||
|
||||
$ gem install whispercpp
|
||||
|
||||
You can pass build options for whisper.cpp, for instance:
|
||||
|
||||
$ bundle config build.whispercpp --enable-ggml-cuda
|
||||
|
||||
or,
|
||||
|
||||
$ gem install whispercpp -- --enable-ggml-cuda
|
||||
|
||||
See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options listed in that README to Ruby-style options, for example:
|
||||
|
||||
Boolean options:
|
||||
|
||||
* `-DGGML_BLAS=1` -> `--enable-ggml-blas`
|
||||
* `-DWHISER_COREML=OFF` -> `--disable-whisper-coreml`
|
||||
|
||||
Argument options:
|
||||
|
||||
* `-DGGML_CUDA_COMPRESSION_MODE=size` -> `--ggml-cuda-compression-mode=size`
|
||||
|
||||
Combination:
|
||||
|
||||
* `-DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"` -> `--enable-ggml-cuda --cmake_cuda-architectures="86"`
|
||||
|
||||
For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`), and make it kebab-case: `--enable-ggml-cuda`.
|
||||
For options which require arguments, like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, make it kebab-case, then append `=` and the argument: `--cmake-cuda-architectures="86"`.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
```ruby
|
||||
require "whisper"
|
||||
|
||||
whisper = Whisper::Context.new("base")
|
||||
|
||||
params = Whisper::Params.new(
|
||||
language: "en",
|
||||
offset: 10_000,
|
||||
duration: 60_000,
|
||||
max_text_tokens: 300,
|
||||
translate: true,
|
||||
print_timestamps: false,
|
||||
initial_prompt: "Initial prompt here."
|
||||
)
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
||||
puts whole_text
|
||||
end
|
||||
|
||||
```
|
||||
|
||||
### Preparing model ###
|
||||
|
||||
Some models are prepared up-front; you can use a shorthand name for these pre-converted models:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("base.en")
|
||||
```
|
||||
|
||||
You can see the list of prepared model names by `Whisper::Model.pre_converted_models.keys`:
|
||||
|
||||
```ruby
|
||||
puts Whisper::Model.pre_converted_models.keys
|
||||
# tiny
|
||||
# tiny.en
|
||||
# tiny-q5_1
|
||||
# tiny.en-q5_1
|
||||
# tiny-q8_0
|
||||
# base
|
||||
# base.en
|
||||
# base-q5_1
|
||||
# base.en-q5_1
|
||||
# base-q8_0
|
||||
# :
|
||||
# :
|
||||
```
|
||||
|
||||
You can also retrieve each model:
|
||||
|
||||
```ruby
|
||||
base_en = Whisper::Model.pre_converted_models["base.en"]
|
||||
whisper = Whisper::Context.new(base_en)
|
||||
```
|
||||
|
||||
The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
|
||||
|
||||
```ruby
|
||||
Whisper::Model.pre_converted_models["base"].clear_cache
|
||||
```
|
||||
|
||||
You can also use local model files you prepared:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("path/to/your/model.bin")
|
||||
```
|
||||
|
||||
Or, you can download model files:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("https://example.net/uri/of/your/model.bin")
|
||||
# Or
|
||||
whisper = Whisper::Context.new(URI("https://example.net/uri/of/your/model.bin"))
|
||||
```
|
||||
|
||||
See [models][] page for details.
|
||||
|
||||
### Preparing audio file ###
|
||||
|
||||
Currently, whisper.cpp accepts only 16-bit WAV files.
|
||||
|
||||
### Voice Activity Detection (VAD) ###
|
||||
|
||||
Support for Voice Activity Detection (VAD) can be enabled by setting `Whisper::Params`'s `vad` argument to `true` and specifying VAD model:
|
||||
|
||||
```ruby
|
||||
Whisper::Params.new(
|
||||
vad: true,
|
||||
vad_model_path: "silero-v5.1.2",
|
||||
# other arguments...
|
||||
)
|
||||
```
|
||||
|
||||
When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically.
|
||||
Currently, "silero-v5.1.2" is registered as pre-converted model like ASR models. You also specify file path or URI of model.
|
||||
|
||||
If you need to configure VAD behavior, pass params for it:
|
||||
|
||||
```ruby
|
||||
Whisper::Params.new(
|
||||
vad: true,
|
||||
vad_model_path: "silero-v5.1.2",
|
||||
vad_params: Whisper::VAD::Params.new(
|
||||
threshold: 1.0, # defaults to 0.5
|
||||
min_speech_duration_ms: 500, # defaults to 250
|
||||
min_silence_duration_ms: 200, # defaults to 100
|
||||
max_speech_duration_s: 30000, # default is FLT_MAX,
|
||||
speech_pad_ms: 50, # defaults to 30
|
||||
samples_overlap: 0.5 # defaults to 0.1
|
||||
),
|
||||
# other arguments...
|
||||
)
|
||||
```
|
||||
|
||||
For details on VAD, see [whisper.cpp's README](https://github.com/ggml-org/whisper.cpp?tab=readme-ov-file#voice-activity-detection-vad).
|
||||
|
||||
### Output ###
|
||||
|
||||
whispercpp supports SRT and WebVTT output:
|
||||
|
||||
```ruby
|
||||
puts whisper.transcribe("path/to/audio.wav", Whisper::Params.new).to_webvtt
|
||||
# =>
|
||||
WEBVTT
|
||||
|
||||
1
|
||||
00:00:00.000 --> 00:00:03.860
|
||||
My thought I have nobody by a beauty and will as you poured.
|
||||
|
||||
2
|
||||
00:00:03.860 --> 00:00:09.840
|
||||
Mr. Rochester is sub in that so-don't find simplest, and devoted about, to let might in
|
||||
|
||||
3
|
||||
00:00:09.840 --> 00:00:09.940
|
||||
a
|
||||
|
||||
```
|
||||
|
||||
You may call `#to_srt`, too.
|
||||
|
||||
|
||||
API
|
||||
---
|
||||
|
||||
### Transcription ###
|
||||
|
||||
By default, `Whisper::Context#transcribe` works in a single thread. You can make it work in parallel by passing `n_processors` option:
|
||||
|
||||
```ruby
|
||||
whisper.transcribe("path/to/audio.wav", params, n_processors: Etc.nprocessors)
|
||||
```
|
||||
|
||||
Note that transcription may occasionally be less accurate when run in parallel.
|
||||
|
||||
### Segments ###
|
||||
|
||||
Once `Whisper::Context#transcribe` has been called, you can retrieve segments with `#each_segment`:
|
||||
|
||||
```ruby
|
||||
def format_time(time_ms)
|
||||
sec, decimal_part = time_ms.divmod(1000)
|
||||
min, sec = sec.divmod(60)
|
||||
hour, min = min.divmod(60)
|
||||
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
||||
end
|
||||
|
||||
whisper
|
||||
.transcribe("path/to/audio.wav", params)
|
||||
.each_segment.with_index do |segment, index|
|
||||
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
||||
nth: index + 1,
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_turn_next?
|
||||
puts line
|
||||
end
|
||||
|
||||
```
|
||||
|
||||
You can also add a hook to the params that is called on each new segment:
|
||||
|
||||
```ruby
|
||||
# Add hook before calling #transcribe
|
||||
params.on_new_segment do |segment|
|
||||
line = "[%{st} --> %{ed}] %{text}" % {
|
||||
st: format_time(segment.start_time),
|
||||
ed: format_time(segment.end_time),
|
||||
text: segment.text
|
||||
}
|
||||
line << " (speaker turned)" if segment.speaker_turn_next?
|
||||
puts line
|
||||
end
|
||||
|
||||
whisper.transcribe("path/to/audio.wav", params)
|
||||
|
||||
```
|
||||
|
||||
### Models ###
|
||||
|
||||
You can see model information:
|
||||
|
||||
```ruby
|
||||
whisper = Whisper::Context.new("base")
|
||||
model = whisper.model
|
||||
|
||||
model.n_vocab # => 51864
|
||||
model.n_audio_ctx # => 1500
|
||||
model.n_audio_state # => 512
|
||||
model.n_audio_head # => 8
|
||||
model.n_audio_layer # => 6
|
||||
model.n_text_ctx # => 448
|
||||
model.n_text_state # => 512
|
||||
model.n_text_head # => 8
|
||||
model.n_text_layer # => 6
|
||||
model.n_mels # => 80
|
||||
model.ftype # => 1
|
||||
model.type # => "base"
|
||||
|
||||
```
|
||||
|
||||
### Logging ###
|
||||
|
||||
You can set log callback:
|
||||
|
||||
```ruby
|
||||
prefix = "[MyApp] "
|
||||
log_callback = ->(level, buffer, user_data) {
|
||||
case level
|
||||
when Whisper::LOG_LEVEL_NONE
|
||||
puts "#{user_data}none: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_INFO
|
||||
puts "#{user_data}info: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_WARN
|
||||
puts "#{user_data}warn: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_ERROR
|
||||
puts "#{user_data}error: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_DEBUG
|
||||
puts "#{user_data}debug: #{buffer}"
|
||||
when Whisper::LOG_LEVEL_CONT
|
||||
puts "#{user_data}same to previous: #{buffer}"
|
||||
end
|
||||
}
|
||||
Whisper.log_set log_callback, prefix
|
||||
```
|
||||
|
||||
Using this feature, you are also able to suppress log:
|
||||
|
||||
```ruby
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
Whisper::Context.new("base")
|
||||
```
|
||||
|
||||
### Low-level API to transcribe ###
|
||||
|
||||
You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with an audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
|
||||
|
||||
```ruby
|
||||
require "whisper"
|
||||
require "wavefile"
|
||||
|
||||
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
|
||||
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
|
||||
|
||||
whisper = Whisper::Context.new("base")
|
||||
whisper
|
||||
.full(Whisper::Params.new, samples)
|
||||
.each_segment do |segment|
|
||||
puts segment.text
|
||||
end
|
||||
```
|
||||
|
||||
The second argument `samples` may be an array, an object with `length` and `each` methods, or a MemoryView. If you can prepare the audio data as a C array and export it as a MemoryView, whispercpp accepts it and works with it with zero copy.
|
||||
|
||||
Development
|
||||
-----------
|
||||
|
||||
% git clone https://github.com/ggml-org/whisper.cpp.git
|
||||
% cd whisper.cpp/bindings/ruby
|
||||
% rake test
|
||||
|
||||
The first call of `rake test` builds the extension and downloads a model for testing. After that, you can add tests in the `tests` directory and modify `ext/ruby_whisper.cpp`.
|
||||
|
||||
If something seems wrong during the build, running `rake clean` solves some cases.
|
||||
|
||||
### Need help ###
|
||||
|
||||
* Windows support
|
||||
* Refinement of C/C++ code, especially memory management
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
The same as [whisper.cpp][].
|
||||
|
||||
[whisper.cpp]: https://github.com/ggml-org/whisper.cpp
|
||||
[models]: https://github.com/ggml-org/whisper.cpp/tree/master/models
|

@ -1,96 +0,0 @@
require 'rake/clean'
require "bundler/gem_tasks"
require "rake/testtask"
require_relative "extsources"

SOURCES_DIR = "ext/sources"

SOURCES = FileList[]

EXTSOURCES.each do |src|
  basename = src.pathmap("%f")
  dest = basename == "LICENSE" ? basename
                               : src.pathmap("%{\\.\\./\\.\\.,#{SOURCES_DIR}}p")
                                    .pathmap("%{\\.\\./javascript,#{SOURCES_DIR}/bindings/javascript}p")
  dir = dest.pathmap("%d")
  file src
  directory dir
  file dest => [src, dir] do |t|
    cp t.source, t.name
  end
  SOURCES.include dest
end

CLEAN.include SOURCES

SRC = FileList["ext/*.{c,cpp,h}"]

task build: SOURCES

directory "pkg"
CLOBBER.include "pkg"

LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
SO_FILE = File.join("ext", LIB_NAME)
LIB_FILE = File.join("lib", LIB_NAME)

file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
  chdir "ext" do
    ruby "extconf.rb"
  end
end
if File.exist? "ext/Makefile"
  task :make_clean do
    cd "ext" do
      sh "make", "clean"
    end
  end
  task clean: :make_clean
  task :make_distclean do
    cd "ext" do
      sh "make", "distclean"
    end
  end
  task clobber: :make_distclean
end

file SO_FILE => "ext/Makefile" do |t|
  chdir "ext" do
    sh "make"
  end
end
CLEAN.include SO_FILE

directory "lib"
file LIB_FILE => [SO_FILE, "lib"] do |t|
  copy t.source, t.name
end
CLEAN.include LIB_FILE

Rake::TestTask.new

TEST_FIXTURE_AUDIO = "test/fixtures/jfk.wav"
TEST_FIXTURE_AUDIO_SRC = File.expand_path(File.join(__dir__, "..", "..", "samples", "jfk.wav"))
TEST_FIXTURE_AUDIO_DIR = TEST_FIXTURE_AUDIO.pathmap("%d")
directory TEST_FIXTURE_AUDIO_DIR
if File.exist? TEST_FIXTURE_AUDIO_SRC
  file TEST_FIXTURE_AUDIO => [TEST_FIXTURE_AUDIO_SRC, TEST_FIXTURE_AUDIO_DIR] do |t|
    symlink t.source, t.name
  end
else
  require "open-uri"
  file TEST_FIXTURE_AUDIO => TEST_FIXTURE_AUDIO_DIR do |t|
    File.write t.name, URI("https://github.com/ggml-org/whisper.cpp/raw/refs/heads/master/samples/jfk.wav").read
  end
end

TEST_MEMORY_VIEW = "test/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
file TEST_MEMORY_VIEW => "test/jfk_reader/jfk_reader.c" do |t|
  chdir "test/jfk_reader" do
    ruby "extconf.rb"
    sh "make"
  end
end
CLEAN.include TEST_MEMORY_VIEW

task test: [LIB_FILE, TEST_MEMORY_VIEW, TEST_FIXTURE_AUDIO]

14 bindings/ruby/ext/.gitignore vendored
@ -1,9 +1,9 @@
Makefile
whisper.so
ggml.c
ggml.h
ggml-alloc.c
ggml-alloc.h
whisper.bundle
whisper.dll
*.o
*.a
sources/*
!sources/CMakeGraphVizOptions.cmake
mkmf.log
whisper.cpp
whisper.h
dr_wav.h

@ -1,73 +0,0 @@
require "tsort"

class Dependencies
  include TSort

  def initialize(cmake, options)
    @cmake = cmake
    @options = options
    @static_lib_shape = nil
    @nodes = {}
    @graph = Hash.new {|h, k| h[k] = []}

    generate_dot
    parse_dot
  end

  def libs
    tsort.filter_map {|node|
      label, shape = @nodes[node]
      if shape == @static_lib_shape
        label.gsub(/\\n\([^)]+\)/, '')
      else
        nil
      end
    }.reverse.collect {|lib| "lib#{lib}.a"}
  end

  def to_s
    libs.join(" ")
  end

  private

  def dot_path
    File.join(__dir__, "build", "whisper.cpp.dot")
  end

  def generate_dot
    args = ["-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF"]
    args << @options.to_s unless @options.to_s.empty?
    system @cmake, *args, exception: true
  end

  def parse_dot
    File.open(dot_path).each_line do |line|
      case line
      when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
        @static_lib_shape = $~[:shape]
      when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
        node = $~[:node]
        label = $~[:label]
        shape = $~[:shape]
        @nodes[node] = [label, shape]
      when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
        depender = $~[:depender]
        dependee = $~[:dependee]
        @graph[depender] << dependee
      end
    end
  end

  def tsort_each_node
    @nodes.each_key do |node|
      yield node
    end
  end

  def tsort_each_child(node)
    @graph[node].each do |child|
      yield child
    end
  end
end
@ -1,22 +1,29 @@
|
||||
require "mkmf"
|
||||
require_relative "options"
|
||||
require_relative "dependencies"
|
||||
require 'mkmf'
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
|
||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
|
||||
|
||||
cmake = find_executable("cmake") || abort
|
||||
options = Options.new(cmake)
|
||||
have_library("gomp") rescue nil
|
||||
libs = Dependencies.new(cmake, options)
|
||||
|
||||
$INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples"
|
||||
$LOCAL_LIBS << " #{libs}"
|
||||
$cleanfiles << " build #{libs}"
|
||||
|
||||
create_makefile "whisper" do |conf|
|
||||
conf << <<~EOF
|
||||
$(TARGET_SO): #{libs}
|
||||
#{libs}: cmake-targets
|
||||
cmake-targets:
|
||||
#{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
|
||||
#{"\t"}#{cmake} --build build --config Release --target common whisper
|
||||
EOF
|
||||
# need to use c++ compiler flags
|
||||
$CXXFLAGS << ' -std=c++11'
|
||||
# Set to true when building binary gems
|
||||
if enable_config('static-stdlib', false)
|
||||
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
||||
end
|
||||
|
||||
if enable_config('march-tune-native', false)
|
||||
$CFLAGS << ' -march=native -mtune=native'
|
||||
$CXXFLAGS << ' -march=native -mtune=native'
|
||||
end
|
||||
|
||||
create_makefile('whisper')
|
||||
|
87 bindings/ruby/ext/ggml-backend-impl.h Normal file
@ -0,0 +1,87 @@
|
||||
#pragma once
|
||||
|
||||
// ggml-backend internal header
|
||||
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// Backend buffer
|
||||
//
|
||||
|
||||
typedef void * ggml_backend_buffer_context_t;
|
||||
|
||||
struct ggml_backend_buffer_i {
|
||||
void (*free_buffer) (ggml_backend_buffer_t buffer);
|
||||
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
|
||||
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
|
||||
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
|
||||
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
|
||||
};
|
||||
|
||||
struct ggml_backend_buffer {
|
||||
struct ggml_backend_buffer_i iface;
|
||||
|
||||
ggml_backend_t backend;
|
||||
ggml_backend_buffer_context_t context;
|
||||
|
||||
size_t size;
|
||||
};
|
||||
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||
struct ggml_backend * backend,
|
||||
struct ggml_backend_buffer_i iface,
|
||||
ggml_backend_buffer_context_t context,
|
||||
size_t size);
|
||||
|
||||
//
|
||||
// Backend
|
||||
//
|
||||
|
||||
typedef void * ggml_backend_context_t;
|
||||
|
||||
struct ggml_backend_i {
|
||||
const char * (*get_name)(ggml_backend_t backend);
|
||||
|
||||
void (*free)(ggml_backend_t backend);
|
||||
|
||||
// buffer allocation
|
||||
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
|
||||
|
||||
// get buffer alignment
|
||||
size_t (*get_alignment)(ggml_backend_t backend);
|
||||
|
||||
// tensor data access
|
||||
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
|
||||
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
void (*synchronize) (ggml_backend_t backend);
|
||||
|
||||
// (optional) copy tensor between different backends, allow for single-copy transfers
|
||||
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
// compute graph with a plan
|
||||
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
// compute graph without a plan
|
||||
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
// check if the backend supports an operation
|
||||
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
};
|
||||
|
||||
struct ggml_backend {
|
||||
struct ggml_backend_i iface;
|
||||
|
||||
ggml_backend_context_t context;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
950 bindings/ruby/ext/ggml-backend.c Normal file
@ -0,0 +1,950 @@
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-alloc.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define UNUSED GGML_UNUSED
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// backend buffer
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||
struct ggml_backend * backend,
|
||||
struct ggml_backend_buffer_i iface,
|
||||
ggml_backend_buffer_context_t context,
|
||||
size_t size) {
|
||||
ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
|
||||
|
||||
GGML_ASSERT(iface.get_base != NULL);
|
||||
|
||||
(*buffer) = (struct ggml_backend_buffer) {
|
||||
/* .interface = */ iface,
|
||||
/* .backend = */ backend,
|
||||
/* .context = */ context,
|
||||
/* .size = */ size,
|
||||
};
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
||||
if (buffer == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (buffer->iface.free_buffer != NULL) {
|
||||
buffer->iface.free_buffer(buffer);
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) {
|
||||
return ggml_backend_get_alignment(buffer->backend);
|
||||
}
|
||||
|
||||
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
||||
return buffer->size;
|
||||
}
|
||||
|
||||
void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
||||
void * base = buffer->iface.get_base(buffer);
|
||||
|
||||
GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
|
||||
|
||||
return base;
|
||||
}
|
||||
|
||||
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
||||
// get_alloc_size is optional, defaults to ggml_nbytes
|
||||
if (buffer->iface.get_alloc_size) {
|
||||
return buffer->iface.get_alloc_size(buffer, tensor);
|
||||
}
|
||||
return ggml_nbytes(tensor);
|
||||
}
|
||||
|
||||
void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
||||
// init_tensor is optional
|
||||
if (buffer->iface.init_tensor) {
|
||||
buffer->iface.init_tensor(buffer, tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_backend_buffer_free_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
||||
// free_tensor is optional
|
||||
if (buffer->iface.free_tensor) {
|
||||
buffer->iface.free_tensor(buffer, tensor);
|
||||
}
|
||||
}
|
||||
|
||||
// backend
|
||||
|
||||
ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor) {
|
||||
return tensor->buffer ? tensor->buffer->backend : NULL;
|
||||
}
|
||||
|
||||
const char * ggml_backend_name(ggml_backend_t backend) {
|
||||
if (backend == NULL) {
|
||||
return "NULL";
|
||||
}
|
||||
return backend->iface.get_name(backend);
|
||||
}
|
||||
|
||||
void ggml_backend_free(ggml_backend_t backend) {
|
||||
if (backend == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
backend->iface.free(backend);
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) {
|
||||
return backend->iface.alloc_buffer(backend, size);
|
||||
}
|
||||
|
||||
size_t ggml_backend_get_alignment(ggml_backend_t backend) {
|
||||
return backend->iface.get_alignment(backend);
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_set_async(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
ggml_backend_t backend = ggml_get_backend(tensor);
|
||||
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(backend != NULL && "tensor backend not set");
|
||||
|
||||
backend->iface.set_tensor_async(backend, tensor, data, offset, size);
|
||||
backend->iface.synchronize(backend);
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
ggml_backend_t backend = ggml_get_backend(tensor);
|
||||
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(backend != NULL && "tensor backend not set");
|
||||
|
||||
backend->iface.get_tensor_async(backend, tensor, data, offset, size);
|
||||
backend->iface.synchronize(backend);
|
||||
}
|
||||
|
||||
void ggml_backend_synchronize(ggml_backend_t backend) {
|
||||
backend->iface.synchronize(backend);
|
||||
}
|
||||
|
||||
ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
return backend->iface.graph_plan_create(backend, cgraph);
|
||||
}
|
||||
|
||||
void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
backend->iface.graph_plan_free(backend, plan);
|
||||
}
|
||||
|
||||
void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
backend->iface.graph_plan_compute(backend, plan);
|
||||
}
|
||||
|
||||
void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
backend->iface.graph_compute(backend, cgraph);
|
||||
}
|
||||
|
||||
bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
||||
return backend->iface.supports_op(backend, op);
|
||||
}
|
||||
|
||||
// backend copy
|
||||
|
||||
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
||||
if (a->type != b->type) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
if (a->ne[i] != b->ne[i]) {
|
||||
return false;
|
||||
}
|
||||
if (a->nb[i] != b->nb[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
//printf("src: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", src->name, (int)src->ne[0], (int)src->ne[1], (int)src->ne[2], (int)src->ne[3], (int)src->nb[0], (int)src->nb[1], (int)src->nb[2], (int)src->nb[3]);
|
||||
//printf("dst: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], (int)dst->nb[0], (int)dst->nb[1], (int)dst->nb[2], (int)dst->nb[3]);
|
||||
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
|
||||
|
||||
// fprintf(stderr, "cpy tensor %s from %s to %s (%lu bytes)\n", src->name, ggml_backend_name(src->backend), ggml_backend_name(dst->backend), ggml_nbytes(src));
|
||||
|
||||
if (src == dst) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: allow backends to support copy to/from same backend
|
||||
|
||||
if (ggml_get_backend(dst)->iface.cpy_tensor_from != NULL) {
|
||||
ggml_get_backend(dst)->iface.cpy_tensor_from(ggml_get_backend(dst)->context, src, dst);
|
||||
} else if (ggml_get_backend(src)->iface.cpy_tensor_to != NULL) {
|
||||
ggml_get_backend(src)->iface.cpy_tensor_to(ggml_get_backend(src)->context, src, dst);
|
||||
} else {
|
||||
// shouldn't be hit when copying from/to CPU
|
||||
#ifndef NDEBUG
|
||||
fprintf(stderr, "ggml_backend_tensor_copy: neither cpy_tensor_from nor cpy_tensor_to are implemented for backends %s and %s, falling back to get/set\n", ggml_backend_name(src->buffer->backend), ggml_backend_name(dst->buffer->backend));
|
||||
#endif
|
||||
size_t nbytes = ggml_nbytes(src);
|
||||
void * data = malloc(nbytes);
|
||||
ggml_backend_tensor_get(src, data, 0, nbytes);
|
||||
ggml_backend_tensor_set(dst, data, 0, nbytes);
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
||||
// backend CPU
|
||||
|
||||
struct ggml_backend_cpu_context {
|
||||
int n_threads;
|
||||
void * work_data;
|
||||
size_t work_size;
|
||||
};
|
||||
|
||||
static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
|
||||
return "CPU";
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_free(ggml_backend_t backend) {
|
||||
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
||||
free(cpu_ctx->work_data);
|
||||
free(cpu_ctx);
|
||||
free(backend);
|
||||
}
|
||||
|
||||
static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
||||
return (void *)buffer->context;
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
free(buffer->context);
|
||||
UNUSED(buffer);
|
||||
}
|
||||
|
||||
static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
|
||||
/* .free_buffer = */ ggml_backend_cpu_buffer_free_buffer,
|
||||
/* .get_base = */ ggml_backend_cpu_buffer_get_base,
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .init_tensor = */ NULL, // no initialization required
|
||||
/* .free_tensor = */ NULL, // no cleanup required
|
||||
};
|
||||
|
||||
// for buffers from ptr, free is not called
|
||||
static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
|
||||
/* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
|
||||
/* .get_base = */ ggml_backend_cpu_buffer_get_base,
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .init_tensor = */ NULL,
|
||||
/* .free_tensor = */ NULL,
|
||||
};
|
||||
|
||||
static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_cpu_alloc_buffer(ggml_backend_t backend, size_t size) {
|
||||
size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
|
||||
void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC?
|
||||
|
||||
GGML_ASSERT(data != NULL && "failed to allocate buffer");
|
||||
|
||||
return ggml_backend_buffer_init(backend, cpu_backend_buffer_i, data, size);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_cpu_get_alignment(ggml_backend_t backend) {
|
||||
return TENSOR_ALIGNMENT;
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_set_tensor_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
|
||||
memcpy((char *)tensor->data + offset, data, size);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_get_tensor_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
|
||||
memcpy(data, (const char *)tensor->data + offset, size);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_synchronize(ggml_backend_t backend) {
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_cpy_tensor_from(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_cpy_tensor_to(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
struct ggml_backend_plan_cpu {
|
||||
struct ggml_cplan cplan;
|
||||
struct ggml_cgraph cgraph;
|
||||
};
|
||||
|
||||
static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
||||
|
||||
struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
|
||||
|
||||
cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
|
||||
cpu_plan->cgraph = *cgraph;
|
||||
|
||||
if (cpu_plan->cplan.work_size > 0) {
|
||||
cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
|
||||
}
|
||||
|
||||
return cpu_plan;
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
|
||||
|
||||
free(cpu_plan->cplan.work_data);
|
||||
free(cpu_plan);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
|
||||
|
||||
ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
|
||||
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
||||
|
||||
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
|
||||
|
||||
if (cpu_ctx->work_size < cplan.work_size) {
|
||||
// TODO: may be faster to free and use malloc to avoid the copy
|
||||
cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
|
||||
cpu_ctx->work_size = cplan.work_size;
|
||||
}
|
||||
|
||||
cplan.work_data = cpu_ctx->work_data;
|
||||
|
||||
ggml_graph_compute(cgraph, &cplan);
|
||||
}
|
||||
|
||||
static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
||||
return true;
|
||||
UNUSED(backend);
|
||||
UNUSED(op);
|
||||
}
|
||||
|
||||
static struct ggml_backend_i cpu_backend_i = {
|
||||
/* .get_name = */ ggml_backend_cpu_name,
|
||||
/* .free = */ ggml_backend_cpu_free,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_get_alignment,
|
||||
/* .set_tensor_async = */ ggml_backend_cpu_set_tensor_async,
|
||||
/* .get_tensor_async = */ ggml_backend_cpu_get_tensor_async,
|
||||
/* .synchronize = */ ggml_backend_cpu_synchronize,
|
||||
/* .cpy_tensor_from = */ ggml_backend_cpu_cpy_tensor_from,
|
||||
/* .cpy_tensor_to = */ ggml_backend_cpu_cpy_tensor_to,
|
||||
/* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
|
||||
/* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
|
||||
/* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
|
||||
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_cpu_supports_op,
|
||||
};
|
||||
|
||||
ggml_backend_t ggml_backend_cpu_init(void) {
|
||||
struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
|
||||
|
||||
ctx->n_threads = GGML_DEFAULT_N_THREADS;
|
||||
ctx->work_data = NULL;
|
||||
ctx->work_size = 0;
|
||||
|
||||
ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend));
|
||||
|
||||
*cpu_backend = (struct ggml_backend) {
|
||||
/* .interface = */ cpu_backend_i,
|
||||
/* .context = */ ctx
|
||||
};
|
||||
return cpu_backend;
|
||||
}
|
||||
|
||||
bool ggml_backend_is_cpu(ggml_backend_t backend) {
|
||||
return backend->iface.get_name == ggml_backend_cpu_name;
|
||||
}
|
||||
|
||||
void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
|
||||
GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
|
||||
|
||||
struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
|
||||
ctx->n_threads = n_threads;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size) {
|
||||
return ggml_backend_buffer_init(backend_cpu, cpu_backend_buffer_i_from_ptr, ptr, size);
|
||||
}
|
||||
|
||||
// scheduler
|
||||
|
||||
#define GGML_MAX_BACKENDS 4
|
||||
#define GGML_MAX_SPLITS 256
|
||||
#define GGML_MAX_SPLIT_INPUTS 16
|
||||
|
||||
struct ggml_backend_sched_split {
|
||||
ggml_tallocr_t tallocr;
|
||||
int i_start;
|
||||
int i_end;
|
||||
struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS];
|
||||
int n_inputs;
|
||||
struct ggml_cgraph * graph;
|
||||
};
|
||||
|
||||
struct ggml_backend_sched {
|
||||
int n_backends;
|
||||
ggml_backend_t backends[GGML_MAX_BACKENDS];
|
||||
ggml_tallocr_t tallocs[GGML_MAX_BACKENDS];
|
||||
|
||||
ggml_gallocr_t galloc;
|
||||
|
||||
struct ggml_hash_set hash_set;
|
||||
ggml_tallocr_t * node_talloc; // [hash_set.size]
|
||||
struct ggml_tensor * (* node_copies)[GGML_MAX_BACKENDS]; // [hash_set.size][GGML_MAX_BACKENDS]
|
||||
|
||||
struct ggml_cgraph * graph;
|
||||
struct ggml_backend_sched_split splits[GGML_MAX_SPLITS];
|
||||
int n_splits;
|
||||
|
||||
struct ggml_context * ctx;
|
||||
|
||||
// align context_buffer to GGML_MEM_ALIGN
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(GGML_MEM_ALIGN))
|
||||
#else
|
||||
__attribute__((aligned(GGML_MEM_ALIGN)))
|
||||
#endif
|
||||
char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*sizeof(struct ggml_tensor) + GGML_MAX_SPLITS*sizeof(struct ggml_cgraph)];
|
||||
};
|
||||
|
||||
#define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node)
|
||||
#define node_allocr(node) sched->node_talloc[hash_id(node)]
|
||||
|
||||
static bool ggml_is_view_op(enum ggml_op op) {
|
||||
return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
|
||||
}
|
||||
|
||||
// returns the priority of the backend, lower is better
|
||||
static int sched_backend_prio(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
if (sched->backends[i] == backend) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
static int sched_allocr_prio(ggml_backend_sched_t sched, ggml_tallocr_t allocr) {
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
if (sched->tallocs[i] == allocr) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
// returns the backend that should be used for the node based on the current locations
|
||||
char causes[GGML_DEFAULT_GRAPH_SIZE*4 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug, remove
|
||||
static ggml_backend_t sched_backend_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * node) {
|
||||
// if the dst tensor is already allocated in a buffer, we must assume that it is critical to keep it there
|
||||
// ie. kv cache updates
|
||||
// note that this doesn't allow fallback to CPU. need to add output tensors to the splits to copy the data back to the original backend.
|
||||
// dst
|
||||
ggml_backend_t cur_backend = ggml_get_backend(node);
|
||||
if (cur_backend != NULL) {
|
||||
sprintf(causes[hash_id(node)], "1.dst");
|
||||
return cur_backend;
|
||||
}
|
||||
|
||||
// view_src
|
||||
if (node->view_src != NULL && ggml_get_backend(node->view_src) != NULL) {
|
||||
sprintf(causes[hash_id(node)], "1.vsrc");
|
||||
return ggml_get_backend(node->view_src);
|
||||
}
|
||||
|
||||
// src
|
||||
int cur_prio = INT_MAX;
|
||||
size_t cur_size = 0;
|
||||
|
||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||
const struct ggml_tensor * src = node->src[i];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_backend_t src_backend = ggml_get_backend(src);
|
||||
if (src_backend != NULL) {
|
||||
int src_prio = sched_backend_prio(sched, src_backend);
|
||||
size_t src_size = ggml_nbytes(src);
|
||||
if (src_prio < cur_prio && src_size >= cur_size) {
|
||||
cur_prio = src_prio;
|
||||
cur_size = src_size;
|
||||
cur_backend = src_backend;
|
||||
sprintf(causes[hash_id(node)], "1.src%d", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return cur_backend;
|
||||
}
|
||||
|
||||
static char * fmt_size(size_t size) {
|
||||
static char buffer[128];
|
||||
if (size >= 1024*1024) {
|
||||
sprintf(buffer, "%zuM", size/1024/1024);
|
||||
} else {
|
||||
sprintf(buffer, "%zuK", size/1024);
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static void sched_print_assignments(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
int cur_split = 0;
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
|
||||
ggml_backend_t split_backend = ggml_tallocr_get_buffer(sched->splits[cur_split].tallocr)->backend;
|
||||
fprintf(stderr, "\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend), sched->splits[cur_split].n_inputs);
|
||||
for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
|
||||
fprintf(stderr, "[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name, fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
cur_split++;
|
||||
}
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
if (ggml_is_view_op(node->op)) {
|
||||
continue;
|
||||
}
|
||||
ggml_tallocr_t node_allocr = node_allocr(node);
|
||||
ggml_backend_t node_backend = node_allocr ? ggml_tallocr_get_buffer(node_allocr)->backend : NULL;
|
||||
fprintf(stderr, "node #%3d (%10.10s): %20.20s (%4.4s) [%4.4s %8.8s]:", i, ggml_op_name(node->op), node->name, fmt_size(ggml_nbytes(node)), node_allocr ? ggml_backend_name(node_backend) : "NULL", causes[hash_id(node)]);
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_tallocr_t src_allocr = node_allocr(src);
|
||||
ggml_backend_t src_backend = src_allocr ? ggml_tallocr_get_buffer(src_allocr)->backend : NULL;
|
||||
fprintf(stderr, " %20.20s (%4.4s) [%4.4s %8.8s]", src->name, fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", causes[hash_id(src)]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// creates a copy of the tensor with the same memory layout
|
||||
static struct ggml_tensor * ggml_dup_tensor_layout(struct ggml_context * ctx, const struct ggml_tensor * tensor) {
|
||||
struct ggml_tensor * dup = ggml_dup_tensor(ctx, tensor);
|
||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||
dup->nb[i] = tensor->nb[i];
|
||||
}
|
||||
return dup;
|
||||
}
|
||||
|
||||
// assigns backends to ops and splits the graph into subgraphs that can be computed on the same backend
|
||||
// TODO: merge passes
|
||||
static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
// reset state
|
||||
size_t hash_size = sched->hash_set.size;
|
||||
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size);
|
||||
memset(sched->node_talloc, 0, sizeof(sched->node_talloc[0]) * hash_size);
|
||||
memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size);
|
||||
sched->n_splits = 0;
|
||||
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size = */ sizeof(sched->context_buffer),
|
||||
/*.mem_buffer = */ sched->context_buffer,
|
||||
/*.no_alloc = */ true
|
||||
};
|
||||
|
||||
if (sched->ctx != NULL) {
|
||||
ggml_free(sched->ctx);
|
||||
}
|
||||
|
||||
sched->ctx = ggml_init(params);
|
||||
|
||||
// pass 1: assign backends to ops with allocated inputs
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
if (node_allocr(leaf) != NULL) {
|
||||
// do not overwrite user assignments
|
||||
continue;
|
||||
}
|
||||
ggml_backend_t leaf_backend = ggml_get_backend(leaf);
|
||||
if (leaf_backend == NULL && leaf->view_src != NULL) {
|
||||
leaf_backend = ggml_get_backend(leaf->view_src);
|
||||
}
|
||||
if (leaf_backend != NULL) {
|
||||
node_allocr(leaf) = ggml_backend_sched_get_tallocr(sched, leaf_backend);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
if (node_allocr(node) != NULL) {
|
||||
// do not overwrite user assignments
|
||||
continue;
|
||||
}
|
||||
ggml_backend_t node_backend = sched_backend_from_cur(sched, node);
|
||||
if (node_backend != NULL) {
|
||||
node_allocr(node) = ggml_backend_sched_get_tallocr(sched, node_backend);
|
||||
}
|
||||
}
|
||||
//printf("PASS 1 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
|
||||
// pass 2: assign backends to ops from current assignments
|
||||
// TODO:
|
||||
// - reuse sched_backend_from_cur
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
ggml_tallocr_t node_allocr = node_allocr(node);
|
||||
if (node_allocr == NULL) {
|
||||
int cur_prio = INT_MAX;
|
||||
size_t cur_size = 0;
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_tallocr_t src_allocr = node_allocr(src);
|
||||
if (src_allocr != NULL) {
|
||||
int src_prio = sched_allocr_prio(sched, src_allocr);
|
||||
size_t src_size = ggml_nbytes(src);
|
||||
if (src_prio < cur_prio && src_size >= cur_size) {
|
||||
cur_prio = src_prio;
|
||||
cur_size = src_size;
|
||||
node_allocr = src_allocr;
|
||||
sprintf(causes[hash_id(node)], "2.src%d", j);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (node_allocr != NULL) {
|
||||
node_allocr(node) = node_allocr;
|
||||
}
|
||||
}
|
||||
}
|
||||
//printf("PASS 2 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
|
||||
// pass 3: assign backends to remaining src from dst (should only be leafs)
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
ggml_tallocr_t node_allocr = node_allocr(node);
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_tallocr_t src_allocr = node_allocr(src);
|
||||
if (src_allocr == NULL) {
|
||||
node_allocr(src) = node_allocr;
|
||||
}
|
||||
}
|
||||
}
|
||||
//printf("PASS 3 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
|
||||
// pass 4: split graph, find tensors that need to be copied
|
||||
// TODO:
|
||||
// - when switching from a less preferred backend to a more preferred backend, check if it is possible to move the switch to an earlier point for the same cost
|
||||
// find first backend
|
||||
int cur_split = 0;
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
if (node->view_src == NULL) {
|
||||
sched->splits[0].tallocr = node_allocr(node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
sched->splits[0].i_start = 0;
|
||||
sched->splits[0].n_inputs = 0;
|
||||
memset(sched->splits[0].inputs, 0, sizeof(sched->splits[0].inputs)); //HACK
|
||||
ggml_tallocr_t cur_allocr = sched->splits[0].tallocr;
|
||||
size_t cur_backend_id = sched_allocr_prio(sched, cur_allocr);
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
|
||||
if (ggml_is_view_op(node->op)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_tallocr_t node_allocr = node_allocr(node);
|
||||
|
||||
if (node_allocr != cur_allocr) {
|
||||
sched->splits[cur_split].i_end = i;
|
||||
cur_split++;
|
||||
GGML_ASSERT(cur_split < GGML_MAX_SPLITS);
|
||||
sched->splits[cur_split].tallocr = node_allocr;
|
||||
sched->splits[cur_split].i_start = i;
|
||||
sched->splits[cur_split].n_inputs = 0;
|
||||
memset(sched->splits[cur_split].inputs, 0, sizeof(sched->splits[cur_split].inputs)); //HACK
|
||||
cur_allocr = node_allocr;
|
||||
cur_backend_id = sched_allocr_prio(sched, cur_allocr);
|
||||
}
|
||||
|
||||
// find inputs that are not on the same backend
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_tallocr_t src_allocr = node_allocr(src);
|
||||
if (src_allocr != node_allocr) {
|
||||
int n_inputs = sched->splits[cur_split].n_inputs++;
|
||||
GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
|
||||
sched->splits[cur_split].inputs[n_inputs] = (struct ggml_tensor *)src;
|
||||
|
||||
// create copies
|
||||
size_t id = hash_id(src);
|
||||
if (sched->node_copies[id][cur_backend_id] == NULL) {
|
||||
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
||||
sched->node_copies[id][cur_backend_id] = tensor_copy;
|
||||
node_allocr(tensor_copy) = cur_allocr;
|
||||
ggml_backend_t backend = ggml_tallocr_get_buffer(cur_allocr)->backend;
|
||||
ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name);
|
||||
}
|
||||
node->src[j] = sched->node_copies[id][cur_backend_id];
|
||||
}
|
||||
}
|
||||
}
|
||||
sched->splits[cur_split].i_end = graph->n_nodes;
|
||||
sched->n_splits = cur_split + 1;
|
||||
|
||||
//fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fflush(stdout);
|
||||
|
||||
#if 1
|
||||
// sanity check: all sources should have the same backend as the node
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
ggml_tallocr_t node_allocr = node_allocr(node);
|
||||
if (node_allocr == NULL) {
|
||||
fprintf(stderr, "!!!!!!! %s has no backend\n", node->name);
|
||||
}
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
break;
|
||||
}
|
||||
ggml_tallocr_t src_allocr = node_allocr(src);
|
||||
if (src_allocr != node_allocr /* && src_backend != NULL */) { // ignore nulls for now
|
||||
fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n",
|
||||
node->name, node_allocr ? ggml_backend_name(ggml_tallocr_get_buffer(node_allocr)->backend) : "NULL",
|
||||
j, src->name, src_allocr ? ggml_backend_name(ggml_tallocr_get_buffer(src_allocr)->backend) : "NULL");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// create copies of the graph for each split
|
||||
// FIXME: avoid this copy, pass split inputs to ggml_gallocr_alloc_graph_n in some other way
|
||||
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false);
|
||||
for (int i = 0; i < sched->n_splits; i++) {
|
||||
struct ggml_backend_sched_split * split = &sched->splits[i];
|
||||
split->graph = ggml_graph_view(sched->ctx, graph, split->i_start, split->i_end);
|
||||
|
||||
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
|
||||
for (int j = 0; j < split->n_inputs; j++) {
|
||||
struct ggml_tensor * input = split->inputs[j];
|
||||
struct ggml_tensor * input_cpy = sched->node_copies[hash_id(input)][sched_allocr_prio(sched, split->tallocr)];
|
||||
input_cpy->src[0] = input;
|
||||
graph_copy->nodes[graph_copy->n_nodes++] = input_cpy;
|
||||
}
|
||||
|
||||
for (int j = split->i_start; j < split->i_end; j++) {
|
||||
graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
|
||||
}
|
||||
}
|
||||
sched->graph = graph_copy;
|
||||
}
|
||||
|
||||
static void sched_alloc_splits(ggml_backend_sched_t sched) {
|
||||
ggml_gallocr_alloc_graph_n(
|
||||
sched->galloc,
|
||||
sched->graph,
|
||||
sched->hash_set,
|
||||
sched->node_talloc);
|
||||
}
|
||||
|
||||
static void sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
|
||||
uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
|
||||
|
||||
struct ggml_backend_sched_split * splits = sched->splits;
|
||||
|
||||
for (int i = 0; i < sched->n_splits; i++) {
|
||||
struct ggml_backend_sched_split * split = &splits[i];
|
||||
ggml_backend_t split_backend = ggml_tallocr_get_buffer(split->tallocr)->backend;
|
||||
int split_backend_id = sched_backend_prio(sched, split_backend);
|
||||
|
||||
// copy the input tensors to the split backend
|
||||
uint64_t copy_start_us = ggml_time_us();
|
||||
for (int j = 0; j < split->n_inputs; j++) {
|
||||
struct ggml_tensor * input_cpy = sched->node_copies[hash_id(split->inputs[j])][sched_backend_prio(sched, split_backend)];
|
||||
if (split->inputs[j]->buffer == NULL) {
|
||||
if (split->inputs[j]->view_src == NULL) {
|
||||
fprintf(stderr, "input %s has no buffer and no view_src\n", split->inputs[j]->name);
|
||||
exit(1);
|
||||
}
|
||||
struct ggml_tensor * view = split->inputs[j];
|
||||
view->backend = view->view_src->backend;
|
||||
view->buffer = view->view_src->buffer;
|
||||
view->data = (char *)view->view_src->data + view->view_offs;
|
||||
ggml_backend_buffer_init_tensor(ggml_backend_sched_get_buffer(sched, view->buffer->backend), view);
|
||||
}
|
||||
if (input_cpy->buffer == NULL) {
|
||||
fprintf(stderr, "input_cpy %s has no buffer\n", input_cpy->name);
|
||||
exit(1);
|
||||
}
|
||||
GGML_ASSERT(split->inputs[j]->buffer->backend != input_cpy->buffer->backend);
|
||||
GGML_ASSERT(input_cpy->buffer->backend == split_backend);
|
||||
ggml_backend_tensor_copy(split->inputs[j], input_cpy);
|
||||
}
|
||||
// ggml_backend_synchronize(split_backend);
|
||||
int64_t copy_end_us = ggml_time_us();
|
||||
copy_us[split_backend_id] += copy_end_us - copy_start_us;
|
||||
|
||||
#if 0
|
||||
char split_filename[GGML_MAX_NAME];
|
||||
snprintf(split_filename, GGML_MAX_NAME, "split_%i_%s.dot", i, ggml_backend_name(split_backend));
|
||||
ggml_graph_dump_dot(split->graph, NULL, split_filename);
|
||||
#endif
|
||||
|
||||
uint64_t compute_start_us = ggml_time_us();
|
||||
ggml_backend_graph_compute(split_backend, split->graph);
|
||||
// ggml_backend_synchronize(split_backend);
|
||||
uint64_t compute_end_us = ggml_time_us();
|
||||
compute_us[split_backend_id] += compute_end_us - compute_start_us;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// per-backend timings
|
||||
fprintf(stderr, "sched_compute_splits times (%d splits):\n", sched->n_splits);
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
if (copy_us[i] > 0 || compute_us[i] > 0) {
|
||||
fprintf(stderr, "\t%5.5s: %lu us copy, %lu us compute\n", ggml_backend_name(sched->backends[i]), copy_us[i], compute_us[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void sched_reset(ggml_backend_sched_t sched) {
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
ggml_tallocr_reset(sched->tallocs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends) {
|
||||
GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS);
|
||||
|
||||
struct ggml_backend_sched * sched = malloc(sizeof(struct ggml_backend_sched));
|
||||
memset(sched, 0, sizeof(struct ggml_backend_sched));
|
||||
|
||||
fprintf(stderr, "ggml_backend_sched size: %lu KB\n", sizeof(struct ggml_backend_sched)/1024);
|
||||
|
||||
sched->n_backends = n_backends;
|
||||
for (int i = 0; i < n_backends; i++) {
|
||||
sched->backends[i] = backends[i];
|
||||
}
|
||||
|
||||
sched->galloc = ggml_gallocr_new();
|
||||
|
||||
// init measure allocs for each backend
|
||||
for (int i = 0; i < n_backends; i++) {
|
||||
sched->tallocs[i] = ggml_tallocr_new_measure_from_backend(backends[i]);
|
||||
}
|
||||
|
||||
return sched;
|
||||
}
|
||||
|
||||
void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
||||
if (sched == NULL) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
ggml_tallocr_free(sched->tallocs[i]);
|
||||
}
|
||||
ggml_gallocr_free(sched->galloc);
|
||||
free(sched->hash_set.keys);
|
||||
free(sched->node_talloc);
|
||||
free(sched->node_copies);
|
||||
free(sched);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
||||
// initialize hash tables
|
||||
size_t hash_size = measure_graph->visited_hash_table.size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS;
|
||||
sched->hash_set.size = hash_size;
|
||||
sched->hash_set.keys = malloc(sizeof(sched->hash_set.keys[0]) * hash_size);
|
||||
sched->node_talloc = malloc(sizeof(sched->node_talloc[0]) * hash_size);
|
||||
sched->node_copies = malloc(sizeof(sched->node_copies[0]) * hash_size);
|
||||
|
||||
sched_split_graph(sched, measure_graph);
|
||||
sched_alloc_splits(sched);
|
||||
|
||||
// allocate buffers and reset allocators
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
size_t size = ggml_tallocr_max_size(sched->tallocs[i]);
|
||||
ggml_tallocr_free(sched->tallocs[i]);
|
||||
sched->tallocs[i] = ggml_tallocr_new_from_backend(sched->backends[i], size);
|
||||
}
|
||||
|
||||
sched_reset(sched);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT(sched->hash_set.size >= graph->visited_hash_table.size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
|
||||
|
||||
sched_split_graph(sched, graph);
|
||||
sched_alloc_splits(sched);
|
||||
sched_compute_splits(sched);
|
||||
sched_reset(sched);
|
||||
}
|
||||
|
||||
ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
||||
int backend_index = sched_backend_prio(sched, backend);
|
||||
return sched->tallocs[backend_index];
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_sched_get_buffer(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
||||
int backend_index = sched_backend_prio(sched, backend);
|
||||
return ggml_tallocr_get_buffer(sched->tallocs[backend_index]);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
||||
int backend_index = sched_backend_prio(sched, backend);
|
||||
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
||||
node_allocr(node) = sched->tallocs[backend_index];
|
||||
}
|
136 bindings/ruby/ext/ggml-backend.h Normal file
@ -0,0 +1,136 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// Backend buffer
|
||||
//
|
||||
|
||||
struct ggml_backend_buffer;
|
||||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||
|
||||
// backend buffer functions
|
||||
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||
|
||||
//
|
||||
// Backend
|
||||
//
|
||||
|
||||
struct ggml_backend;
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
typedef void * ggml_backend_graph_plan_t;
|
||||
|
||||
GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
|
||||
|
||||
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||
|
||||
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
|
||||
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
|
||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// tensor copy between different backends
|
||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
//
|
||||
// CPU backend
|
||||
//
|
||||
|
||||
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
||||
|
||||
GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
|
||||
|
||||
// Create a backend buffer from an existing pointer
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
|
||||
|
||||
|
||||
//
|
||||
// Backend scheduler
|
||||
//
|
||||
|
||||
// The backend scheduler allows for multiple backends to be used together
|
||||
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
||||
// The backends are selected based on:
|
||||
// - the backend that supports the operation
|
||||
// - the location of the pre-allocated tensors (e.g. the weights)
|
||||
/*
|
||||
Example usage:
|
||||
|
||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
|
||||
// sched is initialized with measure allocators and cannot be used until allocated with a measure graph
|
||||
|
||||
// initialize buffers from a measure graph
|
||||
measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
|
||||
|
||||
// in build_graph:
|
||||
build_graph(...) {
|
||||
// allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
|
||||
alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
|
||||
ggml_allocr_alloc(alloc_cpu, tensor);
|
||||
|
||||
// manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
|
||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||
ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
|
||||
}
|
||||
|
||||
// allocate backend buffers from measure graph
|
||||
ggml_backend_sched_init_measure(sched, measure_graph);
|
||||
|
||||
// the scheduler is now ready to compute graphs
|
||||
|
||||
// compute
|
||||
graph = build_graph(sched);
|
||||
ggml_backend_sched_graph_compute(sched, graph);
|
||||
*/
|
||||
|
||||
struct ggml_backend_sched;
|
||||
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
||||
|
||||
// Initialize a backend scheduler
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends);
|
||||
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
|
||||
// Initialize backend buffers from a measure graph
|
||||
GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||
|
||||
GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
|
||||
// Allocate a graph on the backend scheduler
|
||||
GGML_API void ggml_backend_sched_graph_compute(
|
||||
ggml_backend_sched_t sched,
|
||||
struct ggml_cgraph * graph);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
249 bindings/ruby/ext/ggml-impl.h Normal file
@ -0,0 +1,249 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h> // memcpy
|
||||
#include <math.h> // fabsf
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// static_assert should be a #define, but if it's not,
|
||||
// fall back to the _Static_assert C11 keyword.
|
||||
// if C99 - static_assert is noop
|
||||
// ref: https://stackoverflow.com/a/53923785/4039976
|
||||
#ifndef static_assert
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
||||
#else
|
||||
#define static_assert(cond, msg) struct global_scope_noop_trick
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
|
||||
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
|
||||
#ifndef __FMA__
|
||||
#define __FMA__
|
||||
#endif
|
||||
#ifndef __F16C__
|
||||
#define __F16C__
|
||||
#endif
|
||||
#ifndef __SSE3__
|
||||
#define __SSE3__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// 16-bit float
|
||||
// on Arm, we use __fp16
|
||||
// on x86, we use uint16_t
|
||||
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||
|
||||
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
||||
//
|
||||
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
||||
//
|
||||
#include <arm_neon.h>
|
||||
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
|
||||
|
||||
#define GGML_FP16_TO_FP32(x) ((float) (x))
|
||||
#define GGML_FP32_TO_FP16(x) (x)
|
||||
|
||||
#else
|
||||
|
||||
#ifdef __wasm_simd128__
|
||||
#include <wasm_simd128.h>
|
||||
#else
|
||||
#ifdef __POWER9_VECTOR__
|
||||
#include <altivec.h>
|
||||
#undef bool
|
||||
#define bool _Bool
|
||||
#else
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
|
||||
#if !defined(__riscv)
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __riscv_v_intrinsic
|
||||
#include <riscv_vector.h>
|
||||
#endif
|
||||
|
||||
#ifdef __F16C__
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
|
||||
#else
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
||||
#endif
|
||||
|
||||
#elif defined(__POWER9_VECTOR__)
|
||||
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||
/* the inline asm below is about 12% faster than the lookup method */
|
||||
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
||||
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||
|
||||
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||
register float f;
|
||||
register double d;
|
||||
__asm__(
|
||||
"mtfprd %0,%2\n"
|
||||
"xscvhpdp %0,%0\n"
|
||||
"frsp %1,%0\n" :
|
||||
/* temp */ "=d"(d),
|
||||
/* out */ "=f"(f):
|
||||
/* in */ "r"(h));
|
||||
return f;
|
||||
}
|
||||
|
||||
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||
register double d;
|
||||
register ggml_fp16_t r;
|
||||
__asm__( /* xscvdphp can work on double or single precision */
|
||||
"xscvdphp %0,%2\n"
|
||||
"mffprd %1,%0\n" :
|
||||
/* temp */ "=d"(d),
|
||||
/* out */ "=r"(r):
|
||||
/* in */ "f"(f));
|
||||
return r;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// FP16 <-> FP32
|
||||
// ref: https://github.com/Maratyszcza/FP16
|
||||
|
||||
static inline float fp32_from_bits(uint32_t w) {
|
||||
union {
|
||||
uint32_t as_bits;
|
||||
float as_value;
|
||||
} fp32;
|
||||
fp32.as_bits = w;
|
||||
return fp32.as_value;
|
||||
}
|
||||
|
||||
static inline uint32_t fp32_to_bits(float f) {
|
||||
union {
|
||||
float as_value;
|
||||
uint32_t as_bits;
|
||||
} fp32;
|
||||
fp32.as_value = f;
|
||||
return fp32.as_bits;
|
||||
}
|
||||
|
||||
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||
const uint32_t w = (uint32_t) h << 16;
|
||||
const uint32_t sign = w & UINT32_C(0x80000000);
|
||||
const uint32_t two_w = w + w;
|
||||
|
||||
const uint32_t exp_offset = UINT32_C(0xE0) << 23;
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
const float exp_scale = 0x1.0p-112f;
|
||||
#else
|
||||
const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
|
||||
#endif
|
||||
const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
|
||||
|
||||
const uint32_t magic_mask = UINT32_C(126) << 23;
|
||||
const float magic_bias = 0.5f;
|
||||
const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
|
||||
|
||||
const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
|
||||
const uint32_t result = sign |
|
||||
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
|
||||
return fp32_from_bits(result);
|
||||
}
|
||||
|
||||
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
const float scale_to_inf = 0x1.0p+112f;
|
||||
const float scale_to_zero = 0x1.0p-110f;
|
||||
#else
|
||||
const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
|
||||
const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
|
||||
#endif
|
||||
float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
|
||||
|
||||
const uint32_t w = fp32_to_bits(f);
|
||||
const uint32_t shl1_w = w + w;
|
||||
const uint32_t sign = w & UINT32_C(0x80000000);
|
||||
uint32_t bias = shl1_w & UINT32_C(0xFF000000);
|
||||
if (bias < UINT32_C(0x71000000)) {
|
||||
bias = UINT32_C(0x71000000);
|
||||
}
|
||||
|
||||
base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
|
||||
const uint32_t bits = fp32_to_bits(base);
|
||||
const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
|
||||
const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
|
||||
const uint32_t nonsign = exp_bits + mantissa_bits;
|
||||
return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
|
||||
}
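// A minimal round-trip check for the scalar conversion pair above; the helper
// name is illustrative only and assumes this header is included from a test
// translation unit:
static inline bool ggml_fp16_roundtrip_close(float x) {
    const ggml_fp16_t h = ggml_compute_fp32_to_fp16(x);
    const float       y = ggml_compute_fp16_to_fp32(h);
    // half precision keeps ~10 mantissa bits, so allow a small relative error
    return fabsf(x - y) <= 0.001f*fabsf(x) + 1e-6f;
}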
|
||||
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||
|
||||
#endif // __F16C__
|
||||
|
||||
#endif // __ARM_NEON
|
||||
|
||||
// precomputed f32 table for f16 (256 KB)
|
||||
// defined in ggml.c, initialized in ggml_init()
|
||||
extern float ggml_table_f32_f16[1 << 16];
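// Sketch of the initialization ggml_init() is expected to perform for this
// table (illustrative only; the loop reinterprets each 16-bit pattern as f16):
static inline void ggml_table_f32_f16_init_example(void) {
    for (uint32_t i = 0; i < (1u << 16); ++i) {
        const uint16_t u = (uint16_t) i;
        ggml_fp16_t h;
        memcpy(&h, &u, sizeof(h)); // reinterpret the raw bit pattern, not the value
        ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(h);
    }
}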
|
||||
|
||||
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
||||
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
||||
// This is also true for POWER9.
|
||||
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
|
||||
|
||||
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
||||
uint16_t s;
|
||||
memcpy(&s, &f, sizeof(uint16_t));
|
||||
return ggml_table_f32_f16[s];
|
||||
}
|
||||
|
||||
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
|
||||
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||
|
||||
#endif
|
||||
|
||||
#define GGML_HASHTABLE_FULL ((size_t)-1)
|
||||
#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
|
||||
|
||||
bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||
|
||||
// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
|
||||
size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||
|
||||
// returns GGML_HASHTABLE_ALREADY_EXISTS if the key already exists, otherwise the insertion index; asserts if the table is full
|
||||
size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
|
||||
|
||||
// return index, asserts if table is full
|
||||
size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
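// Usage sketch for the hash helpers above (the hash_set is assumed to come from
// the graph allocator; this is equivalent in spirit to ggml_hash_find_or_insert):
static inline size_t ggml_hash_upsert_example(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
    if (ggml_hash_contains(hash_set, key)) {
        return ggml_hash_find(hash_set, key);   // existing slot
    }
    return ggml_hash_insert(hash_set, key);     // new slot (asserts if full)
}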
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
bindings/ruby/ext/ggml-quants.c (new file, 7282 lines)
File diff suppressed because it is too large
bindings/ruby/ext/ggml-quants.h (new file, 224 lines)
@ -0,0 +1,224
|
||||
#pragma once
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define QK4_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
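// Layout sketch for block_q4_0: each qs byte packs two 4-bit quants stored with
// a +8 offset, so a weight is reconstructed as d * (q - 8). Single-block
// dequantizer for illustration (nibble ordering as in dequantize_row_q4_0):
static inline void dequantize_one_block_q4_0(const block_q4_0 * b, float y[QK4_0]) {
    const float d = GGML_FP16_TO_FP32(b->d);
    for (int j = 0; j < QK4_0/2; ++j) {
        y[j          ] = d * (float) ((b->qs[j] & 0x0F) - 8); // low nibble -> first half
        y[j + QK4_0/2] = d * (float) ((b->qs[j] >>   4) - 8); // high nibble -> second half
    }
}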
|
||||
|
||||
#define QK4_1 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
ggml_fp16_t m; // min
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
||||
|
||||
#define QK5_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
||||
|
||||
#define QK5_1 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
ggml_fp16_t m; // min
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#define QK8_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
||||
|
||||
#define QK8_1 32
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
float s; // d * sum(qs[i])
|
||||
int8_t qs[QK8_1]; // quants
|
||||
} block_q8_1;
|
||||
static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
|
||||
|
||||
//
|
||||
// Super-block quantization structures
|
||||
//
|
||||
|
||||
// Super-block size
|
||||
#ifdef GGML_QKK_64
|
||||
#define QK_K 64
|
||||
#define K_SCALE_SIZE 4
|
||||
#else
|
||||
#define QK_K 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#endif
|
||||
|
||||
// 2-bit quantization
|
||||
// weight is represented as x = a * q + b
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 2.625 bits per weight (84 bytes per 256-weight super-block)
|
||||
typedef struct {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
} block_q2_K;
|
||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
||||
|
||||
// 3-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 3.4375 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[2];
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[12]; // scales, quantized with 6 bits
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 4-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 4.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_fp16_t d[2]; // super-block scales/mins
|
||||
uint8_t scales[2]; // 4-bit block scales/mins
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 5-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 5.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale
|
||||
int8_t scales[QK_K/16]; // 8-bit block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 6-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 6.5625 bits per weight
|
||||
typedef struct {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q6_K;
|
||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
|
||||
|
||||
// This is only used for intermediate quantization and dot products
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
int8_t qs[QK_K]; // quants
|
||||
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
|
||||
} block_q8_K;
|
||||
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
|
||||
|
||||
|
||||
// Quantization
|
||||
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
|
||||
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
|
||||
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
|
||||
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
|
||||
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
|
||||
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
|
||||
|
||||
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
|
||||
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
|
||||
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
|
||||
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
|
||||
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
|
||||
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
|
||||
|
||||
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
|
||||
|
||||
void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
|
||||
void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
|
||||
|
||||
// Dequantization
|
||||
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
|
||||
//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
|
||||
|
||||
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
|
||||
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
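// Round-trip usage sketch for the row helpers above, on a single q4_0 block
// (k must be a multiple of QK4_0; the function name is illustrative):
static inline void quantize_roundtrip_q4_0_example(const float src[QK4_0], float dst[QK4_0]) {
    block_q4_0 blk;
    quantize_row_q4_0(src, &blk, QK4_0);   // fp32 -> 4-bit block
    dequantize_row_q4_0(&blk, dst, QK4_0); // back to fp32 (lossy)
}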
|
||||
|
||||
// Dot product
|
||||
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
|
||||
void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
|
||||
void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
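// Typical use of the dot-product kernels: quantize the activation row to q8_0
// and dot it against q4_0 weights (single-block sketch, QK4_0 == QK8_0 == 32):
static inline float vec_dot_q4_0_q8_0_example(const float w[QK4_0], const float a[QK8_0]) {
    block_q4_0 wq;
    block_q8_0 aq;
    quantize_row_q4_0(w, &wq, QK4_0);
    quantize_row_q8_0(a, &aq, QK8_0);
    float s = 0.0f;
    ggml_vec_dot_q4_0_q8_0(QK4_0, &s, &wq, &aq);
    return s;
}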
|
@ -1,82 +0,0 @@
|
||||
class Options
|
||||
def initialize(cmake="cmake")
|
||||
@cmake = cmake
|
||||
@options = {}
|
||||
|
||||
configure
|
||||
end
|
||||
|
||||
def to_s
|
||||
@options
|
||||
.reject {|name, (type, value)| value.nil?}
|
||||
.collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? "OFF" : value.shellescape}"}
|
||||
.join(" ")
|
||||
end
|
||||
|
||||
def cmake_options
|
||||
return @cmake_options if @cmake_options
|
||||
|
||||
output = nil
|
||||
Dir.chdir __dir__ do
|
||||
output = `#{@cmake.shellescape} -S sources -B build -L`
|
||||
end
|
||||
@cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
|
||||
.filter_map {|line|
|
||||
option, value = line.chomp.split("=", 2)
|
||||
name, type = option.split(":", 2)
|
||||
[
|
||||
name,
|
||||
[
|
||||
type,
|
||||
type == "BOOL" ? value == "ON" : value
|
||||
]
|
||||
]
|
||||
}.to_h
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def configure
|
||||
cmake_options.each_pair do |name, (type, default_value)|
|
||||
option = option_name(name)
|
||||
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
|
||||
@options[name] = [type, value]
|
||||
end
|
||||
|
||||
configure_accelerate
|
||||
configure_metal
|
||||
configure_coreml
|
||||
end
|
||||
|
||||
# See ggml/src/ggml-cpu/CMakeLists.txt
|
||||
def configure_accelerate
|
||||
if RUBY_PLATFORM.match?(/darwin/) && enabled?("GGML_ACCELERATE")
|
||||
$LDFLAGS << " -framework Accelerate"
|
||||
end
|
||||
end
|
||||
|
||||
# See ggml/src/ggml-metal/CMakeLists.txt
|
||||
def configure_metal
|
||||
$LDFLAGS << " -framework Foundation -framework Metal -framework MetalKit" if enabled?("GGML_METAL")
|
||||
end
|
||||
|
||||
# See src/CmakeLists.txt
|
||||
def configure_coreml
|
||||
if enabled?("WHISPER_COREML")
|
||||
$LDFLAGS << " -framework Foundation -framework CoreML"
|
||||
$CPPFLAGS << " -DRUBY_WHISPER_USE_COREML"
|
||||
end
|
||||
end
|
||||
|
||||
def option_name(name)
|
||||
name.downcase.gsub("_", "-")
|
||||
end
|
||||
|
||||
def enabled?(option)
|
||||
if @options[option][1].nil?
|
||||
cmake_options[option][1]
|
||||
else
|
||||
@options[option][1]
|
||||
end
|
||||
end
|
||||
end
|
@ -1,176 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
VALUE mWhisper;
|
||||
VALUE mVAD;
|
||||
VALUE cContext;
|
||||
VALUE cParams;
|
||||
VALUE cVADParams;
|
||||
VALUE eError;
|
||||
|
||||
VALUE cSegment;
|
||||
VALUE cModel;
|
||||
|
||||
ID id_to_s;
|
||||
ID id_call;
|
||||
ID id___method__;
|
||||
ID id_to_enum;
|
||||
ID id_length;
|
||||
ID id_next;
|
||||
ID id_new;
|
||||
ID id_to_path;
|
||||
ID id_URI;
|
||||
ID id_pre_converted_models;
|
||||
ID id_coreml_compiled_models;
|
||||
ID id_cache;
|
||||
ID id_n_processors;
|
||||
|
||||
static bool is_log_callback_finalized = false;
|
||||
|
||||
// High level API
|
||||
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
|
||||
|
||||
extern void init_ruby_whisper_context(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_params(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_error(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
|
||||
extern void init_ruby_whisper_model(VALUE *mWhisper);
|
||||
extern void init_ruby_whisper_vad_params(VALUE *mVAD);
|
||||
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_max_id -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
|
||||
return INT2NUM(whisper_lang_max_id());
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_id(lang_name) -> Integer
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
|
||||
const char * lang_str = StringValueCStr(lang);
|
||||
const int id = whisper_lang_id(lang_str);
|
||||
if (-1 == id) {
|
||||
rb_raise(rb_eArgError, "language not found: %s", lang_str);
|
||||
}
|
||||
return INT2NUM(id);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_str(lang_id) -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
|
||||
const int lang_id = NUM2INT(id);
|
||||
const char * str = whisper_lang_str(lang_id);
|
||||
if (NULL == str) {
|
||||
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||
}
|
||||
return rb_str_new2(str);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* lang_str_full(lang_id) -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
||||
const int lang_id = NUM2INT(id);
|
||||
const char * str_full = whisper_lang_str_full(lang_id);
|
||||
if (NULL == str_full) {
|
||||
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||
}
|
||||
return rb_str_new2(str_full);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* system_info_str -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_system_info_str(VALUE self) {
|
||||
return rb_str_new2(whisper_print_system_info());
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
||||
is_log_callback_finalized = true;
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static void
|
||||
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
|
||||
if (is_log_callback_finalized) {
|
||||
return;
|
||||
}
|
||||
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
|
||||
VALUE udata = rb_iv_get(mWhisper, "user_data");
|
||||
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
||||
*/
|
||||
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
||||
VALUE old_callback = rb_iv_get(self, "log_callback");
|
||||
if (!NIL_P(old_callback)) {
|
||||
rb_undefine_finalizer(old_callback);
|
||||
}
|
||||
|
||||
rb_iv_set(self, "log_callback", log_callback);
|
||||
rb_iv_set(self, "user_data", user_data);
|
||||
|
||||
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
||||
rb_define_finalizer(log_callback, finalize_log_callback);
|
||||
|
||||
whisper_log_set(ruby_whisper_log_callback, NULL);
|
||||
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
void Init_whisper() {
|
||||
id_to_s = rb_intern("to_s");
|
||||
id_call = rb_intern("call");
|
||||
id___method__ = rb_intern("__method__");
|
||||
id_to_enum = rb_intern("to_enum");
|
||||
id_length = rb_intern("length");
|
||||
id_next = rb_intern("next");
|
||||
id_new = rb_intern("new");
|
||||
id_to_path = rb_intern("to_path");
|
||||
id_URI = rb_intern("URI");
|
||||
id_pre_converted_models = rb_intern("pre_converted_models");
|
||||
id_coreml_compiled_models = rb_intern("coreml_compiled_models");
|
||||
id_cache = rb_intern("cache");
|
||||
id_n_processors = rb_intern("n_processors");
|
||||
|
||||
mWhisper = rb_define_module("Whisper");
|
||||
mVAD = rb_define_module_under(mWhisper, "VAD");
|
||||
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
||||
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
||||
|
||||
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
||||
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
||||
rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
|
||||
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
||||
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
||||
|
||||
init_ruby_whisper_context(&mWhisper);
|
||||
init_ruby_whisper_params(&mWhisper);
|
||||
init_ruby_whisper_error(&mWhisper);
|
||||
init_ruby_whisper_segment(&mWhisper, &cContext);
|
||||
init_ruby_whisper_model(&mWhisper);
|
||||
init_ruby_whisper_vad_params(&mVAD);
|
||||
|
||||
rb_require("whisper/context");
|
||||
rb_require("whisper/segment");
|
||||
rb_require("whisper/model/uri");
|
||||
}
|
bindings/ruby/ext/ruby_whisper.cpp (new file, 426 lines)
@ -0,0 +1,426
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
#define DR_WAV_IMPLEMENTATION
|
||||
#include "dr_wav.h"
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BOOL_PARAMS_SETTER(self, prop, value) \
|
||||
ruby_whisper_params *rwp; \
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
||||
if (value == Qfalse || value == Qnil) { \
|
||||
rwp->params.prop = false; \
|
||||
} else { \
|
||||
rwp->params.prop = true; \
|
||||
} \
|
||||
return value; \
|
||||
|
||||
#define BOOL_PARAMS_GETTER(self, prop) \
|
||||
ruby_whisper_params *rwp; \
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
||||
if (rwp->params.prop) { \
|
||||
return Qtrue; \
|
||||
} else { \
|
||||
return Qfalse; \
|
||||
}
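// For reference, BOOL_PARAMS_SETTER(self, translate, value) expands to roughly
// the following function body (as used by ruby_whisper_params_set_translate below):
//
//   ruby_whisper_params *rwp;
//   Data_Get_Struct(self, ruby_whisper_params, rwp);
//   if (value == Qfalse || value == Qnil) {
//     rwp->params.translate = false;
//   } else {
//     rwp->params.translate = true;
//   }
//   return value;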
|
||||
|
||||
VALUE mWhisper;
|
||||
VALUE cContext;
|
||||
VALUE cParams;
|
||||
|
||||
static void ruby_whisper_free(ruby_whisper *rw) {
|
||||
if (rw->context) {
|
||||
whisper_free(rw->context);
|
||||
rw->context = NULL;
|
||||
}
|
||||
}
|
||||
static void ruby_whisper_params_free(ruby_whisper_params *rwp) {
|
||||
}
|
||||
|
||||
void rb_whisper_mark(ruby_whisper *rw) {
|
||||
// call rb_gc_mark on any ruby references in rw
|
||||
}
|
||||
|
||||
void rb_whisper_free(ruby_whisper *rw) {
|
||||
ruby_whisper_free(rw);
|
||||
free(rw);
|
||||
}
|
||||
|
||||
void rb_whisper_params_mark(ruby_whisper_params *rwp) {
|
||||
}
|
||||
|
||||
void rb_whisper_params_free(ruby_whisper_params *rwp) {
|
||||
ruby_whisper_params_free(rwp);
|
||||
free(rwp);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_allocate(VALUE klass) {
|
||||
ruby_whisper *rw;
|
||||
rw = ALLOC(ruby_whisper);
|
||||
rw->context = NULL;
|
||||
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_params_allocate(VALUE klass) {
|
||||
ruby_whisper_params *rwp;
|
||||
rwp = ALLOC(ruby_whisper_params);
|
||||
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
VALUE whisper_model_file_path;
|
||||
|
||||
// TODO: we could also support init from a buffer here, perhaps via another Ruby object to expose it
|
||||
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
|
||||
if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
||||
}
|
||||
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
||||
if (rw->context == nullptr) {
|
||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* transcribe a single file
|
||||
* can yield results to a block
|
||||
*
|
||||
**/
|
||||
static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
VALUE wave_file_path, blk, params;
|
||||
|
||||
rb_scan_args(argc, argv, "02&", &wave_file_path, ¶ms, &blk);
|
||||
Data_Get_Struct(self, ruby_whisper, rw);
|
||||
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||
|
||||
if (!rb_respond_to(wave_file_path, rb_intern("to_s"))) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
||||
}
|
||||
|
||||
std::string fname_inp = StringValueCStr(wave_file_path);
|
||||
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
// WAV input - this is directly from main.cpp example
|
||||
{
|
||||
drwav wav;
|
||||
std::vector<uint8_t> wav_data; // used for pipe input from stdin
|
||||
|
||||
if (fname_inp == "-") {
|
||||
{
|
||||
uint8_t buf[1024];
|
||||
while (true) {
|
||||
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
wav_data.insert(wav_data.end(), buf, buf + n);
|
||||
}
|
||||
}
|
||||
|
||||
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
||||
fprintf(stderr, "error: failed to open WAV file from stdin\n");
|
||||
return self;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
|
||||
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
|
||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
|
||||
if (wav.channels != 1 && wav.channels != 2) {
|
||||
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
|
||||
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
|
||||
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
|
||||
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
|
||||
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
|
||||
return self;
|
||||
}
|
||||
|
||||
if (wav.bitsPerSample != 16) {
|
||||
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
|
||||
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
|
||||
|
||||
std::vector<int16_t> pcm16;
|
||||
pcm16.resize(n*wav.channels);
|
||||
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
|
||||
drwav_uninit(&wav);
|
||||
|
||||
// convert to mono, float
|
||||
pcmf32.resize(n);
|
||||
if (wav.channels == 1) {
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32[i] = float(pcm16[i])/32768.0f;
|
||||
}
|
||||
} else {
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
||||
}
|
||||
}
|
||||
|
||||
if (rwp->diarize) {
|
||||
// convert to stereo, float
|
||||
pcmf32s.resize(2);
|
||||
|
||||
pcmf32s[0].resize(n);
|
||||
pcmf32s[1].resize(n);
|
||||
for (uint64_t i = 0; i < n; i++) {
|
||||
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
||||
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return !is_aborted;
|
||||
};
|
||||
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
||||
}
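// The flag above is written from Ruby-facing code and read by whisper's worker
// threads inside encoder_begin_callback, so a data-race-free variant (a sketch,
// not what this file currently does; requires <atomic>) would look like:
//
//   static std::atomic<bool> is_aborted{false};
//   rwp->params.encoder_begin_callback = [](struct whisper_context *, struct whisper_state *, void * user_data) {
//     return !static_cast<std::atomic<bool> *>(user_data)->load(std::memory_order_relaxed);
//   };
//   rwp->params.encoder_begin_callback_user_data = &is_aborted;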
|
||||
|
||||
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
||||
fprintf(stderr, "failed to process audio\n");
|
||||
return self;
|
||||
}
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
VALUE output = rb_str_new2("");
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(rw->context, i);
|
||||
output = rb_str_concat(output, rb_str_new2(text));
|
||||
}
|
||||
VALUE idCall = rb_intern("call");
|
||||
if (blk != Qnil) {
|
||||
rb_funcall(blk, idCall, 1, output);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* params.language = "auto" | "en", etc...
|
||||
*/
|
||||
static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
if (value == Qfalse || value == Qnil) {
|
||||
rwp->params.language = "auto";
|
||||
} else {
|
||||
rwp->params.language = StringValueCStr(value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_language(VALUE self) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
if (rwp->params.language) {
|
||||
return rb_str_new2(rwp->params.language);
|
||||
} else {
|
||||
return rb_str_new2("auto");
|
||||
}
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, translate, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_translate(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, translate)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, no_context, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_no_context(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, no_context)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, single_segment, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_single_segment(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, single_segment)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, print_special, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_print_special(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, print_special)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, print_progress, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_print_progress(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, print_progress)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, print_realtime, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_print_realtime(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, print_realtime)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, print_timestamps, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, print_timestamps)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, suppress_blank, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, suppress_blank)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, token_timestamps)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, token_timestamps, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, split_on_word)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, split_on_word, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_speed_up(VALUE self) {
|
||||
BOOL_PARAMS_GETTER(self, speed_up)
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_speed_up(VALUE self, VALUE value) {
|
||||
BOOL_PARAMS_SETTER(self, speed_up, value)
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_diarize(VALUE self) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
if (rwp->diarize) {
|
||||
return Qtrue;
|
||||
} else {
|
||||
return Qfalse;
|
||||
}
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
if (value == Qfalse || value == Qnil) {
|
||||
rwp->diarize = false;
|
||||
} else {
|
||||
rwp->diarize = true;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_params_get_offset(VALUE self) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
return INT2NUM(rwp->params.offset_ms);
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
rwp->params.offset_ms = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
static VALUE ruby_whisper_params_get_duration(VALUE self) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
return INT2NUM(rwp->params.duration_ms);
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
rwp->params.duration_ms = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
return INT2NUM(rwp->params.n_max_text_ctx);
|
||||
}
|
||||
static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) {
|
||||
ruby_whisper_params *rwp;
|
||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
||||
rwp->params.n_max_text_ctx = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
void Init_whisper() {
|
||||
mWhisper = rb_define_module("Whisper");
|
||||
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
||||
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
||||
|
||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
||||
|
||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
||||
|
||||
rb_define_alloc_func(cParams, ruby_whisper_params_allocate);
|
||||
|
||||
rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1);
|
||||
rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0);
|
||||
rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1);
|
||||
rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0);
|
||||
rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1);
|
||||
rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0);
|
||||
rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1);
|
||||
rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0);
|
||||
rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0);
|
||||
rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1);
|
||||
rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0);
|
||||
rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1);
|
||||
rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0);
|
||||
rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1);
|
||||
rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0);
|
||||
rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1);
|
||||
rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0);
|
||||
rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1);
|
||||
rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0);
|
||||
rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
|
||||
rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
|
||||
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
|
||||
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
|
||||
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
|
||||
rb_define_method(cParams, "speed_up", ruby_whisper_params_get_speed_up, 0);
|
||||
rb_define_method(cParams, "speed_up=", ruby_whisper_params_set_speed_up, 1);
|
||||
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
|
||||
rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
|
||||
|
||||
rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0);
|
||||
rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1);
|
||||
rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0);
|
||||
rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1);
|
||||
|
||||
rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
|
||||
rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -1,15 +1,8 @@
|
||||
#ifndef RUBY_WHISPER_H
|
||||
#define RUBY_WHISPER_H
|
||||
#ifndef __RUBY_WHISPER_H
|
||||
#define __RUBY_WHISPER_H
|
||||
|
||||
#include "whisper.h"
|
||||
|
||||
typedef struct {
|
||||
VALUE *context;
|
||||
VALUE user_data;
|
||||
VALUE callback;
|
||||
VALUE callbacks;
|
||||
} ruby_whisper_callback_container;
|
||||
|
||||
typedef struct {
|
||||
struct whisper_context *context;
|
||||
} ruby_whisper;
|
||||
@ -17,24 +10,6 @@ typedef struct {
|
||||
typedef struct {
|
||||
struct whisper_full_params params;
|
||||
bool diarize;
|
||||
ruby_whisper_callback_container *new_segment_callback_container;
|
||||
ruby_whisper_callback_container *progress_callback_container;
|
||||
ruby_whisper_callback_container *encoder_begin_callback_container;
|
||||
ruby_whisper_callback_container *abort_callback_container;
|
||||
VALUE vad_params;
|
||||
} ruby_whisper_params;
|
||||
|
||||
typedef struct {
|
||||
struct whisper_vad_params params;
|
||||
} ruby_whisper_vad_params;
|
||||
|
||||
typedef struct {
|
||||
VALUE context;
|
||||
int index;
|
||||
} ruby_whisper_segment;
|
||||
|
||||
typedef struct {
|
||||
VALUE context;
|
||||
} ruby_whisper_model;
|
||||
|
||||
#endif
|
||||
|
@ -1,672 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
extern ID id_to_s;
|
||||
extern ID id___method__;
|
||||
extern ID id_to_enum;
|
||||
extern ID id_length;
|
||||
extern ID id_next;
|
||||
extern ID id_new;
|
||||
extern ID id_to_path;
|
||||
extern ID id_URI;
|
||||
extern ID id_pre_converted_models;
|
||||
extern ID id_coreml_compiled_models;
|
||||
extern ID id_cache;
|
||||
extern ID id_n_processors;
|
||||
|
||||
extern VALUE cContext;
|
||||
extern VALUE eError;
|
||||
extern VALUE cModel;
|
||||
|
||||
extern const rb_data_type_t ruby_whisper_params_type;
|
||||
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
||||
extern VALUE rb_whisper_model_s_new(VALUE context);
|
||||
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
||||
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
|
||||
|
||||
ID transcribe_option_names[1];
|
||||
|
||||
static void
|
||||
ruby_whisper_free(ruby_whisper *rw)
|
||||
{
|
||||
if (rw->context) {
|
||||
whisper_free(rw->context);
|
||||
rw->context = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rb_whisper_mark(ruby_whisper *rw)
|
||||
{
|
||||
// call rb_gc_mark on any ruby references in rw
|
||||
}
|
||||
|
||||
void
|
||||
rb_whisper_free(void *p)
|
||||
{
|
||||
ruby_whisper *rw = (ruby_whisper *)p;
|
||||
ruby_whisper_free(rw);
|
||||
free(rw);
|
||||
}
|
||||
|
||||
static size_t
|
||||
ruby_whisper_memsize(const void *p)
|
||||
{
|
||||
const ruby_whisper *rw = (const ruby_whisper *)p;
|
||||
size_t size = sizeof(rw);
|
||||
if (!rw) {
|
||||
return 0;
|
||||
}
|
||||
if (rw->context) {
|
||||
size += sizeof(rw->context);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
const rb_data_type_t ruby_whisper_type = {
|
||||
"ruby_whisper",
|
||||
{0, rb_whisper_free, ruby_whisper_memsize,},
|
||||
0, 0,
|
||||
0
|
||||
};
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_allocate(VALUE klass)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
VALUE obj = TypedData_Make_Struct(klass, ruby_whisper, &ruby_whisper_type, rw);
|
||||
rw->context = NULL;
|
||||
return obj;
|
||||
}
|
||||
|
||||
VALUE
|
||||
ruby_whisper_normalize_model_path(VALUE model_path)
|
||||
{
|
||||
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
|
||||
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
|
||||
if (!NIL_P(pre_converted_model)) {
|
||||
model_path = pre_converted_model;
|
||||
#ifdef RUBY_WHISPER_USE_COREML
|
||||
VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
|
||||
VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
|
||||
if (!NIL_P(coreml_converted_model)) {
|
||||
rb_funcall(coreml_converted_model, id_cache, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (TYPE(model_path) == T_STRING) {
|
||||
const char * model_path_str = StringValueCStr(model_path);
|
||||
if (strncmp("http://", model_path_str, 7) == 0 || strncmp("https://", model_path_str, 8) == 0) {
|
||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||
model_path = rb_class_new_instance(1, &model_path, uri_class);
|
||||
}
|
||||
}
|
||||
else if (rb_obj_is_kind_of(model_path, rb_path2class("URI::HTTP"))) {
|
||||
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||
model_path = rb_class_new_instance(1, &model_path, uri_class);
|
||||
}
|
||||
if (rb_respond_to(model_path, id_to_path)) {
|
||||
model_path = rb_funcall(model_path, id_to_path, 0);
|
||||
}
|
||||
|
||||
return model_path;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* new("base.en") -> Whisper::Context
|
||||
* new("path/to/model.bin") -> Whisper::Context
|
||||
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
VALUE whisper_model_file_path;
|
||||
|
||||
// TODO: we could also support init from a buffer here, perhaps via another Ruby object to expose it
|
||||
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
|
||||
whisper_model_file_path = ruby_whisper_normalize_model_path(whisper_model_file_path);
|
||||
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
||||
}
|
||||
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
||||
if (rw->context == NULL) {
|
||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_vocab -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_vocab(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_state(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_head(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_audio_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_audio_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_ctx -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_state -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_state(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_head -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_head(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_text_layer -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_text_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_n_mels -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_n_mels(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_ftype -> Integer
|
||||
*/
|
||||
VALUE ruby_whisper_model_ftype(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model_type -> String
|
||||
*/
|
||||
VALUE ruby_whisper_model_type(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
* Not thread safe for same context
|
||||
* Uses the specified decoding strategy to obtain the text.
|
||||
*
|
||||
* call-seq:
|
||||
* full(params, samples, n_samples) -> nil
|
||||
* full(params, samples) -> nil
|
||||
*
|
||||
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
||||
*/
|
||||
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
if (argc < 2 || argc > 3) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
VALUE params = argv[0];
|
||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
||||
VALUE samples = argv[1];
|
||||
int n_samples;
|
||||
rb_memory_view_t view;
|
||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||
if (argc == 3) {
|
||||
n_samples = NUM2INT(argv[2]);
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) < n_samples) {
|
||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||
}
|
||||
}
|
||||
// Should check when samples.respond_to?(:length)?
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)RARRAY_LEN(samples);
|
||||
} else if (memory_view_available_p) {
|
||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
||||
if (n_samples_size > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)n_samples_size;
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||
}
|
||||
}
|
||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||
if (memory_view_available_p) {
|
||||
c_samples = (float *)view.data;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||
}
|
||||
} else {
|
||||
// TODO: use rb_block_call
|
||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
// TODO: check if iter is exhausted and raise ArgumentError appropriately
|
||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||
c_samples[i] = RFLOAT_VALUE(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
prepare_transcription(rwp, &self);
|
||||
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
|
||||
if (0 == result) {
|
||||
return self;
|
||||
} else {
|
||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||
* Result is stored in the default state of the context
|
||||
* Not thread safe if executed in parallel on the same context.
|
||||
* It seems this approach can offer some speedup in some cases.
|
||||
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||
*
|
||||
* call-seq:
|
||||
* full_parallel(params, samples) -> nil
|
||||
* full_parallel(params, samples, n_samples) -> nil
|
||||
* full_parallel(params, samples, n_samples, n_processors) -> nil
|
||||
* full_parallel(params, samples, nil, n_processors) -> nil
|
||||
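*
* A minimal sketch (4 processors is an arbitrary choice):
*
*   whisper.full_parallel(params, samples, nil, 4)
*   whisper.each_segment { |segment| puts segment.text }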
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_parallel(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
if (argc < 2 || argc > 4) {
|
||||
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
VALUE params = argv[0];
|
||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
||||
VALUE samples = argv[1];
|
||||
int n_samples;
|
||||
int n_processors;
|
||||
rb_memory_view_t view;
|
||||
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||
switch (argc) {
|
||||
case 2:
|
||||
n_processors = 1;
|
||||
break;
|
||||
case 3:
|
||||
n_processors = 1;
|
||||
break;
|
||||
case 4:
|
||||
n_processors = NUM2INT(argv[3]);
|
||||
break;
|
||||
}
|
||||
if (argc >= 3 && !NIL_P(argv[2])) {
|
||||
n_samples = NUM2INT(argv[2]);
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) < n_samples) {
|
||||
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||
}
|
||||
}
|
||||
// Should check when samples.respond_to?(:length)?
|
||||
} else if (memory_view_available_p) {
|
||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
||||
if (n_samples_size > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)n_samples_size;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)RARRAY_LEN(samples);
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||
}
|
||||
}
|
||||
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||
if (memory_view_available_p) {
|
||||
c_samples = (float *)view.data;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||
}
|
||||
} else {
|
||||
// FIXME: use rb_block_call
|
||||
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
// TODO: check if iter is exhausted and raise ArgumentError
|
||||
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||
c_samples[i] = RFLOAT_VALUE(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
prepare_transcription(rwp, &self);
|
||||
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
|
||||
if (0 == result) {
|
||||
return self;
|
||||
} else {
|
||||
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of segments.
|
||||
*
|
||||
* call-seq:
|
||||
* full_n_segments -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_n_segments(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_full_n_segments(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
||||
*
|
||||
* call-seq:
|
||||
* full_lang_id -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_lang_id(VALUE self)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_full_lang_id(rw->context));
|
||||
}
|
||||
|
||||
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment)
|
||||
{
|
||||
const int c_i_segment = NUM2INT(i_segment);
|
||||
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
|
||||
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
|
||||
}
|
||||
return c_i_segment;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start time of a segment indexed by +segment_index+ in centiseconds (1 centisecond = 10 milliseconds).
|
||||
*
|
||||
* full_get_segment_t0(3) # => 1668 (16680 ms)
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_t0(segment_index) -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
||||
return LONG2NUM(t0);
|
||||
}
|
||||
|
||||
/*
|
||||
* End time of a segment indexed by +segment_index+ in centiseconds (1 centisecond = 10 milliseconds).
|
||||
*
|
||||
* full_get_segment_t1(3) # => 1668 (16680 ms)
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_t1(segment_index) -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
||||
return LONG2NUM(t1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
*
*   full_get_segment_speaker_turn_next(3) # => true
*
* call-seq:
*   full_get_segment_speaker_turn_next(segment_index) -> bool
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
|
||||
return speaker_turn_next ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
/*
|
||||
* Text of a segment indexed by +segment_index+.
|
||||
*
|
||||
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
||||
*
|
||||
* call-seq:
|
||||
* full_get_segment_text(segment_index) -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
|
||||
return rb_str_new2(text);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* full_get_segment_no_speech_prob(segment_index) -> Float
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
||||
{
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const float no_speech_prob = whisper_full_get_segment_no_speech_prob(rw->context, c_i_segment);
|
||||
return DBL2NUM(no_speech_prob);
|
||||
}
|
||||
|
||||
// High level API
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
||||
{
|
||||
return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
|
||||
}
|
||||
|
||||
/*
|
||||
* Yields each Whisper::Segment:
|
||||
*
|
||||
* whisper.transcribe("path/to/audio.wav", params)
|
||||
* whisper.each_segment do |segment|
|
||||
* puts segment.text
|
||||
* end
|
||||
*
|
||||
* Returns an Enumerator if no block given:
|
||||
*
|
||||
* whisper.transcribe("path/to/audio.wav", params)
|
||||
* enum = whisper.each_segment
|
||||
* enum.to_a # => [#<Whisper::Segment>, ...]
|
||||
*
|
||||
* call-seq:
|
||||
* each_segment {|segment| ... }
|
||||
* each_segment -> Enumerator
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_each_segment(VALUE self)
|
||||
{
|
||||
if (!rb_block_given_p()) {
|
||||
const VALUE method_name = rb_funcall(self, id___method__, 0);
|
||||
return rb_funcall(self, id_to_enum, 1, method_name);
|
||||
}
|
||||
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
rb_yield(rb_whisper_segment_s_new(self, i));
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* model -> Whisper::Model
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_get_model(VALUE self)
|
||||
{
|
||||
return rb_whisper_model_s_new(self);
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_context(VALUE *mWhisper)
|
||||
{
|
||||
cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
|
||||
|
||||
transcribe_option_names[0] = id_n_processors;
|
||||
|
||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
||||
|
||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
||||
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
||||
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
||||
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
||||
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
||||
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
||||
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
||||
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
|
||||
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
|
||||
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
|
||||
rb_define_method(cContext, "full_get_segment_no_speech_prob", ruby_whisper_full_get_segment_no_speech_prob, 1);
|
||||
rb_define_method(cContext, "full", ruby_whisper_full, -1);
|
||||
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
|
||||
|
||||
// High level
|
||||
rb_define_method(cContext, "full_get_segment", ruby_whisper_full_get_segment, 1);
|
||||
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
|
||||
|
||||
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
#include <ruby.h>
|
||||
|
||||
extern VALUE eError;
|
||||
|
||||
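/*
* Maps whisper.cpp result codes to human-readable messages. From Ruby, the
* error is raised by Context#full and friends; a minimal sketch of handling it:
*
*   begin
*     whisper.full(params, samples)
*   rescue Whisper::Error => e
*     warn "transcription failed (code #{e.code}): #{e.message}"
*   end
*/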
VALUE ruby_whisper_error_initialize(VALUE self, VALUE code)
|
||||
{
|
||||
const int c_code = NUM2INT(code);
|
||||
const char *raw_message;
|
||||
switch (c_code) {
|
||||
case -2:
|
||||
raw_message = "failed to compute log mel spectrogram";
|
||||
break;
|
||||
case -3:
|
||||
raw_message = "failed to auto-detect language";
|
||||
break;
|
||||
case -4:
|
||||
raw_message = "too many decoders requested";
|
||||
break;
|
||||
case -5:
|
||||
raw_message = "audio_ctx is larger than the maximum allowed";
|
||||
break;
|
||||
case -6:
|
||||
raw_message = "failed to encode";
|
||||
break;
|
||||
case -7:
|
||||
raw_message = "whisper_kv_cache_init() failed for self-attention cache";
|
||||
break;
|
||||
case -8:
|
||||
raw_message = "failed to decode";
|
||||
break;
|
||||
case -9:
|
||||
raw_message = "failed to decode";
|
||||
break;
|
||||
default:
|
||||
raw_message = "unknown error";
|
||||
break;
|
||||
}
|
||||
const VALUE message = rb_str_new2(raw_message);
|
||||
rb_call_super(1, &message);
|
||||
rb_iv_set(self, "@code", code);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_error(VALUE *mWhisper)
|
||||
{
|
||||
eError = rb_define_class_under(*mWhisper, "Error", rb_eStandardError);
|
||||
|
||||
rb_define_attr(eError, "code", true, false);
|
||||
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
|
||||
}
|
@ -1,232 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
extern const rb_data_type_t ruby_whisper_type;
|
||||
|
||||
extern VALUE cModel;
|
||||
|
||||
static void rb_whisper_model_mark(void *p) {
|
||||
ruby_whisper_model *rwm = (ruby_whisper_model *)p;
|
||||
if (rwm->context) {
|
||||
rb_gc_mark(rwm->context);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
ruby_whisper_model_memsize(const void *p)
|
||||
{
|
||||
const ruby_whisper_model *rwm = (const ruby_whisper_model *)p;
|
||||
size_t size = sizeof(rwm);
|
||||
if (!rwm) {
|
||||
return 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static const rb_data_type_t rb_whisper_model_type = {
|
||||
"ruby_whisper_model",
|
||||
{rb_whisper_model_mark, RUBY_DEFAULT_FREE, ruby_whisper_model_memsize,},
|
||||
0, 0,
|
||||
0
|
||||
};
|
||||
|
||||
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||
ruby_whisper_model *rwm;
|
||||
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
}
|
||||
|
||||
VALUE rb_whisper_model_s_new(VALUE context) {
|
||||
ruby_whisper_model *rwm;
|
||||
const VALUE model = ruby_whisper_model_allocate(cModel);
|
||||
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
rwm->context = context;
|
||||
return model;
|
||||
};
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_vocab -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_vocab(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_ctx -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_state -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_state(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_head -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_head(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_audio_layer -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_audio_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_ctx -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_ctx(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_state -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_state(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_head -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_head(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_text_layer -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_text_layer(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* n_mels -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_n_mels(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ftype -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_ftype(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return INT2NUM(whisper_model_ftype(rw->context));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* type -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_model_type(VALUE self)
|
||||
{
|
||||
ruby_whisper_model *rwm;
|
||||
TypedData_Get_Struct(self, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rwm->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_model(VALUE *mWhisper)
|
||||
{
|
||||
cModel = rb_define_class_under(*mWhisper, "Model", rb_cObject);
|
||||
|
||||
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
||||
rb_define_method(cModel, "n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||
rb_define_method(cModel, "n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||
rb_define_method(cModel, "n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||
rb_define_method(cModel, "n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||
rb_define_method(cModel, "n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||
rb_define_method(cModel, "n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||
rb_define_method(cModel, "n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||
rb_define_method(cModel, "n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||
rb_define_method(cModel, "n_mels", ruby_whisper_model_n_mels, 0);
|
||||
rb_define_method(cModel, "ftype", ruby_whisper_model_ftype, 0);
|
||||
rb_define_method(cModel, "type", ruby_whisper_model_type, 0);
|
||||
}
|
File diff suppressed because it is too large
@ -1,220 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
#define N_KEY_NAMES 5
|
||||
|
||||
static VALUE sym_start_time;
|
||||
static VALUE sym_end_time;
|
||||
static VALUE sym_text;
|
||||
static VALUE sym_no_speech_prob;
|
||||
static VALUE sym_speaker_turn_next;
|
||||
static VALUE key_names;
|
||||
|
||||
extern const rb_data_type_t ruby_whisper_type;
|
||||
|
||||
extern VALUE cSegment;
|
||||
|
||||
static void
|
||||
rb_whisper_segment_mark(void *p)
|
||||
{
|
||||
ruby_whisper_segment *rws = (ruby_whisper_segment *)p;
|
||||
rb_gc_mark(rws->context);
|
||||
}
|
||||
|
||||
static size_t
|
||||
ruby_whisper_segment_memsize(const void *p)
|
||||
{
|
||||
const ruby_whisper_segment *rws = (const ruby_whisper_segment *)p;
|
||||
size_t size = sizeof(rws);
|
||||
if (!rws) {
|
||||
return 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static const rb_data_type_t ruby_whisper_segment_type = {
|
||||
"ruby_whisper_segment",
|
||||
{rb_whisper_segment_mark, RUBY_DEFAULT_FREE, ruby_whisper_segment_memsize,},
|
||||
0, 0,
|
||||
0
|
||||
};
|
||||
|
||||
VALUE
|
||||
ruby_whisper_segment_allocate(VALUE klass)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
return TypedData_Make_Struct(klass, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_whisper_segment_s_new(VALUE context, int index)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
||||
TypedData_Get_Struct(segment, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
rws->context = context;
|
||||
rws->index = index;
|
||||
return segment;
|
||||
};
|
||||
|
||||
/*
|
||||
* Start time in milliseconds.
|
||||
*
|
||||
* call-seq:
|
||||
* start_time -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_start_time(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
||||
// multiplying by 10 cannot overflow here because to_timestamp() in whisper.cpp does the same
|
||||
return LONG2NUM(t0 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* End time in milliseconds.
|
||||
*
|
||||
* call-seq:
|
||||
* end_time -> Integer
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_end_time(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
||||
// multiplying by 10 cannot overflow here because to_timestamp() in whisper.cpp does the same
|
||||
return LONG2NUM(t1 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* Whether the next segment is predicted as a speaker turn.
|
||||
*
|
||||
* call-seq:
|
||||
* speaker_turn_next? -> bool
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* text -> String
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_text(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
||||
return rb_str_new2(text);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* no_speech_prob -> Float
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_get_no_speech_prob(VALUE self)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* deconstruct_keys(keys) -> hash
|
||||
*
|
||||
* Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next
|
||||
*
|
||||
* whisper.each_segment do |segment|
|
||||
* segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:}
|
||||
*
|
||||
* puts "[#{start_time} --> #{end_time}] #{text} (no speech prob: #{no_speech_prob}#{speaker_turn_next ? ', speaker turns next' : ''})"
|
||||
* end
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_segment_deconstruct_keys(VALUE self, VALUE keys)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
TypedData_Get_Struct(self, ruby_whisper_segment, &ruby_whisper_segment_type, rws);
|
||||
ruby_whisper *rw;
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
|
||||
VALUE hash = rb_hash_new();
|
||||
long n_keys;
|
||||
if (NIL_P(keys)) {
|
||||
keys = key_names;
|
||||
n_keys = N_KEY_NAMES;
|
||||
} else {
|
||||
n_keys = RARRAY_LEN(keys);
|
||||
if (n_keys > N_KEY_NAMES) {
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < n_keys; i++) {
|
||||
VALUE key = rb_ary_entry(keys, i);
|
||||
if (key == sym_start_time) {
|
||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_start_time(self));
|
||||
}
|
||||
if (key == sym_end_time) {
|
||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_end_time(self));
|
||||
}
|
||||
if (key == sym_text) {
|
||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_text(self));
|
||||
}
|
||||
if (key == sym_no_speech_prob) {
|
||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_no_speech_prob(self));
|
||||
}
|
||||
if (key == sym_speaker_turn_next) {
|
||||
rb_hash_aset(hash, key, ruby_whisper_segment_get_speaker_turn_next(self));
|
||||
}
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
void
|
||||
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
|
||||
{
|
||||
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
|
||||
|
||||
sym_start_time = ID2SYM(rb_intern("start_time"));
|
||||
sym_end_time = ID2SYM(rb_intern("end_time"));
|
||||
sym_text = ID2SYM(rb_intern("text"));
|
||||
sym_no_speech_prob = ID2SYM(rb_intern("no_speech_prob"));
|
||||
sym_speaker_turn_next = ID2SYM(rb_intern("speaker_turn_next"));
|
||||
key_names = rb_ary_new3(
|
||||
N_KEY_NAMES,
|
||||
sym_start_time,
|
||||
sym_end_time,
|
||||
sym_text,
|
||||
sym_no_speech_prob,
|
||||
sym_speaker_turn_next
|
||||
);
|
||||
|
||||
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
||||
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
||||
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
||||
rb_define_method(cSegment, "speaker_turn_next?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
||||
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
||||
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
|
||||
rb_define_method(cSegment, "deconstruct_keys", ruby_whisper_segment_deconstruct_keys, 1);
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
#include "common-whisper.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern const rb_data_type_t ruby_whisper_type;
|
||||
extern const rb_data_type_t ruby_whisper_params_type;
|
||||
|
||||
extern ID id_to_s;
|
||||
extern ID id_call;
|
||||
extern ID transcribe_option_names[1];
|
||||
|
||||
extern void
|
||||
prepare_transcription(ruby_whisper_params * rwp, VALUE * self);
|
||||
|
||||
/*
|
||||
* transcribe a single file
|
||||
* can emit to a block results
|
||||
*
|
||||
* params = Whisper::Params.new
|
||||
* params.duration = 60_000
|
||||
* whisper.transcribe "path/to/audio.wav", params do |text|
|
||||
* puts text
|
||||
* end
|
||||
*
|
||||
* call-seq:
|
||||
* transcribe(path_to_audio, params) {|text| ...}
|
||||
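*
* The number of processors handed to whisper_full_parallel() can also be
* given as a keyword argument (a sketch; 4 is an arbitrary choice):
*
*   whisper.transcribe "path/to/audio.wav", params, n_processors: 4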
**/
|
||||
VALUE
|
||||
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
||||
ruby_whisper *rw;
|
||||
ruby_whisper_params *rwp;
|
||||
VALUE wave_file_path, blk, params, kws;
|
||||
VALUE opts[1];
|
||||
|
||||
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "2:&", &wave_file_path, ¶ms, &kws, &blk);
|
||||
rb_get_kwargs(kws, transcribe_option_names, 0, 1, opts);
|
||||
|
||||
int n_processors = opts[0] == Qundef ? 1 : NUM2INT(opts[0]);
|
||||
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
TypedData_Get_Struct(params, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
||||
|
||||
if (!rb_respond_to(wave_file_path, id_to_s)) {
|
||||
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
||||
}
|
||||
|
||||
std::string fname_inp = StringValueCStr(wave_file_path);
|
||||
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
|
||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
||||
return self;
|
||||
}
|
||||
// Commented out because it is work in progress
|
||||
// {
|
||||
// static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
// rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
// bool is_aborted = *(bool*)user_data;
|
||||
// return !is_aborted;
|
||||
// };
|
||||
// rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
||||
// }
|
||||
|
||||
prepare_transcription(rwp, &self);
|
||||
|
||||
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), n_processors) != 0) {
|
||||
fprintf(stderr, "failed to process audio\n");
|
||||
return self;
|
||||
}
|
||||
if (NIL_P(blk)) {
|
||||
return self;
|
||||
}
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
VALUE output = rb_str_new2("");
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(rw->context, i);
|
||||
output = rb_str_concat(output, rb_str_new2(text));
|
||||
}
|
||||
rb_funcall(blk, id_call, 1, output);
|
||||
return self;
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -1,288 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include "ruby_whisper.h"
|
||||
|
||||
#define DEFINE_PARAM(param_name, nth) \
|
||||
id_ ## param_name = rb_intern(#param_name); \
|
||||
param_names[nth] = id_ ## param_name; \
|
||||
rb_define_method(cVADParams, #param_name, ruby_whisper_vad_params_get_ ## param_name, 0); \
|
||||
rb_define_method(cVADParams, #param_name "=", ruby_whisper_vad_params_set_ ## param_name, 1);
|
||||
|
||||
#define NUM_PARAMS 6
|
||||
|
||||
extern VALUE cVADParams;
|
||||
|
||||
static size_t
|
||||
ruby_whisper_vad_params_memsize(const void *p)
|
||||
{
|
||||
const struct ruby_whisper_vad_params *params = p;
|
||||
size_t size = sizeof(params);
|
||||
if (!params) {
|
||||
return 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static ID param_names[NUM_PARAMS];
|
||||
static ID id_threshold;
|
||||
static ID id_min_speech_duration_ms;
|
||||
static ID id_min_silence_duration_ms;
|
||||
static ID id_max_speech_duration_s;
|
||||
static ID id_speech_pad_ms;
|
||||
static ID id_samples_overlap;
|
||||
|
||||
const rb_data_type_t ruby_whisper_vad_params_type = {
|
||||
"ruby_whisper_vad_params",
|
||||
{0, 0, ruby_whisper_vad_params_memsize,},
|
||||
0, 0,
|
||||
0
|
||||
};
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_s_allocate(VALUE klass)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
VALUE obj = TypedData_Make_Struct(klass, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params = whisper_vad_default_params();
|
||||
return obj;
|
||||
}
|
||||
|
||||
/*
|
||||
* Probability threshold to consider as speech.
|
||||
*
|
||||
* call-seq:
|
||||
* threshold = th -> th
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_threshold(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.threshold = RFLOAT_VALUE(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_threshold(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return DBL2NUM(rwvp->params.threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* Min duration for a valid speech segment.
|
||||
*
|
||||
* call-seq:
|
||||
* min_speech_duration_ms = duration_ms -> duration_ms
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_min_speech_duration_ms(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.min_speech_duration_ms = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_min_speech_duration_ms(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return INT2NUM(rwvp->params.min_speech_duration_ms);
|
||||
}
|
||||
|
||||
/*
|
||||
* Min silence duration to consider speech as ended.
|
||||
*
|
||||
* call-seq:
|
||||
* min_silence_duration_ms = duration_ms -> duration_ms
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_min_silence_duration_ms(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.min_silence_duration_ms = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_min_silence_duration_ms(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return INT2NUM(rwvp->params.min_silence_duration_ms);
|
||||
}
|
||||
|
||||
/*
|
||||
* Max duration of a speech segment before forcing a new segment.
|
||||
*
|
||||
* call-seq:
|
||||
* max_speech_duration_s = duration_s -> duration_s
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_max_speech_duration_s(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.max_speech_duration_s = RFLOAT_VALUE(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_max_speech_duration_s(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return DBL2NUM(rwvp->params.max_speech_duration_s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Padding added before and after speech segments.
|
||||
*
|
||||
* call-seq:
|
||||
* speech_pad_ms = pad_ms -> pad_ms
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_speech_pad_ms(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.speech_pad_ms = NUM2INT(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_speech_pad_ms(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return INT2NUM(rwvp->params.speech_pad_ms);
|
||||
}
|
||||
|
||||
/*
|
||||
* Overlap in seconds when copying audio samples from speech segment.
|
||||
*
|
||||
* call-seq:
|
||||
* samples_overlap = overlap -> overlap
|
||||
*/
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_set_samples_overlap(VALUE self, VALUE value)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
rwvp->params.samples_overlap = RFLOAT_VALUE(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_get_samples_overlap(VALUE self)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
return DBL2NUM(rwvp->params.samples_overlap);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
ruby_whisper_vad_params_equal(VALUE self, VALUE other)
|
||||
{
|
||||
ruby_whisper_vad_params *rwvp1;
|
||||
ruby_whisper_vad_params *rwvp2;
|
||||
|
||||
if (self == other) {
|
||||
return Qtrue;
|
||||
}
|
||||
|
||||
if (!rb_obj_is_kind_of(other, cVADParams)) {
|
||||
return Qfalse;
|
||||
}
|
||||
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp1);
|
||||
TypedData_Get_Struct(other, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp2);
|
||||
|
||||
if (rwvp1->params.threshold != rwvp2->params.threshold) {
|
||||
return Qfalse;
|
||||
}
|
||||
if (rwvp1->params.min_speech_duration_ms != rwvp2->params.min_speech_duration_ms) {
|
||||
return Qfalse;
|
||||
}
|
||||
if (rwvp1->params.min_silence_duration_ms != rwvp2->params.min_silence_duration_ms) {
|
||||
return Qfalse;
|
||||
}
|
||||
if (rwvp1->params.max_speech_duration_s != rwvp2->params.max_speech_duration_s) {
|
||||
return Qfalse;
|
||||
}
|
||||
if (rwvp1->params.speech_pad_ms != rwvp2->params.speech_pad_ms) {
|
||||
return Qfalse;
|
||||
}
|
||||
if (rwvp1->params.samples_overlap != rwvp2->params.samples_overlap) {
|
||||
return Qfalse;
|
||||
}
|
||||
|
||||
return Qtrue;
|
||||
}
|
||||
|
||||
#define SET_PARAM_IF_SAME(param_name) \
|
||||
if (id == id_ ## param_name) { \
|
||||
ruby_whisper_vad_params_set_ ## param_name(self, value); \
|
||||
continue; \
|
||||
}
|
||||
|
||||
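/*
* All parameters are optional keyword arguments; anything not given keeps the
* default from whisper_vad_default_params(). A minimal sketch (the values are
* arbitrary examples, not recommendations):
*
*   vad_params = Whisper::VAD::Params.new(threshold: 0.5, min_silence_duration_ms: 500)
*
* call-seq:
*   new(**kw_args) -> Whisper::VAD::Params
*/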
VALUE
|
||||
ruby_whisper_vad_params_initialize(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
VALUE kw_hash;
|
||||
VALUE values[NUM_PARAMS] = {Qundef};
|
||||
VALUE value;
|
||||
ruby_whisper_vad_params *rwvp;
|
||||
ID id;
|
||||
int i;
|
||||
|
||||
TypedData_Get_Struct(self, ruby_whisper_vad_params, &ruby_whisper_vad_params_type, rwvp);
|
||||
|
||||
rb_scan_args_kw(RB_SCAN_ARGS_KEYWORDS, argc, argv, ":", &kw_hash);
|
||||
if (NIL_P(kw_hash)) {
|
||||
return self;
|
||||
}
|
||||
|
||||
rb_get_kwargs(kw_hash, param_names, 0, NUM_PARAMS, values);
|
||||
|
||||
for (i = 0; i < NUM_PARAMS; i++) {
|
||||
id = param_names[i];
|
||||
value = values[i];
|
||||
if (value == Qundef) {
|
||||
continue;
|
||||
}
|
||||
SET_PARAM_IF_SAME(threshold)
|
||||
SET_PARAM_IF_SAME(min_speech_duration_ms)
|
||||
SET_PARAM_IF_SAME(min_silence_duration_ms)
|
||||
SET_PARAM_IF_SAME(max_speech_duration_s)
|
||||
SET_PARAM_IF_SAME(speech_pad_ms)
|
||||
SET_PARAM_IF_SAME(samples_overlap)
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
#undef SET_PARAM_IF_SAME
|
||||
|
||||
void
|
||||
init_ruby_whisper_vad_params(VALUE *mVAD)
|
||||
{
|
||||
cVADParams = rb_define_class_under(*mVAD, "Params", rb_cObject);
|
||||
rb_define_alloc_func(cVADParams, ruby_whisper_vad_params_s_allocate);
|
||||
rb_define_method(cVADParams, "initialize", ruby_whisper_vad_params_initialize, -1);
|
||||
|
||||
DEFINE_PARAM(threshold, 0)
|
||||
DEFINE_PARAM(min_speech_duration_ms, 1)
|
||||
DEFINE_PARAM(min_silence_duration_ms, 2)
|
||||
DEFINE_PARAM(max_speech_duration_s, 3)
|
||||
DEFINE_PARAM(speech_pad_ms, 4)
|
||||
DEFINE_PARAM(samples_overlap, 5)
|
||||
|
||||
rb_define_method(cVADParams, "==", ruby_whisper_vad_params_equal, 1);
|
||||
}
|
||||
|
||||
#undef DEFINE_PARAM
|
||||
#undef NUM_PARAMS
|
@ -1,8 +0,0 @@
|
||||
set(GRAPHVIZ_EXECUTABLES FALSE)
|
||||
set(GRAPHVIZ_STATIC_LIBS TRUE)
|
||||
set(GRAPHVIZ_SHARED_LIBS FALSE)
|
||||
set(GRAPHVIZ_MODULE_LIBS FALSE)
|
||||
set(GRAPHVIZ_INTERFACE_LIBS FALSE)
|
||||
set(GRAPHVIZ_OBJECT_LIBS FALSE)
|
||||
set(GRAPHVIZ_UNKNOWN_LIBS FALSE)
|
||||
set(GRAPHVIZ_GENERATE_DEPENDERS FALSE)
|
@ -1,40 +0,0 @@
|
||||
require "pathname"
|
||||
|
||||
root = Pathname("..")/".."
|
||||
ignored_dirs = %w[
|
||||
.devops
|
||||
.github
|
||||
ci
|
||||
examples/wchess/wchess.wasm
|
||||
examples/whisper.android
|
||||
examples/whisper.android.java
|
||||
examples/whisper.objc
|
||||
examples/whisper.swiftui
|
||||
grammars
|
||||
models
|
||||
samples
|
||||
scripts
|
||||
].collect {|dir| root/dir}
|
||||
ignored_files = %w[
|
||||
AUTHORS
|
||||
Makefile
|
||||
README.md
|
||||
README_sycl.md
|
||||
.gitignore
|
||||
.gitmodules
|
||||
.dockerignore
|
||||
whisper.nvim
|
||||
twitch.sh
|
||||
yt-wsp.sh
|
||||
close-issue.yml
|
||||
]
|
||||
|
||||
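# Files bundled with the gem: roughly, every git-tracked file under the
# repository root except the ignored directories and files listed above.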
EXTSOURCES =
|
||||
`git ls-files -z #{root}`.split("\x0")
|
||||
.collect {|file| Pathname(file)}
|
||||
.reject {|file|
|
||||
ignored_dirs.any? {|dir| file.descend.any? {|desc| desc == dir}} ||
|
||||
ignored_files.include?(file.basename.to_path) ||
|
||||
(file.descend.to_a[1] != root && file.descend.to_a[1] != Pathname("..")/"javascript")
|
||||
}
|
||||
.collect(&:to_path)
|
@ -1,15 +0,0 @@
|
||||
module Whisper
|
||||
class Context
|
||||
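# Renders all transcribed segments as an SRT document. A minimal usage
# sketch (assumes transcription has already been run; "out.srt" is an
# arbitrary file name):
#
#   whisper.transcribe("path/to/audio.wav", params)
#   File.write("out.srt", whisper.to_srt)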
def to_srt
|
||||
each_segment.with_index.reduce("") {|srt, (segment, index)|
|
||||
srt << "#{index + 1}\n#{segment.to_srt_cue}\n"
|
||||
}
|
||||
end
|
||||
|
||||
def to_webvtt
|
||||
each_segment.with_index.reduce("WEBVTT\n\n") {|webvtt, (segment, index)|
|
||||
webvtt << "#{index + 1}\n#{segment.to_webvtt_cue}\n"
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
@ -1,233 +0,0 @@
|
||||
require "uri"
|
||||
require "net/http"
|
||||
require "time"
|
||||
require "pathname"
|
||||
require "io/console/size"
|
||||
|
||||
module Whisper
|
||||
class Model
|
||||
class URI
|
||||
def initialize(uri)
|
||||
@uri = URI(uri)
|
||||
end
|
||||
|
||||
def to_path
|
||||
cache
|
||||
cache_path.to_path
|
||||
end
|
||||
|
||||
def clear_cache
|
||||
path = cache_path
|
||||
path.delete if path.exist?
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def cache_path
|
||||
base_cache_dir/@uri.host/@uri.path[1..]
|
||||
end
|
||||
|
||||
def base_cache_dir
|
||||
base = case RUBY_PLATFORM
|
||||
when /mswin|mingw/
|
||||
ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home)/"AppData/Local"
|
||||
when /darwin/
|
||||
Pathname(Dir.home)/"Library/Caches"
|
||||
else
|
||||
ENV.key?("XDG_CACHE_HOME") ? Pathname(ENV["XDG_CACHE_HOME"]) : Pathname(Dir.home)/".cache"
|
||||
end
|
||||
base/"whisper.cpp"
|
||||
end
|
||||
|
||||
def cache
|
||||
path = cache_path
|
||||
headers = {}
|
||||
headers["if-modified-since"] = path.mtime.httpdate if path.exist?
|
||||
request @uri, headers
|
||||
path
|
||||
end
|
||||
|
||||
def request(uri, headers)
|
||||
Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
|
||||
request = Net::HTTP::Get.new(uri, headers)
|
||||
http.request request do |response|
|
||||
case response
|
||||
when Net::HTTPNotModified
|
||||
# noop
|
||||
when Net::HTTPOK
|
||||
return if !response.key?("last-modified") && cache_path.exist?
|
||||
|
||||
download response
|
||||
when Net::HTTPRedirection
|
||||
request URI(response["location"]), headers
|
||||
else
|
||||
return if headers.key?("if-modified-since") # Use cache file
|
||||
|
||||
raise "#{response.code} #{response.message}\n#{response.body}"
|
||||
end
|
||||
end
|
||||
end
|
||||
rescue => err
|
||||
if cache_path.exist?
|
||||
warn err
|
||||
# Use cache file
|
||||
else
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
def download(response)
|
||||
path = cache_path
|
||||
path.dirname.mkpath unless path.dirname.exist?
|
||||
downloading_path = Pathname("#{path}.downloading")
|
||||
size = response.content_length
|
||||
downloading_path.open "wb" do |file|
|
||||
downloaded = 0
|
||||
response.read_body do |chunk|
|
||||
file << chunk
|
||||
downloaded += chunk.bytesize
|
||||
show_progress downloaded, size
|
||||
end
|
||||
$stderr.puts
|
||||
end
|
||||
downloading_path.rename path
|
||||
end
|
||||
|
||||
def show_progress(current, size)
|
||||
progress_rate_available = size && $stderr.tty?
|
||||
|
||||
unless @prev
|
||||
@prev = Time.now
|
||||
$stderr.puts "Downloading #{@uri} to #{cache_path}"
|
||||
end
|
||||
|
||||
now = Time.now
|
||||
|
||||
if progress_rate_available
|
||||
return if now - @prev < 1 && current < size
|
||||
|
||||
progress_width = 20
|
||||
progress = current.to_f / size
|
||||
arrow_length = progress * progress_width
|
||||
arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
|
||||
line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
|
||||
padding = ' ' * ($stderr.winsize[1] - line.size)
|
||||
$stderr.print "\r#{line}#{padding}"
|
||||
else
|
||||
return if now - @prev < 1
|
||||
|
||||
$stderr.print "."
|
||||
end
|
||||
@prev = now
|
||||
end
|
||||
|
||||
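# Human-readable size used in the progress line, e.g.
# format_bytesize(148_904_448) # => "142.0 MiB"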
def format_bytesize(bytesize)
|
||||
return "0.0 B" if bytesize.zero?
|
||||
|
||||
units = %w[B KiB MiB GiB TiB]
|
||||
exp = (Math.log(bytesize) / Math.log(1024)).to_i
|
||||
format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
|
||||
end
|
||||
end
|
||||
|
||||
class ZipURI < URI
|
||||
def cache
|
||||
zip_path = super
|
||||
dest = unzipped_path
|
||||
return if dest.exist? && dest.mtime >= zip_path.mtime
|
||||
escaping dest do
|
||||
system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
|
||||
end
|
||||
zip_path
|
||||
end
|
||||
|
||||
def clear_cache
|
||||
super
|
||||
unzipped_path.rmtree if unzipped_path.exist?
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def unzipped_path
|
||||
cache_path.sub_ext("")
|
||||
end
|
||||
|
||||
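# Moves an existing +path+ out of the way while the block runs; the old
# directory is restored if no new +path+ was produced and discarded otherwise.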
def escaping(path)
|
||||
escaped = Pathname("#{path}.removing")
|
||||
if path.exist?
|
||||
escaped.rmtree if escaped.exist?
|
||||
path.rename escaped
|
||||
end
|
||||
yield
|
||||
ensure
|
||||
if path.exist?
|
||||
escaped.rmtree if escaped.exist?
|
||||
else
|
||||
escaped.rename path if escaped.exist?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
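# Pre-converted models, keyed by name. An illustrative fetch (downloads and
# caches the file on first use; "base.en" is just one of the keys below):
#
#   path = Whisper::Model.pre_converted_models["base.en"].to_path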
@pre_converted_models = %w[
|
||||
tiny
|
||||
tiny.en
|
||||
tiny-q5_1
|
||||
tiny.en-q5_1
|
||||
tiny-q8_0
|
||||
base
|
||||
base.en
|
||||
base-q5_1
|
||||
base.en-q5_1
|
||||
base-q8_0
|
||||
small
|
||||
small.en
|
||||
small.en-tdrz
|
||||
small-q5_1
|
||||
small.en-q5_1
|
||||
small-q8_0
|
||||
medium
|
||||
medium.en
|
||||
medium-q5_0
|
||||
medium.en-q5_0
|
||||
medium-q8_0
|
||||
large-v1
|
||||
large-v2
|
||||
large-v2-q5_0
|
||||
large-v2-q8_0
|
||||
large-v3
|
||||
large-v3-q5_0
|
||||
large-v3-turbo
|
||||
large-v3-turbo-q5_0
|
||||
large-v3-turbo-q8_0
|
||||
].each_with_object({}) {|name, models|
|
||||
models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
|
||||
}
|
||||
|
||||
%w[
|
||||
silero-v5.1.2
|
||||
].each do |name|
|
||||
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
||||
end
|
||||
|
||||
@coreml_compiled_models = %w[
|
||||
tiny
|
||||
tiny.en
|
||||
base
|
||||
base.en
|
||||
small
|
||||
small.en
|
||||
medium
|
||||
medium.en
|
||||
large-v1
|
||||
large-v2
|
||||
large-v3
|
||||
large-v3-turbo
|
||||
].each_with_object({}) do |name, models|
|
||||
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
||||
end
|
||||
|
||||
class << self
|
||||
attr_reader :pre_converted_models, :coreml_compiled_models
|
||||
end
|
||||
end
|
||||
end
|
@ -1,58 +0,0 @@
|
||||
module Whisper
|
||||
class Segment
|
||||
SRT_ESCAPES = {
|
||||
"&" => "&",
|
||||
"<" => "<",
|
||||
">" => ">",
|
||||
}
|
||||
SRT_ESCAPES_RE = Regexp.union(SRT_ESCAPES.keys)
|
||||
private_constant :SRT_ESCAPES, :SRT_ESCAPES_RE
|
||||
|
||||
def to_srt_cue
|
||||
"#{srt_start_time} --> #{srt_end_time}\n#{srt_text}\n"
|
||||
end
|
||||
|
||||
def to_webvtt_cue
|
||||
"#{webvtt_start_time} --> #{webvtt_end_time}\n#{webvtt_text}\n"
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def time_to_a(time)
|
||||
sec, decimal_part = time.divmod(1000)
|
||||
min, sec = sec.divmod(60)
|
||||
hour, min = min.divmod(60)
|
||||
[hour, min, sec, decimal_part]
|
||||
end
|
||||
|
||||
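# Formats a time given in milliseconds as an SRT timestamp,
# e.g. srt_time(16_680) # => "00:00:16,680"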
def srt_time(time)
|
||||
"%02d:%02d:%02d,%03d" % time_to_a(time)
|
||||
end
|
||||
|
||||
def srt_start_time
|
||||
srt_time(start_time)
|
||||
end
|
||||
|
||||
def srt_end_time
|
||||
srt_time(end_time)
|
||||
end
|
||||
|
||||
def srt_text
|
||||
text.gsub(SRT_ESCAPES_RE, SRT_ESCAPES)
|
||||
end
|
||||
|
||||
def webvtt_time(time)
|
||||
"%02d:%02d:%02d.%03d" % time_to_a(time)
|
||||
end
|
||||
|
||||
def webvtt_start_time
|
||||
webvtt_time(start_time)
|
||||
end
|
||||
|
||||
def webvtt_end_time
|
||||
webvtt_time(end_time)
|
||||
end
|
||||
|
||||
alias webvtt_text srt_text
|
||||
end
|
||||
end
|
@ -1,509 +0,0 @@
|
||||
module Whisper
|
||||
interface _Samples
|
||||
def length: () -> Integer
|
||||
def each: { (Float) -> void } -> void
|
||||
end
|
||||
|
||||
type log_callback = ^(Integer level, String message, Object user_data) -> void
|
||||
type new_segment_callback = ^(Whisper::Context, void, Integer n_new, Object user_data) -> void
|
||||
type progress_callback = ^(Whisper::Context, void, Integer progress, Object user_data) -> void
|
||||
type encoder_begin_callback = ^(Whisper::Context, void, Object user_data) -> void
|
||||
type abort_callback = ^(Whisper::Context, void, Object user_data) -> boolish
|
||||
|
||||
LOG_LEVEL_NONE: Integer
|
||||
LOG_LEVEL_INFO: Integer
|
||||
LOG_LEVEL_WARN: Integer
|
||||
LOG_LEVEL_ERROR: Integer
|
||||
LOG_LEVEL_DEBUG: Integer
|
||||
LOG_LEVEL_CONT: Integer
|
||||
|
||||
def self.lang_max_id: () -> Integer
|
||||
def self.lang_id: (string name) -> Integer
|
||||
def self.lang_str: (Integer id) -> String
|
||||
def self.lang_str_full: (Integer id) -> String
|
||||
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
||||
def self.system_info_str: () -> String
|
||||
|
||||
class Context
|
||||
def self.new: (String | path | ::URI::HTTP) -> instance
|
||||
|
||||
# transcribe a single file
|
||||
# can emit to a block results
|
||||
#
|
||||
# params = Whisper::Params.new
|
||||
# params.duration = 60_000
|
||||
# whisper.transcribe "path/to/audio.wav", params do |text|
|
||||
# puts text
|
||||
# end
|
||||
#
|
||||
def transcribe: (string, Params, ?n_processors: Integer) -> self
|
||||
| (string, Params, ?n_processors: Integer) { (String) -> void } -> self
|
||||
|
||||
def model_n_vocab: () -> Integer
|
||||
def model_n_audio_ctx: () -> Integer
|
||||
def model_n_audio_state: () -> Integer
def model_n_audio_head: () -> Integer
def model_n_audio_layer: () -> Integer
def model_n_text_ctx: () -> Integer
def model_n_text_state: () -> Integer
def model_n_text_head: () -> Integer
|
||||
def model_n_text_layer: () -> Integer
|
||||
def model_n_mels: () -> Integer
|
||||
def model_ftype: () -> Integer
|
||||
def model_type: () -> String
|
||||
|
||||
# Yields each Whisper::Segment:
|
||||
#
|
||||
# whisper.transcribe("path/to/audio.wav", params)
|
||||
# whisper.each_segment do |segment|
|
||||
# puts segment.text
|
||||
# end
|
||||
#
|
||||
# Returns an Enumerator if no block given:
|
||||
#
|
||||
# whisper.transcribe("path/to/audio.wav", params)
|
||||
# enum = whisper.each_segment
|
||||
# enum.to_a # => [#<Whisper::Segment>, ...]
|
||||
#
|
||||
def each_segment: { (Segment) -> void } -> void
|
||||
| () -> Enumerator[Segment]
|
||||
|
||||
def model: () -> Model
|
||||
def full_get_segment: (Integer nth) -> Segment
|
||||
def full_n_segments: () -> Integer
|
||||
|
||||
# Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
||||
#
|
||||
def full_lang_id: () -> Integer
|
||||
|
||||
# Start time of a segment indexed by +segment_index+ in centiseconds (1 centisecond = 10 milliseconds).
|
||||
#
|
||||
# full_get_segment_t0(3) # => 1668 (16680 ms)
|
||||
#
|
||||
def full_get_segment_t0: (Integer) -> Integer
|
||||
|
||||
# End time of a segment indexed by +segment_index+ in centiseconds (1 centisecond = 10 milliseconds).
|
||||
#
|
||||
# full_get_segment_t1(3) # => 1668 (16680 ms)
|
||||
#
|
||||
def full_get_segment_t1: (Integer) -> Integer
|
||||
|
||||
# Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
#
#   full_get_segment_speaker_turn_next(3) # => true
|
||||
#
|
||||
def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
|
||||
|
||||
# Text of a segment indexed by +segment_index+.
|
||||
#
|
||||
# full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
||||
#
|
||||
def full_get_segment_text: (Integer) -> String
|
||||
|
||||
def full_get_segment_no_speech_prob: (Integer) -> Float
|
||||
|
||||
# Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||
# Not thread safe for same context
|
||||
# Uses the specified decoding strategy to obtain the text.
|
||||
#
|
||||
# The second argument +samples+ must be an array of samples, respond to both :length and :each, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
||||
#
|
||||
def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer n_samples) -> self
|
||||
|
||||
# Splits the input audio into chunks and processes each chunk separately using whisper_full_with_state().
# The result is stored in the default state of the context.
# Not thread-safe if executed in parallel on the same context.
# It seems this approach can offer some speedup in some cases; however, transcription
# accuracy can be worse at the beginning and end of each chunk.
|
||||
#
|
||||
def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer n_samples) -> self
|
||||
| (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
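#
# A minimal sketch, reusing +samples+ from the example above; Etc.nprocessors
# (from the standard "etc" library) is just one way to pick a processor count:
#
#   require "etc"
#   whisper.full_parallel(Whisper::Params.new, samples, samples.length, Etc.nprocessors)
#   puts whisper.each_segment.map(&:text).join
#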
|
||||
|
||||
def to_srt: () -> String
|
||||
def to_webvtt: () -> String
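#
# A small usage sketch for the two exporters above (output file names are illustrative):
#
#   whisper.transcribe("path/to/audio.wav", Whisper::Params.new)
#   File.write("audio.srt", whisper.to_srt)
#   File.write("audio.vtt", whisper.to_webvtt)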
|
||||
end
|
||||
|
||||
class Params
|
||||
def self.new: (
|
||||
?language: string,
|
||||
?translate: boolish,
|
||||
?no_context: boolish,
|
||||
?single_segment: boolish,
|
||||
?print_special: boolish,
|
||||
?print_progress: boolish,
|
||||
?print_realtime: boolish,
|
||||
?print_timestamps: boolish,
|
||||
?suppress_blank: boolish,
|
||||
?suppress_nst: boolish,
|
||||
?token_timestamps: boolish,
|
||||
?split_on_word: boolish,
|
||||
?initial_prompt: string | nil,
|
||||
?diarize: boolish,
|
||||
?offset: Integer,
|
||||
?duration: Integer,
|
||||
?max_text_tokens: Integer,
|
||||
?temperature: Float,
|
||||
?max_initial_ts: Float,
|
||||
?length_penalty: Float,
|
||||
?temperature_inc: Float,
|
||||
?entropy_thold: Float,
|
||||
?logprob_thold: Float,
|
||||
?no_speech_thold: Float,
|
||||
?new_segment_callback: new_segment_callback,
|
||||
?new_segment_callback_user_data: Object,
|
||||
?progress_callback: progress_callback,
|
||||
?progress_callback_user_data: Object,
|
||||
?encoder_begin_callback: encoder_begin_callback,
|
||||
?encoder_begin_callback_user_data: Object,
|
||||
?abort_callback: abort_callback,
|
||||
?abort_callback_user_data: Object,
|
||||
?vad: boolish,
|
||||
?vad_model_path: path | URI,
|
||||
?vad_params: Whisper::VAD::Params
|
||||
) -> instance
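#
# A brief construction sketch; the values are illustrative, and every keyword is optional:
#
#   params = Whisper::Params.new(
#     language: "auto",
#     translate: false,
#     vad: true,
#     vad_model_path: "silero-v5.1.2"
#   )
#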
|
||||
|
||||
# params.language = "auto" | "en", etc...
|
||||
#
|
||||
def language=: (String) -> String # TODO: Enumerate lang names
|
||||
|
||||
def language: () -> String
|
||||
def translate=: (boolish) -> boolish
|
||||
def translate: () -> (true | false)
|
||||
def no_context=: (boolish) -> boolish
|
||||
|
||||
# If true, does not use past transcription (if any) as initial prompt for the decoder.
|
||||
#
|
||||
def no_context: () -> (true | false)
|
||||
|
||||
def single_segment=: (boolish) -> boolish
|
||||
|
||||
# If true, forces single segment output (useful for streaming).
|
||||
#
|
||||
def single_segment: () -> (true | false)
|
||||
|
||||
def print_special=: (boolish) -> boolish
|
||||
|
||||
# If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.).
|
||||
#
|
||||
def print_special: () -> (true | false)
|
||||
|
||||
def print_progress=: (boolish) -> boolish
|
||||
|
||||
# If true, prints progress information.
|
||||
#
|
||||
def print_progress: () -> (true | false)
|
||||
|
||||
def print_realtime=: (boolish) -> boolish
|
||||
|
||||
# If true, prints results from within whisper.cpp (avoid it, use a callback instead).
|
||||
#
|
||||
def print_realtime: () -> (true | false)
|
||||
|
||||
# If true, prints timestamps for each text segment when printing realtime.
|
||||
#
|
||||
def print_timestamps=: (boolish) -> boolish
|
||||
|
||||
def print_timestamps: () -> (true | false)
|
||||
|
||||
def suppress_blank=: (boolish) -> boolish
|
||||
|
||||
# If true, suppresses blank outputs.
|
||||
#
|
||||
def suppress_blank: () -> (true | false)
|
||||
|
||||
def suppress_nst=: (boolish) -> boolish
|
||||
|
||||
# If true, suppresses non-speech-tokens.
|
||||
#
|
||||
def suppress_nst: () -> (true | false)
|
||||
|
||||
def token_timestamps=: (boolish) -> boolish
|
||||
|
||||
# If true, enables token-level timestamps.
|
||||
#
|
||||
def token_timestamps: () -> (true | false)
|
||||
|
||||
def split_on_word=: (boolish) -> boolish
|
||||
|
||||
# If true, split on word rather than on token (when used with max_len).
|
||||
#
|
||||
def split_on_word: () -> (true | false)
|
||||
|
||||
def initial_prompt=: (_ToS) -> _ToS
|
||||
|
||||
# Tokens to provide to the whisper decoder as an initial prompt.
# These are prepended to any existing text context from a previous call.
# Use whisper_tokenize() to convert text to tokens.
# A maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
|
||||
#
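# For example (the prompt string is the one exercised in the bundled tests):
#
#   params.initial_prompt = "You are a polite person."
#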
|
||||
def initial_prompt: () -> (String | nil)
|
||||
|
||||
def diarize=: (boolish) -> boolish
|
||||
|
||||
# If true, enables diarization.
|
||||
#
|
||||
def diarize: () -> (true | false)
|
||||
|
||||
def offset=: (Integer) -> Integer
|
||||
|
||||
# Start offset in ms.
|
||||
#
|
||||
def offset: () -> Integer
|
||||
|
||||
def duration=: (Integer) -> Integer
|
||||
|
||||
# Audio duration to process in ms.
|
||||
#
|
||||
def duration: () -> Integer
|
||||
|
||||
def max_text_tokens=: (Integer) -> Integer
|
||||
|
||||
# Max tokens to use from past text as prompt for the decoder.
|
||||
#
|
||||
def max_text_tokens: () -> Integer
|
||||
|
||||
def temperature=: (Float) -> Float
|
||||
def temperature: () -> Float
|
||||
def max_initial_ts=: (Float) -> Float
|
||||
|
||||
# See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
|
||||
#
|
||||
def max_initial_ts: () -> Float
|
||||
|
||||
def length_penalty=: (Float) -> Float
|
||||
def length_penalty: () -> Float
|
||||
def temperature_inc=: (Float) -> Float
|
||||
def temperature_inc: () -> Float
|
||||
def entropy_thold=: (Float) -> Float
|
||||
|
||||
# Similar to OpenAI's "compression_ratio_threshold"
|
||||
#
|
||||
def entropy_thold: () -> Float
|
||||
|
||||
def logprob_thold=: (Float) -> Float
|
||||
def logprob_thold: () -> Float
|
||||
def no_speech_thold=: (Float) -> Float
|
||||
def no_speech_thold: () -> Float
|
||||
|
||||
# Sets new segment callback, called for every newly generated text segment.
|
||||
#
|
||||
# params.new_segment_callback = ->(context, _, n_new, user_data) {
|
||||
# # ...
|
||||
# }
|
||||
#
|
||||
def new_segment_callback=: (new_segment_callback) -> new_segment_callback
|
||||
def new_segment_callback: () -> (new_segment_callback | nil)
|
||||
|
||||
# Sets user data passed to the last argument of new segment callback.
|
||||
#
|
||||
def new_segment_callback_user_data=: (Object) -> Object
|
||||
|
||||
def new_segment_callback_user_data: () -> Object
|
||||
|
||||
# Sets progress callback, called on each progress update.
|
||||
#
|
||||
# params.progress_callback = ->(context, _, progress, user_data) {
|
||||
# # ...
|
||||
# }
|
||||
#
|
||||
# +progress+ is an Integer between 0 and 100.
|
||||
#
|
||||
def progress_callback=: (progress_callback) -> progress_callback
|
||||
|
||||
def progress_callback: () -> (progress_callback | nil)
|
||||
|
||||
# Sets user data passed to the last argument of progress callback.
|
||||
#
|
||||
def progress_callback_user_data=: (Object) -> Object
|
||||
|
||||
def progress_callback_user_data: () -> Object
|
||||
|
||||
# Sets encoder begin callback, called when the encoder starts.
|
||||
#
|
||||
def encoder_begin_callback=: (encoder_begin_callback) -> encoder_begin_callback
|
||||
|
||||
def encoder_begin_callback: () -> (encoder_begin_callback | nil)
|
||||
|
||||
# Sets user data passed to the last argument of encoder begin callback.
|
||||
#
|
||||
def encoder_begin_callback_user_data=: (Object) -> Object
|
||||
|
||||
def encoder_begin_callback_user_data: () -> Object
|
||||
|
||||
# Sets abort callback, called to check if the process should be aborted.
|
||||
#
|
||||
# params.abort_callback = ->(user_data) {
|
||||
# # ...
|
||||
# }
|
||||
#
|
||||
def abort_callback=: (abort_callback) -> abort_callback
|
||||
|
||||
def abort_callback: () -> (abort_callback | nil)
|
||||
|
||||
# Sets user data passed to the last argument of abort callback.
|
||||
#
|
||||
def abort_callback_user_data=: (Object) -> Object
|
||||
|
||||
def abort_callback_user_data: () -> Object
|
||||
|
||||
# Enable VAD
|
||||
#
|
||||
def vad=: (boolish) -> boolish
|
||||
|
||||
def vad: () -> (true | false)
|
||||
|
||||
# Path to the VAD model
|
||||
def vad_model_path=: (path | URI | nil) -> (path | URI | nil)
|
||||
|
||||
def vad_model_path: () -> (String | nil)
|
||||
|
||||
def vad_params=: (Whisper::VAD::Params) -> Whisper::VAD::Params
|
||||
def vad_params: () -> (Whisper::VAD::Params)
|
||||
|
||||
# Hook called on new segment. Yields each Whisper::Segment.
|
||||
#
|
||||
# whisper.on_new_segment do |segment|
|
||||
# # ...
|
||||
# end
|
||||
#
|
||||
def on_new_segment: { (Segment) -> void } -> void
|
||||
|
||||
# Hook called on progress update. Yields each progress Integer between 0 and 100.
|
||||
#
|
||||
def on_progress: { (Integer progress) -> void } -> void
|
||||
|
||||
# Hook called when the encoder starts.
|
||||
#
|
||||
def on_encoder_begin: { () -> void } -> void
|
||||
|
||||
# Calls the block to decide whether to abort. Return +true+ when you want to abort.
|
||||
#
|
||||
# params.abort_on do
|
||||
# if some_condition
|
||||
# true # abort
|
||||
# else
|
||||
# false # continue
|
||||
# end
|
||||
# end
|
||||
#
|
||||
def abort_on: { (Object user_data) -> boolish } -> void
|
||||
end
|
||||
|
||||
class Model
|
||||
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
||||
def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
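#
# For example, a pre-converted model can be resolved to a local file path
# (the bindings download it on first use, as the bundled tests rely on):
#
#   Whisper::Model.pre_converted_models["base.en"].to_path
#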
|
||||
def self.new: () -> instance
|
||||
def n_vocab: () -> Integer
|
||||
def n_audio_ctx: () -> Integer
|
||||
def n_audio_state: () -> Integer
|
||||
def n_audio_head: () -> Integer
|
||||
def n_audio_layer: () -> Integer
|
||||
def n_text_ctx: () -> Integer
|
||||
def n_text_state: () -> Integer
|
||||
def n_text_head: () -> Integer
|
||||
def n_text_layer: () -> Integer
|
||||
def n_mels: () -> Integer
|
||||
def ftype: () -> Integer
|
||||
def type: () -> String
|
||||
|
||||
class URI
|
||||
def self.new: (string | ::URI::HTTP) -> instance
|
||||
def to_path: -> String
|
||||
def clear_cache: -> void
|
||||
end
|
||||
|
||||
class ZipURI < URI
|
||||
def cache: () -> Pathname
|
||||
def clear_cache: () -> void
|
||||
end
|
||||
end
|
||||
|
||||
class Segment
|
||||
type deconstructed_keys = {
|
||||
start_time: (Integer | nil),
|
||||
end_time: (Integer | nil),
|
||||
text: (String | nil),
|
||||
no_speech_prob: (Float | nil),
|
||||
speaker_turn_next: (true | false | nil)
|
||||
}
|
||||
|
||||
# Start time in milliseconds.
|
||||
#
|
||||
def start_time: () -> Integer
|
||||
|
||||
# End time in milliseconds.
|
||||
#
|
||||
def end_time: () -> Integer
|
||||
|
||||
# Whether the next segment is predicted as a speaker turn.
|
||||
def speaker_turn_next?: () -> (true | false)
|
||||
|
||||
def text: () -> String
|
||||
def no_speech_prob: () -> Float
|
||||
def to_srt_cue: () -> String
|
||||
def to_webvtt_cue: () -> String
|
||||
|
||||
# Possible keys: :start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next
|
||||
#
|
||||
# whisper.each_segment do |segment|
|
||||
# segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:}
|
||||
#
|
||||
# puts "[#{start_time} --> #{end_time}] #{text} (no speech prob: #{no_speech_prob}#{speaker_turn_next ? ', speaker turns next' : ''})"
|
||||
# end
|
||||
def deconstruct_keys: (Array[:start_time | :end_time | :text | :no_speech_prob | :speaker_turn_next] | nil) -> deconstructed_keys
|
||||
end
|
||||
|
||||
module VAD
|
||||
class Params
|
||||
def self.new: (
|
||||
?threshold: Float,
|
||||
?min_speech_duration_ms: Integer,
|
||||
?min_silence_duration_ms: Integer,
|
||||
?max_speech_duration_s: Float,
|
||||
?speech_pad_ms: Integer,
|
||||
?samples_overlap: Float
|
||||
) -> instance
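#
# A minimal sketch of enabling VAD on transcription params; the model name is the
# one used by the bundled tests:
#
#   vad_params = Whisper::VAD::Params.new(threshold: 0.5)
#   params = Whisper::Params.new(vad: true, vad_model_path: "silero-v5.1.2", vad_params: vad_params)
#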
|
||||
|
||||
# Probability threshold above which audio is considered speech.
|
||||
#
|
||||
def threshold=: (Float) -> Float
|
||||
|
||||
def threshold: () -> Float
|
||||
|
||||
# Min duration for a valid speech segment.
|
||||
#
|
||||
def min_speech_duration_ms=: (Integer) -> Integer
|
||||
|
||||
def min_speech_duration_ms: () -> Integer
|
||||
|
||||
# Min silence duration to consider speech as ended.
|
||||
#
|
||||
def min_silence_duration_ms=: (Integer) -> Integer
|
||||
|
||||
def min_silence_duration_ms: () -> Integer
|
||||
|
||||
# Max duration of a speech segment before forcing a new segment.
|
||||
def max_speech_duration_s=: (Float) -> Float
|
||||
|
||||
def max_speech_duration_s: () -> Float
|
||||
|
||||
# Padding added before and after speech segments.
|
||||
#
|
||||
def speech_pad_ms=: (Integer) -> Integer
|
||||
|
||||
def speech_pad_ms: () -> Integer
|
||||
|
||||
# Overlap in seconds when copying audio samples from speech segment.
|
||||
#
|
||||
def samples_overlap=: (Float) -> Float
|
||||
|
||||
def samples_overlap: () -> Float
|
||||
def ==: (Params) -> (true | false)
|
||||
end
|
||||
end
|
||||
|
||||
class Error < StandardError
|
||||
attr_reader code: Integer
|
||||
|
||||
def self.new: (Integer code) -> instance
|
||||
end
|
||||
end
|
@ -1,24 +0,0 @@
|
||||
require "test/unit"
|
||||
require "whisper"
|
||||
require_relative "jfk_reader/jfk_reader"
|
||||
|
||||
class TestBase < Test::Unit::TestCase
|
||||
AUDIO = File.join(__dir__, "fixtures", "jfk.wav")
|
||||
|
||||
class << self
|
||||
def whisper
|
||||
return @whisper if @whisper
|
||||
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
@whisper.transcribe(TestBase::AUDIO, params)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def whisper
|
||||
self.class.whisper
|
||||
end
|
||||
end
|
5
bindings/ruby/test/jfk_reader/.gitignore
vendored
@ -1,5 +0,0 @@
|
||||
Makefile
|
||||
jfk_reader.o
|
||||
jfk_reader.so
|
||||
jfk_reader.bundle
|
||||
jfk_reader.dll
|
@ -1,3 +0,0 @@
|
||||
require "mkmf"
|
||||
|
||||
create_makefile("jfk_reader")
|
@ -1,68 +0,0 @@
|
||||
#include <ruby.h>
|
||||
#include <ruby/memory_view.h>
|
||||
#include <ruby/encoding.h>
|
||||
|
||||
static VALUE
|
||||
jfk_reader_initialize(VALUE self, VALUE audio_path)
|
||||
{
|
||||
rb_iv_set(self, "audio_path", audio_path);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_get_memory_view(const VALUE obj, rb_memory_view_t *view, int flags)
|
||||
{
|
||||
VALUE audio_path = rb_iv_get(obj, "audio_path");
|
||||
const char *audio_path_str = StringValueCStr(audio_path);
|
||||
const int n_samples = 176000;
|
||||
float *data = (float *)malloc(n_samples * sizeof(float));
|
||||
short *samples = (short *)malloc(n_samples * sizeof(short));
|
||||
FILE *file = fopen(audio_path_str, "rb");
|
||||
|
||||
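/* skip the 78-byte WAV header of the jfk.wav fixture and read its raw 16-bit PCM samples */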
fseek(file, 78, SEEK_SET);
|
||||
fread(samples, sizeof(short), n_samples, file);
|
||||
fclose(file);
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
data[i] = samples[i]/32768.0;
|
||||
}
free(samples); /* the temporary 16-bit buffer is no longer needed once converted */
|
||||
|
||||
view->obj = obj;
|
||||
view->data = (void *)data;
|
||||
view->byte_size = sizeof(float) * n_samples;
|
||||
view->readonly = true;
|
||||
view->format = "f";
|
||||
view->item_size = sizeof(float);
|
||||
view->item_desc.components = NULL;
|
||||
view->item_desc.length = 0;
|
||||
view->ndim = 1;
|
||||
view->shape = NULL;
|
||||
view->sub_offsets = NULL;
|
||||
view->private_data = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_release_memory_view(const VALUE obj, rb_memory_view_t *view)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
jfk_reader_memory_view_available_p(const VALUE obj)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static const rb_memory_view_entry_t jfk_reader_view_entry = {
|
||||
jfk_reader_get_memory_view,
|
||||
jfk_reader_release_memory_view,
|
||||
jfk_reader_memory_view_available_p
|
||||
};
|
||||
|
||||
void Init_jfk_reader(void)
|
||||
{
|
||||
VALUE cJFKReader = rb_define_class("JFKReader", rb_cObject);
|
||||
rb_memory_view_register(cJFKReader, &jfk_reader_view_entry);
|
||||
rb_define_method(cJFKReader, "initialize", jfk_reader_initialize, 1);
|
||||
}
|
@ -1,202 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestCallback < TestBase
|
||||
def setup
|
||||
GC.start
|
||||
@params = Whisper::Params.new
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
@audio = File.join(AUDIO)
|
||||
end
|
||||
|
||||
def test_new_segment_callback
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_kind_of Integer, n_new
|
||||
assert n_new > 0
|
||||
assert_same @whisper, context
|
||||
|
||||
n_segments = context.full_n_segments
|
||||
n_new.times do |i|
|
||||
i_segment = n_segments - 1 + i
|
||||
start_time = context.full_get_segment_t0(i_segment) * 10
|
||||
end_time = context.full_get_segment_t1(i_segment) * 10
|
||||
text = context.full_get_segment_text(i_segment)
|
||||
|
||||
assert_kind_of Integer, start_time
|
||||
assert start_time >= 0
|
||||
assert_kind_of Integer, end_time
|
||||
assert end_time > 0
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text) if i_segment == 0
|
||||
end
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_new_segment_callback_closure
|
||||
search_word = "what"
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
n_segments = context.full_n_segments
|
||||
n_new.times do |i|
|
||||
i_segment = n_segments - 1 + i
|
||||
text = context.full_get_segment_text(i_segment)
|
||||
if text.include?(search_word)
|
||||
t0 = context.full_get_segment_t0(i_segment)
|
||||
t1 = context.full_get_segment_t1(i_segment)
|
||||
raise "search word '#{search_word}' found at between #{t0} and #{t1}"
|
||||
end
|
||||
end
|
||||
}
|
||||
|
||||
assert_raise RuntimeError do
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
end
|
||||
|
||||
def test_new_segment_callback_user_data
|
||||
udata = Object.new
|
||||
@params.new_segment_callback_user_data = udata
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_same udata, user_data
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_new_segment_callback_user_data_gc
|
||||
@params.new_segment_callback_user_data = "My user data"
|
||||
@params.new_segment_callback = ->(context, state, n_new, user_data) {
|
||||
assert_equal "My user data", user_data
|
||||
}
|
||||
GC.start
|
||||
|
||||
assert_same @whisper, @whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_progress_callback
|
||||
first = nil
|
||||
last = nil
|
||||
@params.progress_callback = ->(context, state, progress, user_data) {
|
||||
assert_kind_of Integer, progress
|
||||
assert 0 <= progress && progress <= 100
|
||||
assert_same @whisper, context
|
||||
first = progress if first.nil?
|
||||
last = progress
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 0, first
|
||||
assert_equal 100, last
|
||||
end
|
||||
|
||||
def test_progress_callback_user_data
|
||||
udata = Object.new
|
||||
@params.progress_callback_user_data = udata
|
||||
@params.progress_callback = ->(context, state, n_new, user_data) {
|
||||
assert_same udata, user_data
|
||||
}
|
||||
|
||||
@whisper.transcribe(@audio, @params)
|
||||
end
|
||||
|
||||
def test_on_progress
|
||||
first = nil
|
||||
last = nil
|
||||
@params.on_progress do |progress|
|
||||
assert_kind_of Integer, progress
|
||||
assert 0 <= progress && progress <= 100
|
||||
first = progress if first.nil?
|
||||
last = progress
|
||||
end
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 0, first
|
||||
assert_equal 100, last
|
||||
end
|
||||
|
||||
def test_encoder_begin_callback
|
||||
i = 0
|
||||
@params.encoder_begin_callback = ->(context, state, user_data) {
|
||||
i += 1
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
|
||||
def test_encoder_begin_callback_abort
|
||||
logs = []
|
||||
Whisper.log_set -> (level, buffer, user_data) {
|
||||
logs << buffer if level == Whisper::LOG_LEVEL_ERROR
|
||||
}, logs
|
||||
@params.encoder_begin_callback = ->(context, state, user_data) {
|
||||
return false
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_match(/encoder_begin_callback returned false - aborting/, logs.join)
|
||||
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
||||
end
|
||||
|
||||
def test_encoder_begin_callback_user_data
|
||||
udata = Object.new
|
||||
@params.encoder_begin_callback_user_data = udata
|
||||
yielded = nil
|
||||
@params.encoder_begin_callback = ->(context, state, user_data) {
|
||||
yielded = user_data
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_same udata, yielded
|
||||
end
|
||||
|
||||
def test_on_encoder_begin
|
||||
i = 0
|
||||
@params.on_encoder_begin do
|
||||
i += 1
|
||||
end
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
|
||||
def test_abort_callback
|
||||
i = 0
|
||||
@params.abort_callback = ->(user_data) {
|
||||
assert_nil user_data
|
||||
i += 1
|
||||
return false
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
|
||||
def test_abort_callback_abort
|
||||
i = 0
|
||||
@params.abort_callback = ->(user_data) {
|
||||
i += 1
|
||||
return i == 3
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_equal 3, i
|
||||
end
|
||||
|
||||
def test_abort_callback_user_data
|
||||
udata = Object.new
|
||||
@params.abort_callback_user_data = udata
|
||||
yielded = nil
|
||||
@params.abort_callback = ->(user_data) {
|
||||
yielded = user_data
|
||||
}
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert_same udata, yielded
|
||||
end
|
||||
|
||||
def test_abort_on
|
||||
do_abort = false
|
||||
_aborted_from_callback = false
|
||||
@params.on_new_segment do |segment|
|
||||
do_abort = true if segment.text.match?(/ask/)
|
||||
end
|
||||
i = 0
|
||||
@params.abort_on do
|
||||
i += 1
|
||||
do_abort
|
||||
end
|
||||
@whisper.transcribe(@audio, @params)
|
||||
assert i > 0
|
||||
end
|
||||
end
|
@ -1,20 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestError < TestBase
|
||||
def test_error
|
||||
error = Whisper::Error.new(-2)
|
||||
assert_equal "failed to compute log mel spectrogram", error.message
|
||||
assert_equal(-2, error.code)
|
||||
end
|
||||
|
||||
def test_unknown_error
|
||||
error = Whisper::Error.new(-20)
|
||||
assert_equal "unknown error", error.message
|
||||
end
|
||||
|
||||
def test_non_int_code
|
||||
assert_raise TypeError do
|
||||
_error = Whisper::Error.new("non int")
|
||||
end
|
||||
end
|
||||
end
|
@ -1,118 +0,0 @@
|
||||
require_relative "helper"
|
||||
require "pathname"
|
||||
|
||||
class TestModel < TestBase
|
||||
def test_model
|
||||
whisper = Whisper::Context.new("base.en")
|
||||
assert_instance_of Whisper::Model, whisper.model
|
||||
end
|
||||
|
||||
def test_attributes
|
||||
whisper = Whisper::Context.new("base.en")
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_gc
|
||||
model = Whisper::Context.new("base.en").model
|
||||
GC.start
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_pathname
|
||||
path = Pathname(Whisper::Model.pre_converted_models["base.en"].to_path)
|
||||
whisper = Whisper::Context.new(path)
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_auto_download
|
||||
path = Whisper::Model.pre_converted_models["base.en"].to_path
|
||||
|
||||
assert_path_exist path
|
||||
assert_equal 147964211, File.size(path)
|
||||
end
|
||||
|
||||
def test_uri_string
|
||||
path = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"
|
||||
whisper = Whisper::Context.new(path)
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_uri
|
||||
path = URI("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin")
|
||||
whisper = Whisper::Context.new(path)
|
||||
model = whisper.model
|
||||
|
||||
assert_equal 51864, model.n_vocab
|
||||
assert_equal 1500, model.n_audio_ctx
|
||||
assert_equal 512, model.n_audio_state
|
||||
assert_equal 8, model.n_audio_head
|
||||
assert_equal 6, model.n_audio_layer
|
||||
assert_equal 448, model.n_text_ctx
|
||||
assert_equal 512, model.n_text_state
|
||||
assert_equal 8, model.n_text_head
|
||||
assert_equal 6, model.n_text_layer
|
||||
assert_equal 80, model.n_mels
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_coreml_model_auto_download
|
||||
uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
|
||||
model_path = Pathname(uri.to_path).sub_ext("")
|
||||
model_path.rmtree if model_path.exist?
|
||||
|
||||
uri.cache
|
||||
assert_path_exist model_path
|
||||
end
|
||||
end
|
@ -1,50 +0,0 @@
|
||||
require_relative "helper"
|
||||
require 'tempfile'
|
||||
require 'tmpdir'
|
||||
require 'shellwords'
|
||||
|
||||
class TestPackage < TestBase
|
||||
def test_build
|
||||
Tempfile.create do |file|
|
||||
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)
|
||||
assert file.size > 0
|
||||
assert_path_exist file.to_path
|
||||
end
|
||||
end
|
||||
|
||||
sub_test_case "Building binary on installation" do
|
||||
def setup
|
||||
system "rake", "build", exception: true
|
||||
end
|
||||
|
||||
def test_install
|
||||
gemspec = Gem::Specification.load("whispercpp.gemspec")
|
||||
Dir.mktmpdir do |dir|
|
||||
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", exception: true
|
||||
assert_installed dir, gemspec.version
|
||||
end
|
||||
end
|
||||
|
||||
def test_install_with_coreml
|
||||
omit_unless RUBY_PLATFORM.match?(/darwin/) do
|
||||
gemspec = Gem::Specification.load("whispercpp.gemspec")
|
||||
Dir.mktmpdir do |dir|
|
||||
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", "--", "--enable-whisper-coreml", exception: true
|
||||
assert_installed dir, gemspec.version
|
||||
assert_nothing_raised do
|
||||
libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
|
||||
system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def assert_installed(dir, version)
|
||||
assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", "whisper.#{RbConfig::CONFIG["DLEXT"]}")
|
||||
assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
|
||||
assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
|
||||
end
|
||||
end
|
||||
end
|
@ -1,297 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestParams < TestBase
|
||||
PARAM_NAMES = [
|
||||
:language,
|
||||
:translate,
|
||||
:no_context,
|
||||
:single_segment,
|
||||
:print_special,
|
||||
:print_progress,
|
||||
:print_realtime,
|
||||
:print_timestamps,
|
||||
:suppress_blank,
|
||||
:suppress_nst,
|
||||
:token_timestamps,
|
||||
:split_on_word,
|
||||
:initial_prompt,
|
||||
:diarize,
|
||||
:offset,
|
||||
:duration,
|
||||
:max_text_tokens,
|
||||
:temperature,
|
||||
:max_initial_ts,
|
||||
:length_penalty,
|
||||
:temperature_inc,
|
||||
:entropy_thold,
|
||||
:logprob_thold,
|
||||
:no_speech_thold,
|
||||
:new_segment_callback,
|
||||
:new_segment_callback_user_data,
|
||||
:progress_callback,
|
||||
:progress_callback_user_data,
|
||||
:abort_callback,
|
||||
:abort_callback_user_data,
|
||||
:vad,
|
||||
:vad_model_path,
|
||||
:vad_params,
|
||||
]
|
||||
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
||||
def test_language
|
||||
@params.language = "en"
|
||||
assert_equal @params.language, "en"
|
||||
@params.language = "auto"
|
||||
assert_equal @params.language, "auto"
|
||||
end
|
||||
|
||||
def test_offset
|
||||
@params.offset = 10_000
|
||||
assert_equal @params.offset, 10_000
|
||||
@params.offset = 0
|
||||
assert_equal @params.offset, 0
|
||||
end
|
||||
|
||||
def test_duration
|
||||
@params.duration = 60_000
|
||||
assert_equal @params.duration, 60_000
|
||||
@params.duration = 0
|
||||
assert_equal @params.duration, 0
|
||||
end
|
||||
|
||||
def test_max_text_tokens
|
||||
@params.max_text_tokens = 300
|
||||
assert_equal @params.max_text_tokens, 300
|
||||
@params.max_text_tokens = 0
|
||||
assert_equal @params.max_text_tokens, 0
|
||||
end
|
||||
|
||||
def test_translate
|
||||
@params.translate = true
|
||||
assert @params.translate
|
||||
@params.translate = false
|
||||
assert !@params.translate
|
||||
end
|
||||
|
||||
def test_no_context
|
||||
@params.no_context = true
|
||||
assert @params.no_context
|
||||
@params.no_context = false
|
||||
assert !@params.no_context
|
||||
end
|
||||
|
||||
def test_single_segment
|
||||
@params.single_segment = true
|
||||
assert @params.single_segment
|
||||
@params.single_segment = false
|
||||
assert !@params.single_segment
|
||||
end
|
||||
|
||||
def test_print_special
|
||||
@params.print_special = true
|
||||
assert @params.print_special
|
||||
@params.print_special = false
|
||||
assert !@params.print_special
|
||||
end
|
||||
|
||||
def test_print_progress
|
||||
@params.print_progress = true
|
||||
assert @params.print_progress
|
||||
@params.print_progress = false
|
||||
assert !@params.print_progress
|
||||
end
|
||||
|
||||
def test_print_realtime
|
||||
@params.print_realtime = true
|
||||
assert @params.print_realtime
|
||||
@params.print_realtime = false
|
||||
assert !@params.print_realtime
|
||||
end
|
||||
|
||||
def test_print_timestamps
|
||||
@params.print_timestamps = true
|
||||
assert @params.print_timestamps
|
||||
@params.print_timestamps = false
|
||||
assert !@params.print_timestamps
|
||||
end
|
||||
|
||||
def test_suppress_blank
|
||||
@params.suppress_blank = true
|
||||
assert @params.suppress_blank
|
||||
@params.suppress_blank = false
|
||||
assert !@params.suppress_blank
|
||||
end
|
||||
|
||||
def test_suppress_nst
|
||||
@params.suppress_nst = true
|
||||
assert @params.suppress_nst
|
||||
@params.suppress_nst = false
|
||||
assert !@params.suppress_nst
|
||||
end
|
||||
|
||||
def test_token_timestamps
|
||||
@params.token_timestamps = true
|
||||
assert @params.token_timestamps
|
||||
@params.token_timestamps = false
|
||||
assert !@params.token_timestamps
|
||||
end
|
||||
|
||||
def test_split_on_word
|
||||
@params.split_on_word = true
|
||||
assert @params.split_on_word
|
||||
@params.split_on_word = false
|
||||
assert !@params.split_on_word
|
||||
end
|
||||
|
||||
def test_initial_prompt
|
||||
assert_nil @params.initial_prompt
|
||||
@params.initial_prompt = "You are a polite person."
|
||||
assert_equal "You are a polite person.", @params.initial_prompt
|
||||
end
|
||||
|
||||
def test_temperature
|
||||
assert_equal 0.0, @params.temperature
|
||||
@params.temperature = 0.5
|
||||
assert_equal 0.5, @params.temperature
|
||||
end
|
||||
|
||||
def test_max_initial_ts
|
||||
assert_equal 1.0, @params.max_initial_ts
|
||||
@params.max_initial_ts = 600.0
|
||||
assert_equal 600.0, @params.max_initial_ts
|
||||
end
|
||||
|
||||
def test_length_penalty
|
||||
assert_equal(-1.0, @params.length_penalty)
|
||||
@params.length_penalty = 0.5
|
||||
assert_equal 0.5, @params.length_penalty
|
||||
end
|
||||
|
||||
def test_temperature_inc
|
||||
assert_in_delta 0.2, @params.temperature_inc
|
||||
@params.temperature_inc = 0.5
|
||||
assert_in_delta 0.5, @params.temperature_inc
|
||||
end
|
||||
|
||||
def test_entropy_thold
|
||||
assert_in_delta 2.4, @params.entropy_thold
|
||||
@params.entropy_thold = 3.0
|
||||
assert_in_delta 3.0, @params.entropy_thold
|
||||
end
|
||||
|
||||
def test_logprob_thold
|
||||
assert_in_delta(-1.0, @params.logprob_thold)
|
||||
@params.logprob_thold = -0.5
|
||||
assert_in_delta(-0.5, @params.logprob_thold)
|
||||
end
|
||||
|
||||
def test_no_speech_thold
|
||||
assert_in_delta 0.6, @params.no_speech_thold
|
||||
@params.no_speech_thold = 0.2
|
||||
assert_in_delta 0.2, @params.no_speech_thold
|
||||
end
|
||||
|
||||
def test_vad
|
||||
assert_false @params.vad
|
||||
@params.vad = true
|
||||
assert_true @params.vad
|
||||
end
|
||||
|
||||
def test_vad_model_path
|
||||
assert_nil @params.vad_model_path
|
||||
@params.vad_model_path = "silero-v5.1.2"
|
||||
assert_equal Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path, @params.vad_model_path
|
||||
end
|
||||
|
||||
def test_vad_model_path_with_nil
|
||||
@params.vad_model_path = "silero-v5.1.2"
|
||||
@params.vad_model_path = nil
|
||||
assert_nil @params.vad_model_path
|
||||
end
|
||||
|
||||
def test_vad_model_path_with_invalid
|
||||
assert_raise TypeError do
|
||||
@params.vad_model_path = Object.new
|
||||
end
|
||||
end
|
||||
|
||||
def test_vad_model_path_with_URI_string
|
||||
@params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin"
|
||||
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
||||
end
|
||||
|
||||
def test_vad_model_path_with_URI
|
||||
@params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")
|
||||
assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
||||
end
|
||||
|
||||
def test_vad_params
|
||||
assert_kind_of Whisper::VAD::Params, @params.vad_params
|
||||
default_params = @params.vad_params
|
||||
assert_same default_params, @params.vad_params
|
||||
assert_equal 0.5, default_params.threshold
|
||||
new_params = Whisper::VAD::Params.new
|
||||
@params.vad_params = new_params
|
||||
assert_same new_params, @params.vad_params
|
||||
end
|
||||
|
||||
def test_new_with_kw_args
|
||||
params = Whisper::Params.new(language: "es")
|
||||
assert_equal "es", params.language
|
||||
assert_equal 1.0, params.max_initial_ts
|
||||
end
|
||||
|
||||
def test_new_with_kw_args_non_existent
|
||||
assert_raise ArgumentError do
|
||||
Whisper::Params.new(non_existent: "value")
|
||||
end
|
||||
end
|
||||
|
||||
def test_new_with_kw_args_wrong_type
|
||||
assert_raise TypeError do
|
||||
Whisper::Params.new(language: 3)
|
||||
end
|
||||
end
|
||||
|
||||
data(PARAM_NAMES.collect {|param| [param, param]}.to_h)
|
||||
def test_new_with_kw_args_default_values(param)
|
||||
default_value = @params.send(param)
|
||||
value = case [param, default_value]
|
||||
in [*, true | false]
|
||||
!default_value
|
||||
in [*, Integer | Float]
|
||||
default_value + 1
|
||||
in [:language, *]
|
||||
"es"
|
||||
in [:initial_prompt, *]
|
||||
"Initial prompt"
|
||||
in [/_callback\Z/, *]
|
||||
proc {}
|
||||
in [/_user_data\Z/, *]
|
||||
Object.new
|
||||
in [:vad_model_path, *]
|
||||
Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
|
||||
in [:vad_params, *]
|
||||
Whisper::VAD::Params.new
|
||||
end
|
||||
params = Whisper::Params.new(param => value)
|
||||
if Float === value
|
||||
assert_in_delta value, params.send(param)
|
||||
else
|
||||
assert_equal value, params.send(param)
|
||||
end
|
||||
|
||||
PARAM_NAMES.reject {|name| name == param}.each do |name|
|
||||
expected = @params.send(name)
|
||||
actual = params.send(name)
|
||||
if Float === expected
|
||||
assert_in_delta expected, actual
|
||||
else
|
||||
assert_equal expected, actual
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
@ -1,136 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestSegment < TestBase
|
||||
def test_iteration
|
||||
whisper.each_segment do |segment|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
end
|
||||
end
|
||||
|
||||
def test_enumerator
|
||||
enum = whisper.each_segment
|
||||
assert_instance_of Enumerator, enum
|
||||
enum.to_a.each_with_index do |segment, index|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
assert_kind_of Integer, index
|
||||
end
|
||||
end
|
||||
|
||||
def test_start_time
|
||||
i = 0
|
||||
whisper.each_segment do |segment|
|
||||
assert_equal 0, segment.start_time if i == 0
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
|
||||
def test_end_time
|
||||
i = 0
|
||||
whisper.each_segment do |segment|
|
||||
assert_equal whisper.full_get_segment_t1(i) * 10, segment.end_time
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
|
||||
def test_no_speech_prob
|
||||
no_speech_prob = nil
|
||||
whisper.each_segment do |segment|
|
||||
no_speech_prob = segment.no_speech_prob
|
||||
end
|
||||
assert no_speech_prob > 0.0
|
||||
end
|
||||
|
||||
def test_on_new_segment
|
||||
params = Whisper::Params.new
|
||||
seg = nil
|
||||
index = 0
|
||||
params.on_new_segment do |segment|
|
||||
assert_instance_of Whisper::Segment, segment
|
||||
if index == 0
|
||||
seg = segment
|
||||
assert_equal 0, segment.start_time
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
||||
end
|
||||
index += 1
|
||||
end
|
||||
whisper.transcribe(AUDIO, params)
|
||||
assert_equal 0, seg.start_time
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, seg.text)
|
||||
end
|
||||
|
||||
def test_on_new_segment_twice
|
||||
params = Whisper::Params.new
|
||||
seg = nil
|
||||
params.on_new_segment do |segment|
|
||||
seg = segment
|
||||
return
|
||||
end
|
||||
params.on_new_segment do |segment|
|
||||
assert_same seg, segment
|
||||
return
|
||||
end
|
||||
whisper.transcribe(AUDIO, params)
|
||||
end
|
||||
|
||||
def test_pattern_matching
|
||||
segment = whisper.each_segment.first
|
||||
segment => {start_time:, end_time:, text:, no_speech_prob:, speaker_turn_next:}
|
||||
|
||||
assert_equal segment.start_time, start_time
|
||||
assert_equal segment.end_time, end_time
|
||||
assert_equal segment.text, text
|
||||
assert_equal segment.no_speech_prob, no_speech_prob
|
||||
assert_equal segment.speaker_turn_next?, speaker_turn_next
|
||||
end
|
||||
|
||||
def test_pattern_matching_partial
|
||||
segment = whisper.each_segment.first
|
||||
segment => {start_time:, end_time:, text:}
|
||||
|
||||
assert_equal segment.start_time, start_time
|
||||
assert_equal segment.end_time, end_time
|
||||
assert_equal segment.text, text
|
||||
end
|
||||
|
||||
def test_deconstruct_keys
|
||||
segment = whisper.each_segment.first
|
||||
expected = {
|
||||
start_time: segment.start_time,
|
||||
end_time: segment.end_time,
|
||||
text: segment.text,
|
||||
no_speech_prob: segment.no_speech_prob,
|
||||
speaker_turn_next: segment.speaker_turn_next?
|
||||
}
|
||||
assert_equal expected, segment.deconstruct_keys([:start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next])
|
||||
end
|
||||
|
||||
def test_deconstruct_keys_non_existent
|
||||
omit "Undefined behavior"
|
||||
|
||||
segment = whisper.each_segment.first
|
||||
|
||||
assert_equal({}, segment.deconstruct_keys([:non_existent]))
|
||||
end
|
||||
|
||||
def test_deconstruct_keys_too_many_keys
|
||||
omit "Undefined behavior"
|
||||
|
||||
segment = whisper.each_segment.first
|
||||
|
||||
assert_equal({}, segment.deconstruct_keys([:start_time, :end_time, :text, :no_speech_prob, :speaker_turn_next, :extra_key]))
|
||||
end
|
||||
|
||||
def test_deconstruct_keys_includes_non_existent_keys_not_too_many
|
||||
omit "Undefined behavior"
|
||||
|
||||
segment = whisper.each_segment.first
|
||||
|
||||
expected = {
|
||||
start_time: segment.start_time,
|
||||
end_time: segment.end_time,
|
||||
text: segment.text,
|
||||
no_speech_prob: segment.no_speech_prob
|
||||
}
|
||||
assert_equal(expected, segment.deconstruct_keys([:start_time, :end_time, :text, :no_speech_prob, :non_existent]))
|
||||
end
|
||||
end
|
@ -1,19 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestVAD < TestBase
|
||||
def setup
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
vad_params = Whisper::VAD::Params.new
|
||||
@params = Whisper::Params.new(
|
||||
vad: true,
|
||||
vad_model_path: "silero-v5.1.2",
|
||||
vad_params:
|
||||
)
|
||||
end
|
||||
|
||||
def test_transcribe
|
||||
@whisper.transcribe(TestBase::AUDIO, @params) do |text|
|
||||
assert_match(/ask not what your country can do for you[,.] ask what you can do for your country/i, text)
|
||||
end
|
||||
end
|
||||
end
|
@ -1,103 +0,0 @@
|
||||
require_relative "helper"
|
||||
|
||||
class TestVADParams < TestBase
|
||||
PARAM_NAMES = [
|
||||
:threshold,
|
||||
:min_speech_duration_ms,
|
||||
:min_silence_duration_ms,
|
||||
:max_speech_duration_s,
|
||||
:speech_pad_ms,
|
||||
:samples_overlap
|
||||
]
|
||||
|
||||
def setup
|
||||
@params = Whisper::VAD::Params.new
|
||||
end
|
||||
|
||||
def test_new
|
||||
params = Whisper::VAD::Params.new
|
||||
assert_kind_of Whisper::VAD::Params, params
|
||||
end
|
||||
|
||||
def test_threshold
|
||||
assert_in_delta @params.threshold, 0.5
|
||||
@params.threshold = 0.7
|
||||
assert_in_delta @params.threshold, 0.7
|
||||
end
|
||||
|
||||
def test_min_speech_duration
|
||||
pend
|
||||
end
|
||||
|
||||
def test_min_speech_duration_ms
|
||||
assert_equal 250, @params.min_speech_duration_ms
|
||||
@params.min_speech_duration_ms = 500
|
||||
assert_equal 500, @params.min_speech_duration_ms
|
||||
end
|
||||
|
||||
def test_min_silence_duration_ms
|
||||
assert_equal 100, @params.min_silence_duration_ms
|
||||
@params.min_silence_duration_ms = 200
|
||||
assert_equal 200, @params.min_silence_duration_ms
|
||||
end
|
||||
|
||||
def test_max_speech_duration
|
||||
pend
|
||||
end
|
||||
|
||||
def test_max_speech_duration_s
|
||||
assert @params.max_speech_duration_s >= 10e37 # Defaults to FLT_MAX
|
||||
@params.max_speech_duration_s = 60.0
|
||||
assert_equal 60.0, @params.max_speech_duration_s
|
||||
end
|
||||
|
||||
def test_speech_pad_ms
|
||||
assert_equal 30, @params.speech_pad_ms
|
||||
@params.speech_pad_ms = 50
|
||||
assert_equal 50, @params.speech_pad_ms
|
||||
end
|
||||
|
||||
def test_samples_overlap
|
||||
assert_in_delta @params.samples_overlap, 0.1
|
||||
@params.samples_overlap = 0.5
|
||||
assert_in_delta @params.samples_overlap, 0.5
|
||||
end
|
||||
|
||||
def test_equal
|
||||
assert_equal @params, Whisper::VAD::Params.new
|
||||
end
|
||||
|
||||
def test_new_with_kw_args
|
||||
params = Whisper::VAD::Params.new(threshold: 0.7)
|
||||
assert_in_delta params.threshold, 0.7
|
||||
assert_equal 250, params.min_speech_duration_ms
|
||||
end
|
||||
|
||||
def test_new_with_kw_args_non_existent
|
||||
assert_raise ArgumentError do
|
||||
Whisper::VAD::Params.new(non_existent: "value")
|
||||
end
|
||||
end
|
||||
|
||||
data(PARAM_NAMES.collect {|param| [param, param]}.to_h)
|
||||
def test_new_with_kw_args_default_values(param)
|
||||
default_value = @params.send(param)
|
||||
value = default_value + 1
|
||||
params = Whisper::VAD::Params.new(param => value)
|
||||
if Float === value
|
||||
assert_in_delta value, params.send(param)
|
||||
else
|
||||
assert_equal value, params.send(param)
|
||||
end
|
||||
|
||||
PARAM_NAMES.reject {|name| name == param}.each do |name|
|
||||
expected = @params.send(name)
|
||||
actual = params.send(name)
|
||||
if Float === expected
|
||||
assert_in_delta expected, actual
|
||||
else
|
||||
assert_equal expected, actual
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
@ -1,292 +0,0 @@
|
||||
require_relative "helper"
|
||||
require "stringio"
|
||||
require "etc"
|
||||
|
||||
# Exists to detect memory-related bugs
|
||||
Whisper.log_set ->(level, buffer, user_data) {}, nil
|
||||
|
||||
class TestWhisper < TestBase
|
||||
def setup
|
||||
@params = Whisper::Params.new
|
||||
end
|
||||
|
||||
def test_whisper
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
params = Whisper::Params.new
|
||||
params.print_timestamps = false
|
||||
|
||||
@whisper.transcribe(AUDIO, params) {|text|
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
|
||||
}
|
||||
end
|
||||
|
||||
def test_transcribe_non_parallel
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
params = Whisper::Params.new
|
||||
|
||||
@whisper.transcribe(AUDIO, params, n_processors: 1) {|text|
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
|
||||
}
|
||||
end
|
||||
|
||||
def test_transcribe_n_processors
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
params = Whisper::Params.new
|
||||
|
||||
@whisper.transcribe(AUDIO, params, n_processors: 4) {|text|
|
||||
assert_match(/ask not what your country can do for you[,.] ask what you can do for your country/i, text)
|
||||
}
|
||||
end
|
||||
|
||||
sub_test_case "After transcription" do
|
||||
def test_full_n_segments
|
||||
assert_equal 1, whisper.full_n_segments
|
||||
end
|
||||
|
||||
def test_full_lang_id
|
||||
assert_equal 0, whisper.full_lang_id
|
||||
end
|
||||
|
||||
def test_full_get_segment
|
||||
segment = whisper.full_get_segment(0)
|
||||
assert_equal 0, segment.start_time
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, segment.text)
|
||||
end
|
||||
|
||||
def test_full_get_segment_t0
|
||||
assert_equal 0, whisper.full_get_segment_t0(0)
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t0(whisper.full_n_segments)
|
||||
end
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t0(-1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_get_segment_t1
|
||||
t1 = whisper.full_get_segment_t1(0)
|
||||
assert_kind_of Integer, t1
|
||||
assert t1 > 0
|
||||
assert_raise IndexError do
|
||||
whisper.full_get_segment_t1(whisper.full_n_segments)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_get_segment_speaker_turn_next
|
||||
assert_false whisper.full_get_segment_speaker_turn_next(0)
|
||||
end
|
||||
|
||||
def test_full_get_segment_text
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, whisper.full_get_segment_text(0))
|
||||
end
|
||||
|
||||
def test_full_get_segment_no_speech_prob
|
||||
prob = whisper.full_get_segment_no_speech_prob(0)
|
||||
assert prob > 0.0
|
||||
assert prob < 1.0
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_max_id
|
||||
assert_kind_of Integer, Whisper.lang_max_id
|
||||
end
|
||||
|
||||
def test_lang_id
|
||||
assert_equal 0, Whisper.lang_id("en")
|
||||
assert_raise ArgumentError do
|
||||
Whisper.lang_id("non existing language")
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_str
|
||||
assert_equal "en", Whisper.lang_str(0)
|
||||
assert_raise IndexError do
|
||||
Whisper.lang_str(Whisper.lang_max_id + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_lang_str_full
|
||||
assert_equal "english", Whisper.lang_str_full(0)
|
||||
assert_raise IndexError do
|
||||
Whisper.lang_str_full(Whisper.lang_max_id + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def test_system_info_str
|
||||
assert_match(/\AWHISPER : COREML = \d | OPENVINO = \d |/, Whisper.system_info_str)
|
||||
end
|
||||
|
||||
def test_log_set
|
||||
user_data = Object.new
|
||||
logs = []
|
||||
log_callback = ->(level, buffer, udata) {
|
||||
logs << [level, buffer, udata]
|
||||
}
|
||||
Whisper.log_set log_callback, user_data
|
||||
Whisper::Context.new("base.en")
|
||||
|
||||
assert logs.length > 30
|
||||
logs.each do |log|
|
||||
assert_include [Whisper::LOG_LEVEL_DEBUG, Whisper::LOG_LEVEL_INFO, Whisper::LOG_LEVEL_WARN], log[0]
|
||||
assert_same user_data, log[2]
|
||||
end
|
||||
end
|
||||
|
||||
def test_log_suppress
|
||||
stderr = $stderr
|
||||
Whisper.log_set ->(level, buffer, user_data) {
|
||||
# do nothing
|
||||
}, nil
|
||||
dev = StringIO.new("")
|
||||
$stderr = dev
|
||||
Whisper::Context.new("base.en")
|
||||
assert_empty dev.string
|
||||
ensure
|
||||
$stderr = stderr
|
||||
end
|
||||
|
||||
sub_test_case "full" do
|
||||
def setup
|
||||
super
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
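# Skip the 78-byte WAV header and convert 16-bit little-endian PCM to floats in [-1, 1)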
@samples = File.read(AUDIO, nil, 78).unpack("s<*").collect {|i| i.to_f / 2**15}
|
||||
end
|
||||
|
||||
def test_full
|
||||
@whisper.full(@params, @samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_without_length
|
||||
@whisper.full(@params, @samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_enumerator
|
||||
samples = @samples.each
|
||||
@whisper.full(@params, samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_enumerator_without_length
|
||||
samples = @samples.each
|
||||
assert_raise ArgumentError do
|
||||
@whisper.full(@params, samples)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_enumerator_with_too_large_length
|
||||
samples = @samples.each.take(10).to_enum
|
||||
assert_raise StopIteration do
|
||||
@whisper.full(@params, samples, 11)
|
||||
end
|
||||
end
|
||||
|
||||
def test_full_with_memory_view
|
||||
samples = JFKReader.new(AUDIO)
|
||||
@whisper.full(@params, samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, @whisper.each_segment.first.text)
|
||||
end
|
||||
|
||||
def test_full_parallel
|
||||
nprocessors = 2
|
||||
@whisper.full_parallel(@params, @samples, @samples.length, nprocessors)
|
||||
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_with_memory_view
|
||||
nprocessors = 2
|
||||
samples = JFKReader.new(AUDIO)
|
||||
@whisper.full_parallel(@params, samples, nil, nprocessors)
|
||||
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length_and_n_processors
|
||||
@whisper.full_parallel(@params, @samples)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_length
|
||||
nprocessors = 2
|
||||
@whisper.full_parallel(@params, @samples, nil, nprocessors)
|
||||
|
||||
assert_equal nprocessors, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
|
||||
def test_full_parallel_without_n_processors
|
||||
@whisper.full_parallel(@params, @samples, @samples.length)
|
||||
|
||||
assert_equal 1, @whisper.full_n_segments
|
||||
text = @whisper.each_segment.collect(&:text).join
|
||||
assert_match(/ask what you can do/i, text)
|
||||
assert_match(/for your country/i, text)
|
||||
end
|
||||
end
|
||||
|
||||
def test_to_srt
|
||||
whisper = Whisper::Context.new("base.en")
|
||||
whisper.transcribe AUDIO, @params
|
||||
|
||||
lines = whisper.to_srt.lines
|
||||
assert_match(/\A\d+\n/, lines[0])
|
||||
assert_match(/\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n/, lines[1])
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, lines[2])
|
||||
end
|
||||
|
||||
def test_to_webvtt
|
||||
whisper = Whisper::Context.new("base.en")
|
||||
whisper.transcribe AUDIO, @params
|
||||
|
||||
lines = whisper.to_webvtt.lines
|
||||
assert_equal "WEBVTT\n", lines[0]
|
||||
assert_equal "\n", lines[1]
|
||||
assert_match(/\A\d+\n/, lines[2])
|
||||
assert_match(/\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n/, lines[3])
|
||||
assert_match(/ask not what your country can do for you, ask what you can do for your country/, lines[4])
|
||||
end
|
||||
|
||||
sub_test_case "Format needs escape" do
|
||||
def setup
|
||||
@whisper = Whisper::Context.new("base.en")
|
||||
@whisper.transcribe AUDIO, Whisper::Params.new
|
||||
segment = @whisper.each_segment.first
|
||||
segment.define_singleton_method :text do
|
||||
"& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country."
|
||||
end
|
||||
@whisper.define_singleton_method :each_segment do
|
||||
Enumerator.new(3) {|yielder| 3.times {yielder << segment}}
|
||||
end
|
||||
end
|
||||
|
||||
def test_to_srt_escape
|
||||
assert_equal "& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country.\n", @whisper.to_srt.lines[2]
|
||||
end
|
||||
|
||||
def test_to_webvtt_escape
|
||||
assert_equal "& so my fellow Americans --> ask not what your country can do for you <-- ask what you can do for your country.\n", @whisper.to_webvtt.lines[4]
|
||||
end
|
||||
end
|
||||
end
|
138
bindings/ruby/tests/test_whisper.rb
Normal file
@ -0,0 +1,138 @@
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
EXTDIR = File.join(TOPDIR, 'ext')
#$LIBDIR = File.join(TOPDIR, 'lib')
#$:.unshift(LIBDIR)
$:.unshift(EXTDIR)

require 'whisper'
require 'test/unit'

class TestWhisper < Test::Unit::TestCase
  def setup
    @params = Whisper::Params.new
  end

  def test_language
    @params.language = "en"
    assert_equal @params.language, "en"
    @params.language = "auto"
    assert_equal @params.language, "auto"
  end

  def test_offset
    @params.offset = 10_000
    assert_equal @params.offset, 10_000
    @params.offset = 0
    assert_equal @params.offset, 0
  end

  def test_duration
    @params.duration = 60_000
    assert_equal @params.duration, 60_000
    @params.duration = 0
    assert_equal @params.duration, 0
  end

  def test_max_text_tokens
    @params.max_text_tokens = 300
    assert_equal @params.max_text_tokens, 300
    @params.max_text_tokens = 0
    assert_equal @params.max_text_tokens, 0
  end

  def test_translate
    @params.translate = true
    assert @params.translate
    @params.translate = false
    assert !@params.translate
  end

  def test_no_context
    @params.no_context = true
    assert @params.no_context
    @params.no_context = false
    assert !@params.no_context
  end

  def test_single_segment
    @params.single_segment = true
    assert @params.single_segment
    @params.single_segment = false
    assert !@params.single_segment
  end

  def test_print_special
    @params.print_special = true
    assert @params.print_special
    @params.print_special = false
    assert !@params.print_special
  end

  def test_print_progress
    @params.print_progress = true
    assert @params.print_progress
    @params.print_progress = false
    assert !@params.print_progress
  end

  def test_print_realtime
    @params.print_realtime = true
    assert @params.print_realtime
    @params.print_realtime = false
    assert !@params.print_realtime
  end

  def test_print_timestamps
    @params.print_timestamps = true
    assert @params.print_timestamps
    @params.print_timestamps = false
    assert !@params.print_timestamps
  end

  def test_suppress_blank
    @params.suppress_blank = true
    assert @params.suppress_blank
    @params.suppress_blank = false
    assert !@params.suppress_blank
  end

  def test_suppress_non_speech_tokens
    @params.suppress_non_speech_tokens = true
    assert @params.suppress_non_speech_tokens
    @params.suppress_non_speech_tokens = false
    assert !@params.suppress_non_speech_tokens
  end

  def test_token_timestamps
    @params.token_timestamps = true
    assert @params.token_timestamps
    @params.token_timestamps = false
    assert !@params.token_timestamps
  end

  def test_split_on_word
    @params.split_on_word = true
    assert @params.split_on_word
    @params.split_on_word = false
    assert !@params.split_on_word
  end

  def test_speed_up
    @params.speed_up = true
    assert @params.speed_up
    @params.speed_up = false
    assert !@params.speed_up
  end

  def test_whisper
    @whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
    params = Whisper::Params.new
    params.print_timestamps = false

    jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
    @whisper.transcribe(jfk, params) {|text|
      assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
    }
  end
end
@ -1,36 +0,0 @@
require_relative "extsources"

Gem::Specification.new do |s|
  s.name = "whispercpp"
  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
  s.version = '1.3.3'
  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
  s.email = 'todd.fisher@gmail.com'
  s.extra_rdoc_files = ['LICENSE', 'README.md']

  s.files = `git ls-files . -z`.split("\x0") +
            EXTSOURCES.collect {|file|
              basename = File.basename(file)
              if s.extra_rdoc_files.include?(basename)
                basename
              else
                file.sub("../..", "ext/sources")
                    .sub("../javascript", "ext/sources/bindings/javascript")
              end
            }

  s.summary = %q{Ruby whisper.cpp bindings}
  s.test_files = s.files.select {|file| file.start_with? "test/"}

  s.extensions << 'ext/extconf.rb'
  s.required_ruby_version = '>= 3.1.0'

  #### Documentation and testing.
  s.homepage = 'https://github.com/ggml-org/whisper.cpp'
  s.rdoc_options = ['--main', 'README.md']

  s.platform = Gem::Platform::RUBY

  s.licenses = ['MIT']
end
@ -1,547 +0,0 @@
#!/bin/bash
#
# Options
IOS_MIN_OS_VERSION=16.4
MACOS_MIN_OS_VERSION=13.3
VISIONOS_MIN_OS_VERSION=1.0
TVOS_MIN_OS_VERSION=16.4

BUILD_SHARED_LIBS=OFF
WHISPER_BUILD_EXAMPLES=OFF
WHISPER_BUILD_TESTS=OFF
WHISPER_BUILD_SERVER=OFF
GGML_METAL=ON
GGML_METAL_EMBED_LIBRARY=ON
GGML_BLAS_DEFAULT=ON
GGML_METAL_USE_BF16=ON
GGML_OPENMP=OFF

COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"

# Common options for all builds
COMMON_CMAKE_ARGS=(
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
    -DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
    -DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
    -DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
    -DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
    -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
    -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
    -DWHISPER_BUILD_EXAMPLES=${WHISPER_BUILD_EXAMPLES}
    -DWHISPER_BUILD_TESTS=${WHISPER_BUILD_TESTS}
    -DWHISPER_BUILD_SERVER=${WHISPER_BUILD_SERVER}
    -DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
    -DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
    -DGGML_METAL=${GGML_METAL}
    -DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
    -DGGML_NATIVE=OFF
    -DGGML_OPENMP=${GGML_OPENMP}
)

XCODE_VERSION=$(xcodebuild -version 2>/dev/null | head -n1 | awk '{ print $2 }')
MAJOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f1)
MINOR_VERSION=$(echo $XCODE_VERSION | cut -d. -f2)
echo "Detected Xcode version: $XCODE_VERSION"

check_required_tool() {
    local tool=$1
    local install_message=$2

    if ! command -v $tool &> /dev/null; then
        echo "Error: $tool is required but not found."
        echo "$install_message"
        exit 1
    fi
}
echo "Checking for required tools..."
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"

set -e
## Clean up previous builds
rm -rf build-apple
rm -rf build-ios-sim
rm -rf build-ios-device
rm -rf build-macos
rm -rf build-visionos
rm -rf build-visionos-sim
rm -rf build-tvos-sim
rm -rf build-tvos-device

# Setup the xcframework build directory structure
setup_framework_structure() {
    local build_dir=$1
    local min_os_version=$2
    local platform=$3  # "ios", "macos", "visionos", or "tvos"
    local framework_name="whisper"

    echo "Creating ${platform}-style framework structure for ${build_dir}"

    if [[ "$platform" == "macos" ]]; then
        # macOS versioned structure uses versioned directories
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources

        # Create symbolic links
        ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
        ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
        ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
        ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
        ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}

        # Set header and module paths
        local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
        local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
    else
        # iOS/VisionOS/tvOS use a flat structure
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules

        # Remove any existing structure to ensure clean build
        rm -rf ${build_dir}/framework/${framework_name}.framework/Versions

        # Set header and module paths
        local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
        local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
    fi

    # Copy all required headers (common for all platforms)
    cp include/whisper.h           ${header_path}
    cp ggml/include/ggml.h         ${header_path}
    cp ggml/include/ggml-alloc.h   ${header_path}
    cp ggml/include/ggml-backend.h ${header_path}
    cp ggml/include/ggml-metal.h   ${header_path}
    cp ggml/include/ggml-cpu.h     ${header_path}
    cp ggml/include/ggml-blas.h    ${header_path}
    cp ggml/include/gguf.h         ${header_path}

    # Create module map (common for all platforms)
    cat > ${module_path}module.modulemap << EOF
framework module whisper {
    header "whisper.h"
    header "ggml.h"
    header "ggml-alloc.h"
    header "ggml-backend.h"
    header "ggml-metal.h"
    header "ggml-cpu.h"
    header "ggml-blas.h"
    header "gguf.h"

    link "c++"
    link framework "Accelerate"
    link framework "Metal"
    link framework "Foundation"

    export *
}
EOF

    # Platform-specific settings for Info.plist
    local platform_name=""
    local sdk_name=""
    local supported_platform=""

    case "$platform" in
        "ios")
            platform_name="iphoneos"
            sdk_name="iphoneos${min_os_version}"
            supported_platform="iPhoneOS"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family='    <key>UIDeviceFamily</key>
    <array>
        <integer>1</integer>
        <integer>2</integer>
    </array>'
            ;;
        "macos")
            platform_name="macosx"
            sdk_name="macosx${min_os_version}"
            supported_platform="MacOSX"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
            local device_family=""
            ;;
        "visionos")
            platform_name="xros"
            sdk_name="xros${min_os_version}"
            supported_platform="XRPlatform"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family=""
            ;;
        "tvos")
            platform_name="appletvos"
            sdk_name="appletvos${min_os_version}"
            supported_platform="AppleTVOS"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family='    <key>UIDeviceFamily</key>
    <array>
        <integer>3</integer>
    </array>'
            ;;
    esac

    # Create Info.plist
    cat > ${plist_path} << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleDevelopmentRegion</key>
    <string>en</string>
    <key>CFBundleExecutable</key>
    <string>whisper</string>
    <key>CFBundleIdentifier</key>
    <string>org.ggml.whisper</string>
    <key>CFBundleInfoDictionaryVersion</key>
    <string>6.0</string>
    <key>CFBundleName</key>
    <string>whisper</string>
    <key>CFBundlePackageType</key>
    <string>FMWK</string>
    <key>CFBundleShortVersionString</key>
    <string>1.0</string>
    <key>CFBundleVersion</key>
    <string>1</string>
    <key>MinimumOSVersion</key>
    <string>${min_os_version}</string>
    <key>CFBundleSupportedPlatforms</key>
    <array>
        <string>${supported_platform}</string>
    </array>${device_family}
    <key>DTPlatformName</key>
    <string>${platform_name}</string>
    <key>DTSDKName</key>
    <string>${sdk_name}</string>
</dict>
</plist>
EOF
}
# Create dynamic libraries from static libraries.
combine_static_libraries() {
    local build_dir="$1"
    local release_dir="$2"
    local platform="$3"      # "ios", "macos", "visionos", or "tvos"
    local is_simulator="$4"
    local base_dir="$(pwd)"
    local framework_name="whisper"

    # Determine output path based on platform
    local output_lib=""
    if [[ "$platform" == "macos" ]]; then
        # macOS uses a versioned structure
        output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
    else
        # iOS, visionOS, and tvOS use a flat directory structure
        output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
    fi

    local libs=(
        "${base_dir}/${build_dir}/src/${release_dir}/libwhisper.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
        "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
        "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
    )
    if [[ "$platform" == "macos" || "$platform" == "ios" ]]; then
        echo "Adding libwhisper.coreml library to the build."
        libs+=(
            "${base_dir}/${build_dir}/src/${release_dir}/libwhisper.coreml.a"
        )
    fi

    # Create temporary directory for processing
    local temp_dir="${base_dir}/${build_dir}/temp"
    echo "Creating temporary directory: ${temp_dir}"
    mkdir -p "${temp_dir}"

    # Since we have multiple architectures libtool will find object files that do not
    # match the target architecture. We suppress these warnings.
    libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null

    # Determine SDK, architectures, and install_name based on platform and simulator flag.
    local sdk=""
    local archs=""
    local min_version_flag=""
    local install_name=""
    local frameworks="-framework Foundation -framework Metal -framework Accelerate"

    case "$platform" in
        "ios")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="iphonesimulator"
                archs="arm64 x86_64"
                min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
            else
                sdk="iphoneos"
                archs="arm64"
                min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
            fi
            install_name="@rpath/whisper.framework/whisper"
            frameworks+=" -framework CoreML"
            ;;
        "macos")
            sdk="macosx"
            archs="arm64 x86_64"
            min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
            install_name="@rpath/whisper.framework/Versions/Current/whisper"
            frameworks+=" -framework CoreML"
            ;;
        "visionos")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="xrsimulator"
                archs="arm64 x86_64"
                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
            else
                sdk="xros"
                archs="arm64"
                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
            fi
            # Use flat structure for visionOS, same as iOS
            install_name="@rpath/whisper.framework/whisper"
            ;;
        "tvos")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="appletvsimulator"
                archs="arm64 x86_64"
                min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
            else
                sdk="appletvos"
                archs="arm64"
                min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
            fi
            install_name="@rpath/whisper.framework/whisper"
            ;;
    esac

    # Build architecture flags
    local arch_flags=""
    for arch in $archs; do
        arch_flags+=" -arch $arch"
    done

    # Create dynamic library
    echo "Creating dynamic library for ${platform}."
    xcrun -sdk $sdk clang++ -dynamiclib \
        -isysroot $(xcrun --sdk $sdk --show-sdk-path) \
        $arch_flags \
        $min_version_flag \
        -Wl,-force_load,"${temp_dir}/combined.a" \
        $frameworks \
        -install_name "$install_name" \
        -o "${base_dir}/${output_lib}"

    # Platform-specific post-processing for device builds
    if [[ "$is_simulator" == "false" ]]; then
        if command -v xcrun vtool &>/dev/null; then
            case "$platform" in
                "ios")
                    echo "Marking binary as a framework binary for iOS..."
                    xcrun vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
                "visionos")
                    echo "Marking binary as a framework binary for visionOS..."
                    if [[ "$MAJOR_VERSION" -gt 16 ]] || [[ "$MAJOR_VERSION" -eq 16 && "$MINOR_VERSION" -gt 2 ]]; then
                        echo "Xcode version greater than 16.2, using visionOS."
                        VISION_OS_BUILD_VERSION="visionos"
                    else
                        echo "Xcode version less than or equal to 16.2, using xros."
                        VISION_OS_BUILD_VERSION="xros"
                    fi
                    xcrun vtool -set-build-version ${VISION_OS_BUILD_VERSION} ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
                "tvos")
                    echo "Marking binary as a framework binary for tvOS..."
                    xcrun vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
            esac
        else
            echo "Warning: vtool not found. Binary may not pass App Store validation."
        fi
    fi

    echo "Creating properly formatted dSYM..."
    # Create a separate directory for dSYMs for all platforms
    mkdir -p "${base_dir}/${build_dir}/dSYMs"

    # iOS, visionOS, and tvOS style dSYM (flat structure)
    if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
        # Generate dSYM in the dSYMs directory
        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"

        # Create a copy of the binary that will be stripped
        cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"

        # Strip debug symbols from the copy
        xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"

        # Replace the original with the stripped version
        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
    else
        # macOS style dSYM
        # First strip debug info to a separate file
        xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"

        # Generate dSYM in the dSYMs directory
        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/whisper.dSYM"

        # Replace original binary with stripped version
        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
    fi

    # Remove any automatically generated dSYM files in the framework structure as they will
    # otherwise cause Invalid Bundle Structure validation errors.
    if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
        echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
        rm -rf "${base_dir}/${output_lib}.dSYM"
    fi

    # Clean up
    rm -rf "${temp_dir}"
}
echo "Building for iOS simulator..."
|
||||
cmake -B build-ios-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
|
||||
-DIOS=ON \
|
||||
-DCMAKE_SYSTEM_NAME=iOS \
|
||||
-DCMAKE_OSX_SYSROOT=iphonesimulator \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-ios-sim --config Release -- -quiet
|
||||
|
||||
echo "Building for iOS devices..."
|
||||
cmake -B build-ios-device -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_SYSROOT=iphoneos \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-ios-device --config Release -- -quiet
|
||||
|
||||
echo "Building for macOS..."
|
||||
cmake -B build-macos -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-DWHISPER_COREML="ON" \
|
||||
-DWHISPER_COREML_ALLOW_FALLBACK="ON" \
|
||||
-S .
|
||||
cmake --build build-macos --config Release -- -quiet
|
||||
|
||||
echo "Building for visionOS..."
|
||||
cmake -B build-visionos -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DCMAKE_SYSTEM_NAME=visionOS \
|
||||
-DCMAKE_OSX_SYSROOT=xros \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
|
||||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-visionos --config Release -- -quiet
|
||||
|
||||
echo "Building for visionOS simulator..."
|
||||
cmake -B build-visionos-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DCMAKE_SYSTEM_NAME=visionOS \
|
||||
-DCMAKE_OSX_SYSROOT=xrsimulator \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
|
||||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-visionos-sim --config Release -- -quiet
|
||||
|
||||
# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
|
||||
echo "Building for tvOS simulator..."
|
||||
cmake -B build-tvos-sim -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_SYSTEM_NAME=tvOS \
|
||||
-DCMAKE_OSX_SYSROOT=appletvsimulator \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
|
||||
-DGGML_METAL=ON \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-tvos-sim --config Release -- -quiet
|
||||
|
||||
echo "Building for tvOS devices..."
|
||||
cmake -B build-tvos-device -G Xcode \
|
||||
"${COMMON_CMAKE_ARGS[@]}" \
|
||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
|
||||
-DCMAKE_SYSTEM_NAME=tvOS \
|
||||
-DCMAKE_OSX_SYSROOT=appletvos \
|
||||
-DCMAKE_OSX_ARCHITECTURES="arm64" \
|
||||
-DGGML_METAL=ON \
|
||||
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
|
||||
-DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
|
||||
-S .
|
||||
cmake --build build-tvos-device --config Release -- -quiet
|
||||
|
||||
# Setup frameworks and copy binaries and headers
|
||||
echo "Setting up framework structures..."
|
||||
setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
|
||||
setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
|
||||
setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
|
||||
setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
|
||||
setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
|
||||
setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
|
||||
setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
|
||||
|
||||
# Create dynamic libraries from static libraries
|
||||
echo "Creating dynamic libraries from static libraries..."
|
||||
combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
|
||||
combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
|
||||
combine_static_libraries "build-macos" "Release" "macos" "false"
|
||||
combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
|
||||
combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
|
||||
combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
|
||||
combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
|
||||
|
||||
# Create XCFramework with correct debug symbols paths
|
||||
echo "Creating XCFramework..."
|
||||
xcodebuild -create-xcframework \
|
||||
-framework $(pwd)/build-ios-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-ios-sim/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-ios-device/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-ios-device/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-macos/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-macos/dSYMS/whisper.dSYM \
|
||||
-framework $(pwd)/build-visionos/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-visionos/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-visionos-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-visionos-sim/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-tvos-device/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-tvos-device/dSYMs/whisper.dSYM \
|
||||
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
|
||||
-debug-symbols $(pwd)/build-tvos-sim/dSYMs/whisper.dSYM \
|
||||
-output $(pwd)/build-apple/whisper.xcframework
|
41 ci/README.md
@ -1,41 +0,0 @@
# CI

In addition to [GitHub Actions](https://github.com/ggerganov/whisper.cpp/actions), `whisper.cpp` uses a custom CI framework:

https://github.com/ggml-org/ci

It monitors the `master` branch for new commits and runs the
[ci/run.sh](https://github.com/ggerganov/whisper.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
to execute heavier workloads than GitHub Actions alone can handle. Over time, the cloud instances will be scaled
to cover various hardware architectures, including GPU and Apple Silicon instances.

Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
Only the branches of this repo are monitored for this keyword.
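For example, any commit message containing the keyword will do (the message below is purely illustrative):

```bash
git commit -m "ggml : fix tvOS build (ggml-ci)"
```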
It is good practice to execute the full CI locally on your machine before publishing changes:

```bash
mkdir tmp

# CPU-only build
bash ./ci/run.sh ./tmp/results ./tmp/mnt

# with CUDA support
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```

## Environment Variables

The CI script supports several environment variables to control the build:

| Variable | Description |
|----------|-------------|
| `GG_BUILD_CUDA` | Enable NVIDIA CUDA GPU acceleration |
| `GG_BUILD_SYCL` | Enable Intel SYCL acceleration |
| `GG_BUILD_VULKAN` | Enable Vulkan GPU acceleration |
| `GG_BUILD_METAL` | Enable Metal acceleration on Apple Silicon |
| `GG_BUILD_BLAS` | Enable BLAS CPU acceleration |
| `GG_BUILD_OPENVINO` | Enable OpenVINO support |
| `GG_BUILD_COREML` | Enable Core ML support for Apple Neural Engine |
| `GG_BUILD_LOW_PERF` | Limit tests for low-performance hardware |
| `GG_BUILD_TEST_MODELS` | Comma-separated list of models to test (e.g. "tiny.en,tiny,base,medium", defaults to all models unless `GG_BUILD_LOW_PERF` is set) |
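These variables combine with the local invocation shown above. For example, a run with Metal enabled and a reduced model set might look like this (the variable values are illustrative; the variables themselves are the ones listed in the table):

```bash
GG_BUILD_METAL=1 GG_BUILD_TEST_MODELS="tiny.en,base" bash ./ci/run.sh ./tmp/results ./tmp/mnt
```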
Some files were not shown because too many files have changed in this diff.