mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-18 12:26:22 +00:00
whisper : reorganize source code + improve CMake (#2256)
* scripts : update sync [no ci]
* files : reorganize [no ci]
* sync : llama.cpp
* cmake : link math library
* cmake : build normal ggml library
* files : move headers to include
* objc : fix path to ggml-metal.h
* ci : fix WHISPER_CUDA -> GGML_CUDA
* scripts : sync LICENSE [no ci]
This commit is contained in:
parent
bf4cb4abad
commit
e30c679928
@ -21,7 +21,7 @@ COPY . .
|
|||||||
# Set nvcc architecture
|
# Set nvcc architecture
|
||||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||||
# Enable cuBLAS
|
# Enable cuBLAS
|
||||||
ENV WHISPER_CUBLAS=1
|
ENV GGML_CUDA=1
|
||||||
|
|
||||||
RUN make
|
RUN make
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ ARG CUDA_DOCKER_ARCH=all
|
|||||||
# Set nvcc architecture
|
# Set nvcc architecture
|
||||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||||
# Enable cuBLAS
|
# Enable cuBLAS
|
||||||
ENV WHISPER_CUBLAS=1
|
ENV GGML_CUDA=1
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y build-essential \
|
apt-get install -y build-essential \
|
||||||
|
39
.github/workflows/build.yml
vendored
39
.github/workflows/build.yml
vendored
@ -101,7 +101,10 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
build: [Debug, Release]
|
build: [Debug, Release]
|
||||||
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
#arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
# TODO: arm/v7 disabled due to clang bug
|
||||||
|
# https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/ppc64le]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@ -197,7 +200,7 @@ jobs:
|
|||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||||
cmake --build . --config Release -j $(nproc)
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
ubuntu-22-cmake-sycl-fp16:
|
ubuntu-22-cmake-sycl-fp16:
|
||||||
@ -247,7 +250,7 @@ jobs:
|
|||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -DWHISPER_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||||
cmake --build . --config Release -j $(nproc)
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
windows-msys2:
|
windows-msys2:
|
||||||
@ -289,7 +292,7 @@ jobs:
|
|||||||
- name: Build using make w/ OpenBLAS
|
- name: Build using make w/ OpenBLAS
|
||||||
shell: msys2 {0}
|
shell: msys2 {0}
|
||||||
run: |
|
run: |
|
||||||
make WHISPER_OPENBLAS=1 -j $(nproc)
|
make GGML_OPENBLAS=1 -j $(nproc)
|
||||||
|
|
||||||
- name: Build using CMake
|
- name: Build using CMake
|
||||||
shell: msys2 {0}
|
shell: msys2 {0}
|
||||||
@ -305,7 +308,7 @@ jobs:
|
|||||||
- name: Build using CMake w/ OpenBLAS
|
- name: Build using CMake w/ OpenBLAS
|
||||||
shell: msys2 {0}
|
shell: msys2 {0}
|
||||||
run: |
|
run: |
|
||||||
cmake -B build -DWHISPER_OPENBLAS=ON
|
cmake -B build -DGGML_OPENBLAS=ON
|
||||||
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
||||||
windows:
|
windows:
|
||||||
@ -381,12 +384,9 @@ jobs:
|
|||||||
- arch: Win32
|
- arch: Win32
|
||||||
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
|
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
|
||||||
s2arc: x86
|
s2arc: x86
|
||||||
clblast: OFF
|
|
||||||
- arch: x64
|
- arch: x64
|
||||||
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
|
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
|
||||||
s2arc: x64
|
s2arc: x64
|
||||||
clblast: ON
|
|
||||||
clver: 1.6.1
|
|
||||||
- sdl2: ON
|
- sdl2: ON
|
||||||
s2ver: 2.28.5
|
s2ver: 2.28.5
|
||||||
|
|
||||||
@ -413,26 +413,13 @@ jobs:
|
|||||||
7z x sdl2.zip
|
7z x sdl2.zip
|
||||||
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
||||||
|
|
||||||
- name: Install OpenCL
|
|
||||||
if: matrix.clblast == 'ON'
|
|
||||||
run: vcpkg.exe --triplet=${{ matrix.arch }}-windows install opencl
|
|
||||||
|
|
||||||
- name: Fetch CLBlast and set CLBlast_DIR
|
|
||||||
if: matrix.clblast == 'ON'
|
|
||||||
run: |
|
|
||||||
C:/msys64/usr/bin/wget.exe -qO clblast.zip https://github.com/CNugteren/CLBlast/releases/download/${{ matrix.clver }}/CLBlast-${{ matrix.clver }}-windows-x64.zip
|
|
||||||
7z x clblast.zip
|
|
||||||
7z x CLBlast-${{ matrix.clver }}-windows-x64.7z
|
|
||||||
echo "CLBlast_DIR=$env:GITHUB_WORKSPACE/CLBlast-${{ matrix.clver }}-windows-x64/lib/cmake/CLBlast" >> $env:GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Configure
|
- name: Configure
|
||||||
run: >
|
run: >
|
||||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
-DWHISPER_OPENBLAS=${{ matrix.blas }}
|
-DGGML_OPENBLAS=${{ matrix.blas }}
|
||||||
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
|
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
|
||||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||||
-DWHISPER_CLBLAST=${{ matrix.clblast }}
|
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
@ -447,15 +434,11 @@ jobs:
|
|||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
- name: Copy clblast.dll
|
|
||||||
if: matrix.clblast == 'ON'
|
|
||||||
run: copy "$env:CLBlast_DIR/../../clblast.dll" build/bin/${{ matrix.build }}
|
|
||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
|
name: whisper-blas-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
windows-cublas:
|
windows-cublas:
|
||||||
@ -498,7 +481,7 @@ jobs:
|
|||||||
run: >
|
run: >
|
||||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
-DWHISPER_CUDA=${{ matrix.cublas }}
|
-DGGML_CUDA=${{ matrix.cublas }}
|
||||||
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||||
|
|
||||||
- name: Build ${{ matrix.cuda-toolkit }}
|
- name: Build ${{ matrix.cuda-toolkit }}
|
||||||
|
12
.gitignore
vendored
12
.gitignore
vendored
@ -10,17 +10,7 @@
|
|||||||
/CMakeSettings.json
|
/CMakeSettings.json
|
||||||
|
|
||||||
build/
|
build/
|
||||||
build-blas/
|
build-*/
|
||||||
build-coreml/
|
|
||||||
build-em/
|
|
||||||
build-debug/
|
|
||||||
build-release/
|
|
||||||
build-rwdi/
|
|
||||||
build-static/
|
|
||||||
build-cublas/
|
|
||||||
build-no-accel/
|
|
||||||
build-sanitize-addr/
|
|
||||||
build-sanitize-thread/
|
|
||||||
|
|
||||||
# SPM
|
# SPM
|
||||||
.build/
|
.build/
|
||||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
|||||||
[submodule "bindings/ios"]
|
|
||||||
path = bindings/ios
|
|
||||||
url = https://github.com/ggerganov/whisper.spm
|
|
893
CMakeLists.txt
893
CMakeLists.txt
@ -1,25 +1,31 @@
|
|||||||
cmake_minimum_required (VERSION 3.5)
|
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||||
|
project("whisper.cpp" C CXX)
|
||||||
|
project("whisper.cpp" VERSION 1.6.2)
|
||||||
|
include(CheckIncludeFileCXX)
|
||||||
|
|
||||||
# Allow for the creation of solution folders.
|
|
||||||
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
|
||||||
|
|
||||||
project(whisper.cpp VERSION 1.6.2)
|
|
||||||
set(SOVERSION 1)
|
set(SOVERSION 1)
|
||||||
|
|
||||||
|
#set(CMAKE_WARN_DEPRECATED YES)
|
||||||
|
set(CMAKE_WARN_UNUSED_CLI YES)
|
||||||
|
|
||||||
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
|
|
||||||
|
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||||
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||||
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||||
|
endif()
|
||||||
|
|
||||||
# Add path to modules
|
# Add path to modules
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||||
|
|
||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
set(WHISPER_STANDALONE ON)
|
set(WHISPER_STANDALONE ON)
|
||||||
include(GitVars)
|
|
||||||
include(BuildTypes)
|
include(git-vars)
|
||||||
|
|
||||||
# configure project version
|
# configure project version
|
||||||
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
|
||||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
|
||||||
endif()
|
|
||||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||||
else()
|
else()
|
||||||
set(WHISPER_STANDALONE OFF)
|
set(WHISPER_STANDALONE OFF)
|
||||||
@ -29,6 +35,11 @@ if (EMSCRIPTEN)
|
|||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
|
|
||||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||||
|
|
||||||
|
# TODO: without these, we get the following error:
|
||||||
|
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||||
else()
|
else()
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
@ -37,793 +48,145 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# options
|
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||||
|
|
||||||
if (APPLE)
|
#
|
||||||
set(WHISPER_METAL_DEFAULT ON)
|
# option list
|
||||||
else()
|
#
|
||||||
set(WHISPER_METAL_DEFAULT OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
|
# general
|
||||||
|
option(WHISPER_CCACHE "whisper: use ccache if available" ON)
|
||||||
|
|
||||||
|
# debug
|
||||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||||
|
|
||||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
# build
|
||||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
|
||||||
|
|
||||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
|
||||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
|
||||||
|
|
||||||
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
|
||||||
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
||||||
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
|
||||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
|
||||||
option(WHISPER_NO_AVX512 "whisper: disable AVX512" ON)
|
|
||||||
option(WHISPER_NO_AVX512_VBMI "whisper: disable AVX512-VBMI" ON)
|
|
||||||
option(WHISPER_NO_AVX512_VNNI "whisper: disable AVX512-VNNI" ON)
|
|
||||||
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
|
||||||
option(WHISPER_NO_F16C "whisper: disable F16c" OFF)
|
|
||||||
|
|
||||||
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
|
||||||
|
|
||||||
if (APPLE)
|
|
||||||
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
|
|
||||||
option(WHISPER_METAL "whisper: use Metal" ${WHISPER_METAL_DEFAULT})
|
|
||||||
option(WHISPER_METAL_NDEBUG "whisper: disable Metal debugging" OFF)
|
|
||||||
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
|
||||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
|
||||||
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
|
|
||||||
option(WHISPER_BLAS "whisper: use BLAS" ON)
|
|
||||||
set (WHISPER_BLAS_VENDOR "Apple" CACHE STRING
|
|
||||||
"whisper: BLAS library vendor")
|
|
||||||
else()
|
|
||||||
option(WHISPER_CUDA "whisper: support for CUDA" OFF)
|
|
||||||
option(WHISPER_CUDA_FA_ALL_QUANTS "whisper: compile all quants for FlashAttention" OFF)
|
|
||||||
option(WHISPER_CUBLAS "whisper: support for CUDA (deprecated)" OFF)
|
|
||||||
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
|
|
||||||
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
|
|
||||||
option(WHISPER_MKL "whisper: use Intel Math Kernel Library (MKL)" OFF)
|
|
||||||
option(WHISPER_SYCL "whisper: use SYCL" OFF)
|
|
||||||
option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
|
|
||||||
option(WHISPER_BLAS "whisper: use BLAS" OFF)
|
|
||||||
set (WHISPER_BLAS_VENDOR "Generic" CACHE STRING
|
|
||||||
"whisper: BLAS library vendor")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
|
||||||
|
|
||||||
# sanitizers
|
# sanitizers
|
||||||
|
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||||
|
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||||
|
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||||
|
|
||||||
if (NOT MSVC)
|
# extra artifacts
|
||||||
if (WHISPER_SANITIZE_THREAD)
|
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
|
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
|
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_SANITIZE_ADDRESS)
|
# 3rd party libs
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_SANITIZE_UNDEFINED)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
|
||||||
|
|
||||||
# dependencies
|
|
||||||
|
|
||||||
find_package(Threads REQUIRED)
|
|
||||||
|
|
||||||
#compile flag sycl
|
|
||||||
if (WHISPER_SYCL)
|
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
|
||||||
else()
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_FFMPEG)
|
|
||||||
# As of cmake 3.27, there is no official cmake support for FindFFmpeg.
|
|
||||||
# Consequently we added a FindFFmpeg.cmake script to the cmake subfolder:
|
|
||||||
# whisper.cpp does not need the full ffmpeg libs, just AVFORMAT AVCODEC AVUTIL SWRESAMPLE
|
|
||||||
# libswresample performs highly optimized audio resampling, rematrixing and sample format conversion operations
|
|
||||||
# libavcodec provides a generic encoding/decoding framework and contains multiple decoders and encoders for audio, video and subtitle streams, and several bitstream filters.
|
|
||||||
# libavformat provides a generic framework for multiplexing and demultiplexing (muxing and demuxing) audio, video and subtitle streams.
|
|
||||||
find_package(FFmpeg REQUIRED)
|
|
||||||
if (NOT ${FFMPEG_FOUND})
|
|
||||||
message(FATAL_ERROR "Cannot find ffmpeg libs/headers")
|
|
||||||
endif()
|
|
||||||
message(STATUS "Found ffmpeg libs: ${FFMPEG_LIBRARIES}")
|
|
||||||
message(STATUS "Found ffmpeg headers in: ${FFMPEG_INCLUDE_DIRS}")
|
|
||||||
message(STATUS "ffmpeg definitions: ${FFMPEG_DEFINITIONS}")
|
|
||||||
message(STATUS "Found avformat ${AVFORMAT_VERSION}")
|
|
||||||
include_directories(${FFMPEG_INCLUDE_DIRS})
|
|
||||||
add_compile_definitions(WHISPER_FFMPEG)
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# on APPLE
|
|
||||||
if (APPLE)
|
|
||||||
# include Accelerate framework
|
|
||||||
if (NOT WHISPER_NO_ACCELERATE)
|
|
||||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
|
||||||
|
|
||||||
if (ACCELERATE_FRAMEWORK)
|
|
||||||
message(STATUS "Accelerate framework found")
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64)
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "Accelerate framework not found")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_METAL)
|
|
||||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
|
||||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
|
||||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
|
||||||
|
|
||||||
if (METAL_FRAMEWORK)
|
|
||||||
message(STATUS "Metal framework found")
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS}
|
|
||||||
${FOUNDATION_LIBRARY}
|
|
||||||
${METAL_FRAMEWORK}
|
|
||||||
${METALKIT_FRAMEWORK}
|
|
||||||
)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_METAL)
|
|
||||||
|
|
||||||
if (WHISPER_METAL_NDEBUG)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_METAL_NDEBUG)
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "Metal framework not found")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
|
|
||||||
|
|
||||||
# copy ggml-common.h and ggml-metal.metal to bin directory
|
|
||||||
configure_file(ggml-common.h bin/ggml-common.h COPYONLY)
|
|
||||||
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
|
|
||||||
|
|
||||||
if (WHISPER_METAL_EMBED_LIBRARY)
|
|
||||||
enable_language(ASM)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_METAL_EMBED_LIBRARY)
|
|
||||||
|
|
||||||
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
|
||||||
set(COMMON_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
|
|
||||||
|
|
||||||
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
|
|
||||||
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
|
|
||||||
set(EMBED_METALLIB_SOURCE "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-combined.metal")
|
|
||||||
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${EMBED_METALLIB_SOURCE}
|
|
||||||
COMMAND sed -e "/^#include \\\"ggml-common.h\\\"/r ${COMMON_HEADER}" -e "/^#include \\\"ggml-common.h\\\"/d" ${METALLIB_SOURCE} > ${EMBED_METALLIB_SOURCE}
|
|
||||||
DEPENDS ${METALLIB_SOURCE} ${COMMON_HEADER}
|
|
||||||
COMMENT "Generating combined Metal library for embedding"
|
|
||||||
)
|
|
||||||
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo ".incbin \\\"${EMBED_METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
|
|
||||||
DEPENDS ${EMBED_METALLIB_SOURCE}
|
|
||||||
COMMENT "Generate assembly for embedded Metal library"
|
|
||||||
)
|
|
||||||
|
|
||||||
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_COREML)
|
|
||||||
find_library(FOUNDATION_FRAMEWORK Foundation)
|
|
||||||
find_library(COREML_FRAMEWORK CoreML)
|
|
||||||
|
|
||||||
if (COREML_FRAMEWORK)
|
|
||||||
message(STATUS "CoreML framework found")
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_COREML)
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "CoreML framework not found")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_COREML_ALLOW_FALLBACK)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_COREML_ALLOW_FALLBACK)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_BLAS)
|
|
||||||
if (WHISPER_STATIC)
|
|
||||||
set(BLA_STATIC ON)
|
|
||||||
endif()
|
|
||||||
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
|
|
||||||
# set(BLA_SIZEOF_INTEGER 8)
|
|
||||||
#endif()
|
|
||||||
|
|
||||||
set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
|
|
||||||
find_package(BLAS)
|
|
||||||
|
|
||||||
if (BLAS_FOUND)
|
|
||||||
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
|
|
||||||
|
|
||||||
if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${WHISPER_BLAS_VENDOR} MATCHES "Apple"))
|
|
||||||
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
|
|
||||||
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
|
|
||||||
find_package(PkgConfig REQUIRED)
|
|
||||||
if (${WHISPER_BLAS_VENDOR} MATCHES "Generic")
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED blas)
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "OpenBLAS")
|
|
||||||
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
|
|
||||||
pkg_check_modules(DepBLAS openblas64)
|
|
||||||
if (NOT DepBLAS_FOUND)
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED openblas)
|
|
||||||
endif()
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "FLAME")
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED blis)
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "ATLAS")
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "FlexiBLAS")
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "Intel")
|
|
||||||
# all Intel* libraries share the same include path
|
|
||||||
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
|
|
||||||
elseif (${WHISPER_BLAS_VENDOR} MATCHES "NVHPC")
|
|
||||||
# this doesn't provide pkg-config
|
|
||||||
# suggest to assign BLAS_INCLUDE_DIRS on your own
|
|
||||||
if ("${NVHPC_VERSION}" STREQUAL "")
|
|
||||||
message(WARNING "Better to set NVHPC_VERSION")
|
|
||||||
else()
|
|
||||||
set(DepBLAS_FOUND ON)
|
|
||||||
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
if (DepBLAS_FOUND)
|
|
||||||
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
|
|
||||||
else()
|
|
||||||
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
|
|
||||||
" detected by pkgconfig, trying to find cblas.h from possible paths...")
|
|
||||||
find_path(BLAS_INCLUDE_DIRS
|
|
||||||
NAMES cblas.h
|
|
||||||
HINTS
|
|
||||||
/usr/include
|
|
||||||
/usr/local/include
|
|
||||||
/usr/include/openblas
|
|
||||||
/opt/homebrew/opt/openblas/include
|
|
||||||
/usr/local/opt/openblas/include
|
|
||||||
/usr/include/x86_64-linux-gnu/openblas/include
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
|
||||||
|
|
||||||
add_compile_options(${BLAS_LINKER_FLAGS})
|
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_BLAS)
|
|
||||||
|
|
||||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${WHISPER_BLAS_VENDOR} MATCHES "Generic" OR ${WHISPER_BLAS_VENDOR} MATCHES "Intel"))
|
|
||||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(GGML_HEADERS_BLAS ggml-blas.h)
|
|
||||||
set(GGML_SOURCES_BLAS ggml-blas.cpp)
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
|
||||||
set(WHISPER_EXTRA_INCLUDES ${WHISPER_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
|
|
||||||
else()
|
|
||||||
message(WARNING "BLAS not found, please refer to "
|
|
||||||
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
|
||||||
" to set correct WHISPER_BLAS_VENDOR")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_MKL)
|
|
||||||
find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT})
|
|
||||||
message(STATUS "Imported oneMKL targets: ${MKL_IMPORTED_TARGETS}")
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_BLAS_USE_MKL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_CUBLAS)
|
|
||||||
message(WARNING "WHISPER_CUBLAS is deprecated and will be removed in the future.\nUse WHISPER_CUDA instead")
|
|
||||||
set(WHISPER_CUDA ON)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_CUDA)
|
|
||||||
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
|
|
||||||
|
|
||||||
find_package(CUDAToolkit)
|
|
||||||
|
|
||||||
if (CUDAToolkit_FOUND)
|
|
||||||
message(STATUS "cuBLAS found")
|
|
||||||
|
|
||||||
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
|
||||||
# 52 == lowest CUDA 12 standard
|
|
||||||
# 60 == f16 CUDA intrinsics
|
|
||||||
# 61 == integer CUDA intrinsics
|
|
||||||
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
|
||||||
if (WHISPER_CUDA_F16 OR WHISPER_CUDA_DMMV_F16)
|
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
|
|
||||||
else()
|
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
|
|
||||||
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
|
||||||
|
|
||||||
enable_language(CUDA)
|
|
||||||
|
|
||||||
file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ggml-cuda.h)
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ggml-cuda.cu)
|
|
||||||
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
|
|
||||||
if (WHISPER_CUDA_FA_ALL_QUANTS)
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
|
|
||||||
else()
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_CUDA)
|
|
||||||
add_compile_definitions(GGML_CUDA_USE_GRAPHS)
|
|
||||||
|
|
||||||
if (WHISPER_STATIC)
|
|
||||||
if (WIN32)
|
|
||||||
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt CUDA::cufft)
|
|
||||||
else ()
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static CUDA::cufft_static)
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt CUDA::cufft)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "cuBLAS not found")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
if (WHISPER_HIPBLAS)
|
|
||||||
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
|
|
||||||
if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
|
|
||||||
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
|
|
||||||
endif()
|
|
||||||
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
|
|
||||||
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
find_package(hip)
|
|
||||||
find_package(hipblas)
|
|
||||||
find_package(rocblas)
|
|
||||||
|
|
||||||
if (${hipblas_FOUND} AND ${hip_FOUND})
|
|
||||||
message(STATUS "HIP and hipBLAS found")
|
|
||||||
set(GGML_HEADERS_ROCM "ggml-cuda.h")
|
|
||||||
|
|
||||||
file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
|
|
||||||
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
|
|
||||||
if (WHISPER_CUDA_FA_ALL_QUANTS)
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
|
|
||||||
else()
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
|
|
||||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
|
|
||||||
|
|
||||||
set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
|
|
||||||
if (WHISPER_STATIC)
|
|
||||||
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
|
||||||
endif()
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if( WHISPER_OPENVINO )
|
|
||||||
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_SYCL)
|
|
||||||
if ( NOT DEFINED ENV{ONEAPI_ROOT})
|
|
||||||
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
|
|
||||||
endif()
|
|
||||||
#todo: AOT
|
|
||||||
|
|
||||||
find_package(IntelSYCL REQUIRED)
|
|
||||||
if (WHISPER_SYCL_F16)
|
|
||||||
add_compile_definitions(GGML_SYCL_F16)
|
|
||||||
endif()
|
|
||||||
add_compile_definitions(GGML_USE_SYCL)
|
|
||||||
|
|
||||||
add_compile_options(-I./) #include DPCT
|
|
||||||
add_compile_options(-I/${SYCL_INCLUDE_DIR})
|
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
|
||||||
|
|
||||||
set(GGML_HEADERS_SYCL ggml-sycl.h)
|
|
||||||
file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
|
|
||||||
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
|
||||||
endif()
|
|
||||||
# compiler flags
|
|
||||||
|
|
||||||
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (WHISPER_ALL_WARNINGS)
|
|
||||||
if (NOT MSVC)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
|
||||||
-Wall \
|
|
||||||
-Wextra \
|
|
||||||
-Wpedantic \
|
|
||||||
-Wshadow \
|
|
||||||
-Wcast-qual \
|
|
||||||
-Wstrict-prototypes \
|
|
||||||
-Wpointer-arith \
|
|
||||||
-Wno-unused-function \
|
|
||||||
")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
|
||||||
-Wall \
|
|
||||||
-Wextra \
|
|
||||||
-Wpedantic \
|
|
||||||
-Wcast-qual \
|
|
||||||
")
|
|
||||||
else()
|
|
||||||
# todo : msvc
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (NOT MSVC)
|
|
||||||
# TODO: temporarily disabled until we figure out ggml-metal.m
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
|
||||||
message(STATUS "ARM detected")
|
|
||||||
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
|
|
||||||
message(STATUS "PowerPC detected")
|
|
||||||
else()
|
|
||||||
message(STATUS "x86 detected")
|
|
||||||
if (MSVC)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /utf-8")
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /utf-8")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8")
|
|
||||||
if(NOT WHISPER_NO_AVX512)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX512")
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX512")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX512")
|
|
||||||
# MSVC has no compile-time flags enabling specific
|
|
||||||
# AVX512 extensions, nor does it define the
|
|
||||||
# macros corresponding to the extensions.
|
|
||||||
# Do it manually.
|
|
||||||
if (NOT WHISPER_NO_AVX512_VBMI)
|
|
||||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
|
|
||||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
|
|
||||||
endif()
|
|
||||||
if (NOT WHISPER_NO_AVX512_VNNI)
|
|
||||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
|
|
||||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
|
|
||||||
endif()
|
|
||||||
elseif(NOT WHISPER_NO_AVX2)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
|
|
||||||
elseif(NOT WHISPER_NO_AVX)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
if (EMSCRIPTEN)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
|
||||||
else()
|
|
||||||
if(NOT WHISPER_NO_AVX)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_AVX2)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_AVX512)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw")
|
|
||||||
if(NOT WHISPER_NO_AVX512_VBMI)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vbmi")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_AVX512_VNNI)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vnni")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_FMA)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_F16C)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#
|
|
||||||
# POSIX conformance
|
|
||||||
#
|
|
||||||
|
|
||||||
# clock_gettime came in POSIX.1b (1993)
|
|
||||||
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
|
|
||||||
# posix_memalign came in POSIX.1-2001 / SUSv3
|
|
||||||
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
|
|
||||||
add_compile_definitions(_XOPEN_SOURCE=600)
|
|
||||||
|
|
||||||
# Somehow in OpenBSD whenever POSIX conformance is specified
|
|
||||||
# some string functions rely on locale_t availability,
|
|
||||||
# which was introduced in POSIX.1-2008, forcing us to go higher
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
|
||||||
remove_definitions(-D_XOPEN_SOURCE=600)
|
|
||||||
add_compile_definitions(_XOPEN_SOURCE=700)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Data types, macros and functions related to controlling CPU affinity
|
|
||||||
# are available on Linux through GNU extensions in libc
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
add_compile_definitions(_GNU_SOURCE)
|
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
|
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||||
# and on macOS its availability depends on enabling Darwin extensions
|
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||||
# similarly on DragonFly, enabling BSD extensions is necessary
|
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
|
||||||
add_compile_definitions(_DARWIN_C_SOURCE)
|
|
||||||
endif()
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "DragonFly")
|
|
||||||
add_compile_definitions(_DARWIN_C_SOURCE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# alloca is a non-standard interface that is not visible on BSDs when
|
# Required for relocatable CMake package
|
||||||
# POSIX conformance is specified, but not all of them provide a clean way
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||||
# to enable it in such cases
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
|
||||||
add_compile_definitions(__BSD_VISIBLE)
|
|
||||||
endif()
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
|
|
||||||
add_compile_definitions(_NETBSD_SOURCE)
|
|
||||||
endif()
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
|
|
||||||
add_compile_definitions(_BSD_SOURCE)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_PERF)
|
# override ggml options
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
set(GGML_CCACHE ${WHISPER_CCACHE})
|
||||||
endif()
|
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
||||||
|
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
||||||
|
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
||||||
|
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
||||||
|
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
||||||
|
|
||||||
#
|
# transition helpers
|
||||||
# whisper.coreml - Core ML support
|
function (whisper_option_depr TYPE OLD NEW)
|
||||||
#
|
if (${OLD})
|
||||||
|
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
||||||
if (WHISPER_COREML)
|
set(${NEW} ON)
|
||||||
set(TARGET whisper.coreml)
|
|
||||||
|
|
||||||
add_library(${TARGET}
|
|
||||||
coreml/whisper-encoder.h
|
|
||||||
coreml/whisper-encoder.mm
|
|
||||||
coreml/whisper-encoder-impl.h
|
|
||||||
coreml/whisper-encoder-impl.m
|
|
||||||
)
|
|
||||||
|
|
||||||
include(DefaultTargetOptions)
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PUBLIC
|
|
||||||
.
|
|
||||||
)
|
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE ${FOUNDATION_FRAMEWORK} ${COREML_FRAMEWORK})
|
|
||||||
|
|
||||||
set_target_properties(${TARGET} PROPERTIES
|
|
||||||
COMPILE_FLAGS "-fobjc-arc"
|
|
||||||
)
|
|
||||||
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_OPENVINO)
|
|
||||||
set(TARGET whisper.openvino)
|
|
||||||
|
|
||||||
add_library(${TARGET} OBJECT
|
|
||||||
openvino/whisper-openvino-encoder.h
|
|
||||||
openvino/whisper-openvino-encoder.cpp
|
|
||||||
)
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PUBLIC
|
|
||||||
.
|
|
||||||
)
|
|
||||||
|
|
||||||
set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
|
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE openvino::runtime)
|
|
||||||
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#
|
|
||||||
# whisper - this is the main library of the project
|
|
||||||
#
|
|
||||||
|
|
||||||
set(TARGET whisper)
|
|
||||||
|
|
||||||
add_library(${TARGET}
|
|
||||||
ggml.h
|
|
||||||
ggml.c
|
|
||||||
ggml-alloc.h
|
|
||||||
ggml-alloc.c
|
|
||||||
ggml-backend.h
|
|
||||||
ggml-backend.c
|
|
||||||
ggml-quants.h
|
|
||||||
ggml-quants.c
|
|
||||||
${GGML_SOURCES_METAL}
|
|
||||||
${GGML_SOURCES_CUDA}
|
|
||||||
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
|
|
||||||
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
|
||||||
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
|
|
||||||
whisper.h
|
|
||||||
whisper.cpp
|
|
||||||
)
|
|
||||||
|
|
||||||
if (WHISPER_CUDA)
|
|
||||||
target_sources(${TARGET} PRIVATE whisper-mel-cuda.cu)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
include_directories (
|
|
||||||
.
|
|
||||||
)
|
|
||||||
# Set the version numbers
|
|
||||||
set_target_properties(whisper PROPERTIES
|
|
||||||
VERSION ${PROJECT_VERSION}
|
|
||||||
SOVERSION ${SOVERSION}
|
|
||||||
)
|
|
||||||
|
|
||||||
include(DefaultTargetOptions)
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PUBLIC
|
|
||||||
.
|
|
||||||
)
|
|
||||||
|
|
||||||
if (WHISPER_COREML)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE whisper.coreml)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_OPENVINO)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE whisper.openvino)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_MKL)
|
|
||||||
target_link_libraries(${TARGET} PUBLIC MKL::MKL)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (MSVC)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
|
||||||
else()
|
|
||||||
target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (BUILD_SHARED_LIBS)
|
|
||||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
||||||
target_link_libraries(${TARGET} PUBLIC
|
|
||||||
${CMAKE_DL_LIBS}
|
|
||||||
)
|
|
||||||
|
|
||||||
target_compile_definitions(${TARGET} PUBLIC
|
|
||||||
WHISPER_SHARED
|
|
||||||
GGML_SHARED
|
|
||||||
)
|
|
||||||
|
|
||||||
target_compile_definitions(${TARGET} PRIVATE
|
|
||||||
WHISPER_BUILD
|
|
||||||
GGML_BUILD
|
|
||||||
)
|
|
||||||
|
|
||||||
if (WHISPER_METAL)
|
|
||||||
# TODO: I think this should make ggml-metal.m "see" the ggml-metal.metal file from the "bin" directory
|
|
||||||
# but for some reason it does not work here like it does in llama.cpp
|
|
||||||
set_target_properties(${TARGET} PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endfunction()
|
||||||
|
|
||||||
if (GGML_SOURCES_CUDA)
|
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
||||||
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
|
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
||||||
# Only configure ggml CUDA architectures if not globally set
|
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
||||||
if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
|
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
||||||
# Not overridden by user, so set defaults
|
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
||||||
set(GGML_CUDA_ARCHITECTURES 52 61 70)
|
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
||||||
endif()
|
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
||||||
message(STATUS "GGML Configuring CUDA architectures ${GGML_CUDA_ARCHITECTURES}")
|
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
||||||
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
|
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
||||||
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
|
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||||
endif()
|
|
||||||
|
|
||||||
if (EMSCRIPTEN)
|
#
|
||||||
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
# build the library
|
||||||
endif()
|
#
|
||||||
|
|
||||||
target_compile_definitions(${TARGET} PUBLIC
|
add_subdirectory(ggml)
|
||||||
${WHISPER_EXTRA_FLAGS}
|
add_subdirectory(src)
|
||||||
)
|
|
||||||
|
|
||||||
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "ggml.h;whisper.h")
|
#
|
||||||
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
|
# install
|
||||||
|
#
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
install(TARGETS ${TARGET}
|
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
||||||
LIBRARY DESTINATION lib
|
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
||||||
ARCHIVE DESTINATION lib/static
|
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
||||||
RUNTIME DESTINATION bin
|
|
||||||
RESOURCE DESTINATION bin
|
|
||||||
PUBLIC_HEADER DESTINATION include
|
|
||||||
)
|
|
||||||
|
|
||||||
#
|
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||||
# bindings
|
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||||
#
|
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||||
|
|
||||||
add_subdirectory(bindings)
|
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||||
|
|
||||||
|
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
||||||
|
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
||||||
|
|
||||||
|
configure_package_config_file(
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||||
|
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
||||||
|
PATH_VARS
|
||||||
|
WHISPER_INCLUDE_INSTALL_DIR
|
||||||
|
WHISPER_LIB_INSTALL_DIR
|
||||||
|
WHISPER_BIN_INSTALL_DIR )
|
||||||
|
|
||||||
|
write_basic_package_version_file(
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||||
|
VERSION ${WHISPER_INSTALL_VERSION}
|
||||||
|
COMPATIBILITY SameMajorVersion)
|
||||||
|
|
||||||
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||||
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||||
|
|
||||||
|
install(
|
||||||
|
FILES convert-hf-to-gguf.py
|
||||||
|
PERMISSIONS
|
||||||
|
OWNER_READ
|
||||||
|
OWNER_WRITE
|
||||||
|
OWNER_EXECUTE
|
||||||
|
GROUP_READ
|
||||||
|
GROUP_EXECUTE
|
||||||
|
WORLD_READ
|
||||||
|
WORLD_EXECUTE
|
||||||
|
DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||||
|
|
||||||
|
configure_file(cmake/whisper.pc.in
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||||
|
@ONLY)
|
||||||
|
|
||||||
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||||
|
DESTINATION lib/pkgconfig)
|
||||||
|
|
||||||
#
|
#
|
||||||
# programs, examples and tests
|
# programs, examples and tests
|
||||||
#
|
#
|
||||||
|
|
||||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||||
enable_testing()
|
#include(CTest)
|
||||||
add_subdirectory(tests)
|
#add_subdirectory(tests)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (WHISPER_BUILD_EXAMPLES)
|
if (WHISPER_BUILD_EXAMPLES)
|
||||||
|
@ -27,17 +27,15 @@ let package = Package(
|
|||||||
"samples",
|
"samples",
|
||||||
"tests",
|
"tests",
|
||||||
"CMakeLists.txt",
|
"CMakeLists.txt",
|
||||||
"ggml-cuda.cu",
|
|
||||||
"ggml-cuda.h",
|
|
||||||
"Makefile"
|
"Makefile"
|
||||||
],
|
],
|
||||||
sources: [
|
sources: [
|
||||||
"ggml.c",
|
"ggml/src/ggml.c",
|
||||||
"whisper.cpp",
|
"src/whisper.cpp",
|
||||||
"ggml-alloc.c",
|
"ggml/src/ggml-alloc.c",
|
||||||
"ggml-backend.c",
|
"ggml/src/ggml-backend.c",
|
||||||
"ggml-quants.c",
|
"ggml/src/ggml-quants.c",
|
||||||
"ggml-metal.m"
|
"ggml/src/ggml-metal.m"
|
||||||
],
|
],
|
||||||
resources: [.process("ggml-metal.metal")],
|
resources: [.process("ggml-metal.metal")],
|
||||||
publicHeadersPath: "spm-headers",
|
publicHeadersPath: "spm-headers",
|
||||||
|
@ -418,7 +418,7 @@ Now build `whisper.cpp` with CUDA support:
|
|||||||
|
|
||||||
```
|
```
|
||||||
make clean
|
make clean
|
||||||
WHISPER_CUDA=1 make -j
|
GGML_CUDA=1 make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
## BLAS CPU support via OpenBLAS
|
## BLAS CPU support via OpenBLAS
|
||||||
@ -430,7 +430,7 @@ Now build `whisper.cpp` with OpenBLAS support:
|
|||||||
|
|
||||||
```
|
```
|
||||||
make clean
|
make clean
|
||||||
WHISPER_OPENBLAS=1 make -j
|
GGML_OPENBLAS=1 make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
## BLAS CPU support via Intel MKL
|
## BLAS CPU support via Intel MKL
|
||||||
|
@ -1 +0,0 @@
|
|||||||
Subproject commit a2085436c2eb796af90956b62bd64731f5e5b823
|
|
@ -1,54 +0,0 @@
|
|||||||
# Add new build types
|
|
||||||
|
|
||||||
# ReleaseGG - Release with enabled asserts
|
|
||||||
|
|
||||||
SET(CMAKE_CXX_FLAGS_RELEASEGG
|
|
||||||
"-O3"
|
|
||||||
CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_C_FLAGS_RELEASEGG
|
|
||||||
"-O3"
|
|
||||||
CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
|
||||||
""
|
|
||||||
CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
|
|
||||||
""
|
|
||||||
CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
MARK_AS_ADVANCED(
|
|
||||||
CMAKE_CXX_FLAGS_RELEASEGG
|
|
||||||
CMAKE_C_FLAGS_RELEASEGG
|
|
||||||
CMAKE_EXE_LINKER_FLAGS_RELEASEGG
|
|
||||||
CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )
|
|
||||||
|
|
||||||
# RelWithDebInfoGG - RelWithDebInfo with enabled asserts
|
|
||||||
|
|
||||||
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
|
||||||
"-O2 -g"
|
|
||||||
CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
|
||||||
"-O2 -g"
|
|
||||||
CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
|
||||||
""
|
|
||||||
CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
SET(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
|
|
||||||
""
|
|
||||||
CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
|
|
||||||
FORCE )
|
|
||||||
MARK_AS_ADVANCED(
|
|
||||||
CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
|
|
||||||
CMAKE_C_FLAGS_RELWITHDEBINFOGG
|
|
||||||
CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
|
|
||||||
CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )
|
|
||||||
|
|
||||||
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
|
|
||||||
endif()
|
|
58
cmake/build-info.cmake
Normal file
58
cmake/build-info.cmake
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
set(BUILD_NUMBER 0)
|
||||||
|
set(BUILD_COMMIT "unknown")
|
||||||
|
set(BUILD_COMPILER "unknown")
|
||||||
|
set(BUILD_TARGET "unknown")
|
||||||
|
|
||||||
|
# Look for git
|
||||||
|
find_package(Git)
|
||||||
|
if(NOT Git_FOUND)
|
||||||
|
find_program(GIT_EXECUTABLE NAMES git git.exe)
|
||||||
|
if(GIT_EXECUTABLE)
|
||||||
|
set(Git_FOUND TRUE)
|
||||||
|
message(STATUS "Found Git: ${GIT_EXECUTABLE}")
|
||||||
|
else()
|
||||||
|
message(WARNING "Git not found. Build info will not be accurate.")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Get the commit count and hash
|
||||||
|
if(Git_FOUND)
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
OUTPUT_VARIABLE HEAD
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
RESULT_VARIABLE RES
|
||||||
|
)
|
||||||
|
if (RES EQUAL 0)
|
||||||
|
set(BUILD_COMMIT ${HEAD})
|
||||||
|
endif()
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
OUTPUT_VARIABLE COUNT
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
RESULT_VARIABLE RES
|
||||||
|
)
|
||||||
|
if (RES EQUAL 0)
|
||||||
|
set(BUILD_NUMBER ${COUNT})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
|
||||||
|
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
||||||
|
else()
|
||||||
|
execute_process(
|
||||||
|
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
|
||||||
|
OUTPUT_VARIABLE OUT
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
)
|
||||||
|
set(BUILD_COMPILER ${OUT})
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||||
|
OUTPUT_VARIABLE OUT
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
)
|
||||||
|
set(BUILD_TARGET ${OUT})
|
||||||
|
endif()
|
65
cmake/whisper-config.cmake.in
Normal file
65
cmake/whisper-config.cmake.in
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
# whisper-config.cmake.in
#
# Template for the CMake package config file installed alongside whisper.
# It is expanded by configure_package_config_file(), which is called with
# PATH_VARS WHISPER_INCLUDE_INSTALL_DIR, WHISPER_LIB_INSTALL_DIR and
# WHISPER_BIN_INSTALL_DIR. Only the matching @PACKAGE_WHISPER_*@ placeholders
# therefore receive a value; any other prefix expands to an empty string and
# makes set_and_check() fail when a consumer calls find_package(whisper).

set(WHISPER_VERSION      @WHISPER_INSTALL_VERSION@)
set(WHISPER_BUILD_COMMIT @WHISPER_BUILD_COMMIT@)
set(WHISPER_BUILD_NUMBER @WHISPER_BUILD_NUMBER@)
set(WHISPER_SHARED_LIB   @BUILD_SHARED_LIBS@)

# Backend switches the library was built with; they decide which
# dependencies have to be located below.
set(GGML_BLAS       @GGML_BLAS@)
set(GGML_CUDA       @GGML_CUDA@)
set(GGML_METAL      @GGML_METAL@)
set(GGML_HIPBLAS    @GGML_HIPBLAS@)
set(GGML_ACCELERATE @GGML_ACCELERATE@)

@PACKAGE_INIT@

set_and_check(WHISPER_INCLUDE_DIR "@PACKAGE_WHISPER_INCLUDE_INSTALL_DIR@")
set_and_check(WHISPER_LIB_DIR     "@PACKAGE_WHISPER_LIB_INSTALL_DIR@")
set_and_check(WHISPER_BIN_DIR     "@PACKAGE_WHISPER_BIN_INSTALL_DIR@")

# Ensure transitive dependencies are satisfied

find_package(Threads REQUIRED)

if (APPLE AND GGML_ACCELERATE)
    find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
endif()

if (GGML_BLAS)
    find_package(BLAS REQUIRED)
endif()

if (GGML_CUDA)
    find_package(CUDAToolkit REQUIRED)
endif()

if (GGML_METAL)
    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
    find_library(METAL_FRAMEWORK Metal REQUIRED)
    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
endif()

if (GGML_HIPBLAS)
    find_package(hip REQUIRED)
    find_package(hipblas REQUIRED)
    find_package(rocblas REQUIRED)
endif()

# Locate the installed whisper library itself.
find_library(whisper_LIBRARY whisper
    REQUIRED
    HINTS ${WHISPER_LIB_DIR})

# NOTE(review): WHISPER_EXTRA_LIBS is only meaningful if the build defines it
# before this template is configured - confirm against the top-level
# CMakeLists.txt.
set(_whisper_link_deps "Threads::Threads" "@WHISPER_EXTRA_LIBS@")
# Directory-level compile definitions captured at configure time.
set(_whisper_transient_defines "@WHISPER_TRANSIENT_DEFINES@")

add_library(whisper UNKNOWN IMPORTED)

set_target_properties(whisper
    PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${WHISPER_INCLUDE_DIR}"
        INTERFACE_LINK_LIBRARIES "${_whisper_link_deps}"
        INTERFACE_COMPILE_DEFINITIONS "${_whisper_transient_defines}"
        IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
        IMPORTED_LOCATION "${whisper_LIBRARY}"
        INTERFACE_COMPILE_FEATURES cxx_std_11
        POSITION_INDEPENDENT_CODE ON )

check_required_components(whisper)
|
10
cmake/whisper.pc.in
Normal file
10
cmake/whisper.pc.in
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
prefix=@CMAKE_INSTALL_PREFIX@
|
||||||
|
exec_prefix=${prefix}
|
||||||
|
libdir=${exec_prefix}/lib
|
||||||
|
includedir=${prefix}/include
|
||||||
|
|
||||||
|
Name: whisper
|
||||||
|
Description: Port of OpenAI's Whisper model in C/C++
|
||||||
|
Version: @PROJECT_VERSION@
|
||||||
|
Libs: -L${libdir} -lwhisper
|
||||||
|
Cflags: -I${includedir}
|
@ -18,7 +18,7 @@ struct whisper_params {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int whisper_bench_full(const whisper_params & params) {
|
static int whisper_bench_full(const whisper_params & params) {
|
||||||
// whisper init
|
// whisper init
|
||||||
|
|
||||||
struct whisper_context_params cparams = whisper_context_default_params();
|
struct whisper_context_params cparams = whisper_context_default_params();
|
||||||
|
@ -59,7 +59,7 @@ struct whisper_params {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -130,7 +130,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string transcribe(
|
static std::string transcribe(
|
||||||
whisper_context * ctx,
|
whisper_context * ctx,
|
||||||
const whisper_params & params,
|
const whisper_params & params,
|
||||||
const std::vector<float> & pcmf32,
|
const std::vector<float> & pcmf32,
|
||||||
@ -216,7 +216,7 @@ std::string transcribe(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
static std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
||||||
std::vector<std::string> allowed_commands;
|
std::vector<std::string> allowed_commands;
|
||||||
|
|
||||||
std::ifstream ifs(fname);
|
std::ifstream ifs(fname);
|
||||||
@ -238,7 +238,7 @@ std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
|||||||
return allowed_commands;
|
return allowed_commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> get_words(const std::string &txt) {
|
static std::vector<std::string> get_words(const std::string &txt) {
|
||||||
std::vector<std::string> words;
|
std::vector<std::string> words;
|
||||||
|
|
||||||
std::istringstream iss(txt);
|
std::istringstream iss(txt);
|
||||||
@ -252,7 +252,7 @@ std::vector<std::string> get_words(const std::string &txt) {
|
|||||||
|
|
||||||
// command-list mode
|
// command-list mode
|
||||||
// guide the transcription to match the most likely command from a provided list
|
// guide the transcription to match the most likely command from a provided list
|
||||||
int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
|
static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s: guided mode\n", __func__);
|
fprintf(stderr, "%s: guided mode\n", __func__);
|
||||||
|
|
||||||
@ -463,7 +463,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
|
|||||||
|
|
||||||
// always-prompt mode
|
// always-prompt mode
|
||||||
// transcribe the voice into text after valid prompt
|
// transcribe the voice into text after valid prompt
|
||||||
int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
||||||
bool is_running = true;
|
bool is_running = true;
|
||||||
bool ask_prompt = true;
|
bool ask_prompt = true;
|
||||||
|
|
||||||
@ -543,7 +543,7 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
|
|||||||
|
|
||||||
// general-purpose mode
|
// general-purpose mode
|
||||||
// freely transcribe the voice into text
|
// freely transcribe the voice into text
|
||||||
int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
||||||
bool is_running = true;
|
bool is_running = true;
|
||||||
bool have_prompt = false;
|
bool have_prompt = false;
|
||||||
bool ask_prompt = true;
|
bool ask_prompt = true;
|
||||||
|
@ -219,7 +219,7 @@ bool sdl_poll_events() {
|
|||||||
case SDL_QUIT:
|
case SDL_QUIT:
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
} break;
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
namespace grammar_parser {
|
namespace grammar_parser {
|
||||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||||
// copied from whisper.cpp
|
// copied from whisper.cpp
|
||||||
std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
||||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||||
uint8_t highbits = first_byte >> 4;
|
uint8_t highbits = first_byte >> 4;
|
||||||
@ -24,19 +24,19 @@ namespace grammar_parser {
|
|||||||
return std::make_pair(value, pos);
|
return std::make_pair(value, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
||||||
return result.first->second;
|
return result.first->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||||
return next_id;
|
return next_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_rule(
|
static void add_rule(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
const std::vector<whisper_grammar_element> & rule) {
|
const std::vector<whisper_grammar_element> & rule) {
|
||||||
@ -46,11 +46,11 @@ namespace grammar_parser {
|
|||||||
state.rules[rule_id] = rule;
|
state.rules[rule_id] = rule;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_word_char(char c) {
|
static bool is_word_char(char c) {
|
||||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
const char * end = src + size;
|
const char * end = src + size;
|
||||||
uint32_t value = 0;
|
uint32_t value = 0;
|
||||||
@ -73,7 +73,7 @@ namespace grammar_parser {
|
|||||||
return std::make_pair(value, pos);
|
return std::make_pair(value, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_space(const char * src, bool newline_ok) {
|
static const char * parse_space(const char * src, bool newline_ok) {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||||
@ -88,7 +88,7 @@ namespace grammar_parser {
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_name(const char * src) {
|
static const char * parse_name(const char * src) {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
while (is_word_char(*pos)) {
|
while (is_word_char(*pos)) {
|
||||||
pos++;
|
pos++;
|
||||||
@ -99,7 +99,7 @@ namespace grammar_parser {
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, const char *> parse_char(const char * src) {
|
static std::pair<uint32_t, const char *> parse_char(const char * src) {
|
||||||
if (*src == '\\') {
|
if (*src == '\\') {
|
||||||
switch (src[1]) {
|
switch (src[1]) {
|
||||||
case 'x': return parse_hex(src + 2, 2);
|
case 'x': return parse_hex(src + 2, 2);
|
||||||
@ -122,14 +122,14 @@ namespace grammar_parser {
|
|||||||
throw std::runtime_error("unexpected end of input");
|
throw std::runtime_error("unexpected end of input");
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_alternates(
|
static const char * parse_alternates(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
const char * src,
|
const char * src,
|
||||||
const std::string & rule_name,
|
const std::string & rule_name,
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
bool is_nested);
|
bool is_nested);
|
||||||
|
|
||||||
const char * parse_sequence(
|
static const char * parse_sequence(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
const char * src,
|
const char * src,
|
||||||
const std::string & rule_name,
|
const std::string & rule_name,
|
||||||
@ -229,7 +229,7 @@ namespace grammar_parser {
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_alternates(
|
static const char * parse_alternates(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
const char * src,
|
const char * src,
|
||||||
const std::string & rule_name,
|
const std::string & rule_name,
|
||||||
@ -247,7 +247,7 @@ namespace grammar_parser {
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_rule(parse_state & state, const char * src) {
|
static const char * parse_rule(parse_state & state, const char * src) {
|
||||||
const char * name_end = parse_name(src);
|
const char * name_end = parse_name(src);
|
||||||
const char * pos = parse_space(name_end, false);
|
const char * pos = parse_space(name_end, false);
|
||||||
size_t name_len = name_end - src;
|
size_t name_len = name_end - src;
|
||||||
@ -285,7 +285,7 @@ namespace grammar_parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_grammar_char(FILE * file, uint32_t c) {
|
static void print_grammar_char(FILE * file, uint32_t c) {
|
||||||
if (0x20 <= c && c <= 0x7f) {
|
if (0x20 <= c && c <= 0x7f) {
|
||||||
fprintf(file, "%c", static_cast<char>(c));
|
fprintf(file, "%c", static_cast<char>(c));
|
||||||
} else {
|
} else {
|
||||||
@ -294,7 +294,7 @@ namespace grammar_parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_char_element(whisper_grammar_element elem) {
|
static bool is_char_element(whisper_grammar_element elem) {
|
||||||
switch (elem.type) {
|
switch (elem.type) {
|
||||||
case WHISPER_GRETYPE_CHAR: return true;
|
case WHISPER_GRETYPE_CHAR: return true;
|
||||||
case WHISPER_GRETYPE_CHAR_NOT: return true;
|
case WHISPER_GRETYPE_CHAR_NOT: return true;
|
||||||
@ -304,7 +304,7 @@ namespace grammar_parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_rule_binary(FILE * file, const std::vector<whisper_grammar_element> & rule) {
|
static void print_rule_binary(FILE * file, const std::vector<whisper_grammar_element> & rule) {
|
||||||
for (auto elem : rule) {
|
for (auto elem : rule) {
|
||||||
switch (elem.type) {
|
switch (elem.type) {
|
||||||
case WHISPER_GRETYPE_END: fprintf(file, "END"); break;
|
case WHISPER_GRETYPE_END: fprintf(file, "END"); break;
|
||||||
@ -334,7 +334,7 @@ namespace grammar_parser {
|
|||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_rule(
|
static void print_rule(
|
||||||
FILE * file,
|
FILE * file,
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
const std::vector<whisper_grammar_element> & rule,
|
const std::vector<whisper_grammar_element> & rule,
|
||||||
@ -413,7 +413,7 @@ namespace grammar_parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<const whisper_grammar_element *> parse_state::c_rules() const{
|
std::vector<const whisper_grammar_element *> parse_state::c_rules() const {
|
||||||
std::vector<const whisper_grammar_element *> ret;
|
std::vector<const whisper_grammar_element *> ret;
|
||||||
for (const auto & rule : rules) {
|
for (const auto & rule : rules) {
|
||||||
ret.push_back(rule.data());
|
ret.push_back(rule.data());
|
||||||
|
@ -53,7 +53,7 @@ struct commandset {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
uint64_t wait_for_vad(audio_async & audio, json jparams, const whisper_params & params, uint64_t maxlength_ms, std::vector<float> & pcmf32) {
|
static uint64_t wait_for_vad(audio_async & audio, json jparams, const whisper_params & params, uint64_t maxlength_ms, std::vector<float> & pcmf32) {
|
||||||
using namespace std::chrono;
|
using namespace std::chrono;
|
||||||
uint64_t time_now = time_point_cast<milliseconds>(system_clock::now()).time_since_epoch().count();
|
uint64_t time_now = time_point_cast<milliseconds>(system_clock::now()).time_since_epoch().count();
|
||||||
uint64_t start_time = time_now;
|
uint64_t start_time = time_now;
|
||||||
@ -153,7 +153,7 @@ uint64_t wait_for_vad(audio_async & audio, json jparams, const whisper_params &
|
|||||||
return time_now;
|
return time_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
json unguided_transcription(struct whisper_context * ctx, audio_async &audio, json jparams, const whisper_params ¶ms) {
|
static json unguided_transcription(struct whisper_context * ctx, audio_async &audio, json jparams, const whisper_params ¶ms) {
|
||||||
std::vector<whisper_token> prompt_tokens;
|
std::vector<whisper_token> prompt_tokens;
|
||||||
std::vector<float> pcmf32;
|
std::vector<float> pcmf32;
|
||||||
uint64_t unprocessed_audio_timestamp = wait_for_vad(audio, jparams, params, 10000U, pcmf32);
|
uint64_t unprocessed_audio_timestamp = wait_for_vad(audio, jparams, params, 10000U, pcmf32);
|
||||||
@ -199,7 +199,7 @@ json unguided_transcription(struct whisper_context * ctx, audio_async &audio, js
|
|||||||
|
|
||||||
// command-list mode
|
// command-list mode
|
||||||
// guide the transcription to match the most likely command from a provided list
|
// guide the transcription to match the most likely command from a provided list
|
||||||
json guided_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, json jparams, std::vector<struct commandset> commandset_list) {
|
static json guided_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, json jparams, std::vector<struct commandset> commandset_list) {
|
||||||
struct commandset cs = commandset_list[jparams.value("commandset_index", commandset_list.size()-1)];
|
struct commandset cs = commandset_list[jparams.value("commandset_index", commandset_list.size()-1)];
|
||||||
std::vector<float> pcmf32;
|
std::vector<float> pcmf32;
|
||||||
uint64_t unprocessed_audio_timestamp = wait_for_vad(audio, jparams, params, 2000U, pcmf32);
|
uint64_t unprocessed_audio_timestamp = wait_for_vad(audio, jparams, params, 2000U, pcmf32);
|
||||||
@ -285,7 +285,7 @@ json guided_transcription(struct whisper_context * ctx, audio_async &audio, cons
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
json register_commandset(struct whisper_context * ctx, json jparams, std::vector<struct commandset> &commandset_list) {
|
static json register_commandset(struct whisper_context * ctx, json jparams, std::vector<struct commandset> &commandset_list) {
|
||||||
// TODO: check for token collision
|
// TODO: check for token collision
|
||||||
struct commandset cs;
|
struct commandset cs;
|
||||||
|
|
||||||
@ -325,7 +325,8 @@ json register_commandset(struct whisper_context * ctx, json jparams, std::vector
|
|||||||
commandset_list.push_back(cs);
|
commandset_list.push_back(cs);
|
||||||
return json{{"index",index}};
|
return json{{"index",index}};
|
||||||
}
|
}
|
||||||
json seek(struct whisper_context * /*ctx*/, audio_async & /*audio*/, json /*params*/) {
|
|
||||||
|
static json seek(struct whisper_context * /*ctx*/, audio_async & /*audio*/, json /*params*/) {
|
||||||
// whisper_state has the pertinent offsets, but there also seem to be a large
|
// whisper_state has the pertinent offsets, but there also seem to be a large
|
||||||
// number of scratch buffers that would prevent rewinding context in a manner similar to llama
|
// number of scratch buffers that would prevent rewinding context in a manner similar to llama
|
||||||
// I'll give this a another pass once everything else is implemented,
|
// I'll give this a another pass once everything else is implemented,
|
||||||
@ -335,7 +336,8 @@ json seek(struct whisper_context * /*ctx*/, audio_async & /*audio*/, json /*para
|
|||||||
{"message", "Seeking is not yet supported."}
|
{"message", "Seeking is not yet supported."}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
json parse_job(const json &body, struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, std::vector<struct commandset> &commandset_list) {
|
|
||||||
|
static json parse_job(const json &body, struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, std::vector<struct commandset> &commandset_list) {
|
||||||
// See: https://www.jsonrpc.org/specification
|
// See: https://www.jsonrpc.org/specification
|
||||||
json id = body.at("id");
|
json id = body.at("id");
|
||||||
try {
|
try {
|
||||||
@ -375,7 +377,7 @@ json parse_job(const json &body, struct whisper_context * ctx, audio_async &audi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_loop(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
|
static void process_loop(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
|
||||||
std::deque<json> jobqueue;
|
std::deque<json> jobqueue;
|
||||||
std::vector<struct commandset> commandset_list;
|
std::vector<struct commandset> commandset_list;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// helper function to replace substrings
|
// helper function to replace substrings
|
||||||
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
||||||
for (size_t pos = 0; ; pos += replace.length()) {
|
for (size_t pos = 0; ; pos += replace.length()) {
|
||||||
pos = s.find(search, pos);
|
pos = s.find(search, pos);
|
||||||
if (pos == std::string::npos) break;
|
if (pos == std::string::npos) break;
|
||||||
@ -94,17 +94,17 @@ struct whisper_params {
|
|||||||
grammar_parser::parse_state grammar_parsed;
|
grammar_parser::parse_state grammar_parsed;
|
||||||
};
|
};
|
||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
static void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
char* whisper_param_turn_lowercase(char* in){
|
static char * whisper_param_turn_lowercase(char * in){
|
||||||
int string_len = strlen(in);
|
int string_len = strlen(in);
|
||||||
for(int i = 0; i < string_len; i++){
|
for (int i = 0; i < string_len; i++){
|
||||||
*(in+i) = tolower((unsigned char)*(in+i));
|
*(in+i) = tolower((unsigned char)*(in+i));
|
||||||
}
|
}
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -182,7 +182,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
|
fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
@ -248,7 +248,7 @@ struct whisper_print_user_data {
|
|||||||
int progress_prev;
|
int progress_prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
static std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
||||||
std::string speaker = "";
|
std::string speaker = "";
|
||||||
const int64_t n_samples = pcmf32s[0].size();
|
const int64_t n_samples = pcmf32s[0].size();
|
||||||
|
|
||||||
@ -280,7 +280,8 @@ std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s
|
|||||||
|
|
||||||
return speaker;
|
return speaker;
|
||||||
}
|
}
|
||||||
void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
|
|
||||||
|
static void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
|
||||||
int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
|
int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
|
||||||
int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
|
int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
|
||||||
if (progress >= *progress_prev + progress_step) {
|
if (progress >= *progress_prev + progress_step) {
|
||||||
@ -289,7 +290,7 @@ void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct wh
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
|
static void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
|
||||||
const auto & params = *((whisper_print_user_data *) user_data)->params;
|
const auto & params = *((whisper_print_user_data *) user_data)->params;
|
||||||
const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
|
const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
|
||||||
|
|
||||||
@ -358,7 +359,7 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
if (!fout.is_open()) {
|
if (!fout.is_open()) {
|
||||||
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
||||||
@ -385,7 +386,7 @@ bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
if (!fout.is_open()) {
|
if (!fout.is_open()) {
|
||||||
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
||||||
@ -417,7 +418,7 @@ bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
if (!fout.is_open()) {
|
if (!fout.is_open()) {
|
||||||
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
||||||
@ -446,7 +447,7 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *escape_double_quotes_and_backslashes(const char *str) {
|
static char * escape_double_quotes_and_backslashes(const char * str) {
|
||||||
if (str == NULL) {
|
if (str == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -459,7 +460,7 @@ char *escape_double_quotes_and_backslashes(const char *str) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char *escaped = (char *)calloc(escaped_length, 1); // pre-zeroed
|
char * escaped = (char *)calloc(escaped_length, 1); // pre-zeroed
|
||||||
if (escaped == NULL) {
|
if (escaped == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -478,7 +479,7 @@ char *escape_double_quotes_and_backslashes(const char *str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// double quote should be escaped by another double quote. (rfc4180)
|
// double quote should be escaped by another double quote. (rfc4180)
|
||||||
char *escape_double_quotes_in_csv(const char *str) {
|
static char * escape_double_quotes_in_csv(const char * str) {
|
||||||
if (str == NULL) {
|
if (str == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -509,7 +510,7 @@ char *escape_double_quotes_in_csv(const char *str) {
|
|||||||
return escaped;
|
return escaped;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
if (!fout.is_open()) {
|
if (!fout.is_open()) {
|
||||||
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
||||||
@ -544,7 +545,7 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
|
static bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
||||||
|
|
||||||
@ -563,7 +564,7 @@ bool output_score(struct whisper_context * ctx, const char * fname, const whispe
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_json(
|
static bool output_json(
|
||||||
struct whisper_context * ctx,
|
struct whisper_context * ctx,
|
||||||
const char * fname,
|
const char * fname,
|
||||||
const whisper_params & params,
|
const whisper_params & params,
|
||||||
@ -734,7 +735,7 @@ bool output_json(
|
|||||||
// karaoke video generation
|
// karaoke video generation
|
||||||
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
||||||
// TODO: font parameter adjustments
|
// TODO: font parameter adjustments
|
||||||
bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
|
|
||||||
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
||||||
@ -859,7 +860,7 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool output_lrc(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
static bool output_lrc(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||||
std::ofstream fout(fname);
|
std::ofstream fout(fname);
|
||||||
if (!fout.is_open()) {
|
if (!fout.is_open()) {
|
||||||
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
||||||
@ -900,7 +901,7 @@ bool output_lrc(struct whisper_context * ctx, const char * fname, const whisper_
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
|
static void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
whisper_params params;
|
whisper_params params;
|
||||||
|
@ -36,7 +36,7 @@ struct whisper_filters {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// quantize a model
|
// quantize a model
|
||||||
bool whisper_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
|
static bool whisper_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
|
||||||
gpt_vocab vocab;
|
gpt_vocab vocab;
|
||||||
|
|
||||||
printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str());
|
printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str());
|
||||||
|
@ -44,7 +44,7 @@ struct whisper_params {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -174,6 +174,7 @@ extern "C" {
|
|||||||
LLAMA_POOLING_TYPE_NONE = 0,
|
LLAMA_POOLING_TYPE_NONE = 0,
|
||||||
LLAMA_POOLING_TYPE_MEAN = 1,
|
LLAMA_POOLING_TYPE_MEAN = 1,
|
||||||
LLAMA_POOLING_TYPE_CLS = 2,
|
LLAMA_POOLING_TYPE_CLS = 2,
|
||||||
|
LLAMA_POOLING_TYPE_LAST = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llama_split_mode {
|
enum llama_split_mode {
|
||||||
@ -293,7 +294,6 @@ extern "C" {
|
|||||||
|
|
||||||
enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
||||||
enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
|
enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
|
||||||
// (ignored if no pooling layer)
|
|
||||||
|
|
||||||
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
||||||
float rope_freq_base; // RoPE base frequency, 0 = from model
|
float rope_freq_base; // RoPE base frequency, 0 = from model
|
||||||
@ -786,6 +786,10 @@ extern "C" {
|
|||||||
// Get the number of threads used for prompt and batch processing (multiple token).
|
// Get the number of threads used for prompt and batch processing (multiple token).
|
||||||
LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
|
LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
|
||||||
|
|
||||||
|
// Set whether the model is in embeddings mode or not
|
||||||
|
// If true, embeddings will be returned but logits will not
|
||||||
|
LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
|
||||||
|
|
||||||
// Set whether to use causal attention or not
|
// Set whether to use causal attention or not
|
||||||
// If set to true, the model will only attend to the past tokens
|
// If set to true, the model will only attend to the past tokens
|
||||||
LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
|
LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
#include <regex>
|
#include <regex>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
|
static std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
|
||||||
auto * model = llama_get_model(ctx);
|
auto * model = llama_get_model(ctx);
|
||||||
|
|
||||||
// upper limit for the number of tokens
|
// upper limit for the number of tokens
|
||||||
@ -33,7 +33,7 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
||||||
std::vector<char> result(8, 0);
|
std::vector<char> result(8, 0);
|
||||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
|
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
|
||||||
if (n_tokens < 0) {
|
if (n_tokens < 0) {
|
||||||
@ -83,7 +83,7 @@ struct whisper_params {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -168,7 +168,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string transcribe(
|
static std::string transcribe(
|
||||||
whisper_context * ctx,
|
whisper_context * ctx,
|
||||||
const whisper_params & params,
|
const whisper_params & params,
|
||||||
const std::vector<float> & pcmf32,
|
const std::vector<float> & pcmf32,
|
||||||
@ -235,7 +235,7 @@ std::string transcribe(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> get_words(const std::string &txt) {
|
static std::vector<std::string> get_words(const std::string &txt) {
|
||||||
std::vector<std::string> words;
|
std::vector<std::string> words;
|
||||||
|
|
||||||
std::istringstream iss(txt);
|
std::istringstream iss(txt);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -226,8 +226,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|||||||
assert(offset_end <= cpts.size());
|
assert(offset_end <= cpts.size());
|
||||||
start = offset_end;
|
start = offset_end;
|
||||||
|
|
||||||
|
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
||||||
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
||||||
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
|
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
||||||
@ -309,7 +310,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
|
|||||||
}
|
}
|
||||||
|
|
||||||
// regex: \s+(?!\S)
|
// regex: \s+(?!\S)
|
||||||
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
|
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
||||||
pos += num_whitespaces - 1;
|
pos += num_whitespaces - 1;
|
||||||
_add_token(pos);
|
_add_token(pos);
|
||||||
continue;
|
continue;
|
||||||
@ -344,8 +345,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|||||||
assert(offset_end <= cpts.size());
|
assert(offset_end <= cpts.size());
|
||||||
start = offset_end;
|
start = offset_end;
|
||||||
|
|
||||||
|
static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
|
||||||
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
auto _get_cpt = [&] (const size_t pos) -> uint32_t {
|
||||||
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : 0;
|
return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
|
||||||
@ -450,7 +452,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
|
|||||||
}
|
}
|
||||||
|
|
||||||
// regex: \s+(?!\S)
|
// regex: \s+(?!\S)
|
||||||
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != 0) {
|
if (num_whitespaces > 1 && _get_cpt(pos+num_whitespaces) != OUT_OF_RANGE) {
|
||||||
pos += num_whitespaces - 1;
|
pos += num_whitespaces - 1;
|
||||||
_add_token(pos);
|
_add_token(pos);
|
||||||
continue;
|
continue;
|
||||||
@ -594,6 +596,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
|||||||
|
|
||||||
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
|
||||||
std::vector<uint32_t> result;
|
std::vector<uint32_t> result;
|
||||||
|
result.reserve(utf8.size());
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
while (offset < utf8.size()) {
|
while (offset < utf8.size()) {
|
||||||
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
result.push_back(unicode_cpt_from_utf8(utf8, offset));
|
||||||
@ -679,10 +682,14 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int cpt_flag = unicode_cpt_flags(cpts[i]).category_flag();
|
const auto flags = unicode_cpt_flags(cpts[i]);
|
||||||
|
|
||||||
if (k_ucat_cpt.find(cpt_flag) != k_ucat_cpt.end()) {
|
if (flags.is_whitespace) {
|
||||||
text_collapsed[i] = k_ucat_cpt.at(cpt_flag);
|
//NOTE: C++ std::regex \s does not mach 0x85, Rust and Python regex does.
|
||||||
|
//text_collapsed[i] = (char) 0x85; // <Next Line> as whitespace fallback
|
||||||
|
text_collapsed[i] = (char) 0x0B; // <vertical tab> as whitespace fallback
|
||||||
|
} else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) {
|
||||||
|
text_collapsed[i] = k_ucat_cpt.at(flags.category_flag());
|
||||||
} else {
|
} else {
|
||||||
text_collapsed[i] = (char) 0xD0; // fallback
|
text_collapsed[i] = (char) 0xD0; // fallback
|
||||||
}
|
}
|
||||||
@ -766,9 +773,16 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||||||
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets);
|
||||||
} else {
|
} else {
|
||||||
// no unicode category used, we can use std::wregex directly
|
// no unicode category used, we can use std::wregex directly
|
||||||
const std::wstring wtext = unicode_wstring_from_utf8(text);
|
|
||||||
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr);
|
||||||
|
|
||||||
|
// std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback
|
||||||
|
std::wstring wtext(cpts.begin(), cpts.end());
|
||||||
|
for (size_t i = 0; i < wtext.size(); ++i) {
|
||||||
|
if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
|
||||||
|
wtext[i] = 0x0B;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//printf("text: %s\n", text.c_str());
|
//printf("text: %s\n", text.c_str());
|
||||||
//printf("regex_expr: %s\n", regex_expr.c_str());
|
//printf("regex_expr: %s\n", regex_expr.c_str());
|
||||||
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
|
bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets);
|
||||||
|
@ -72,7 +72,7 @@ struct gpt2_model {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// load the model's weights from a file
|
// load the model's weights from a file
|
||||||
bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
|
static bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
|
||||||
printf("%s: loading model from '%s'\n", __func__, fname.c_str());
|
printf("%s: loading model from '%s'\n", __func__, fname.c_str());
|
||||||
|
|
||||||
auto fin = std::ifstream(fname, std::ios::binary);
|
auto fin = std::ifstream(fname, std::ios::binary);
|
||||||
@ -380,7 +380,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
|
|||||||
// - embd_w: the predicted logits for the next token
|
// - embd_w: the predicted logits for the next token
|
||||||
//
|
//
|
||||||
// TODO: sync latest version from ggml repo
|
// TODO: sync latest version from ggml repo
|
||||||
bool gpt2_eval(
|
static bool gpt2_eval(
|
||||||
const gpt2_model & model,
|
const gpt2_model & model,
|
||||||
const int n_threads,
|
const int n_threads,
|
||||||
const int n_past,
|
const int n_past,
|
||||||
|
@ -44,7 +44,7 @@ struct whisper_params {
|
|||||||
|
|
||||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||||
|
|
||||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
std::string arg = argv[i];
|
std::string arg = argv[i];
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
|
static std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
|
||||||
const auto t_start = std::chrono::high_resolution_clock::now();
|
const auto t_start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
prob = 0.0f;
|
prob = 0.0f;
|
||||||
|
@ -5,15 +5,14 @@ project(whisper.cpp)
|
|||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)
|
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)
|
||||||
|
|
||||||
set(
|
set(SOURCE_FILES
|
||||||
SOURCE_FILES
|
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
||||||
${WHISPER_LIB_DIR}/ggml.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
||||||
${WHISPER_LIB_DIR}/ggml-alloc.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.c
|
||||||
${WHISPER_LIB_DIR}/ggml-backend.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
||||||
${WHISPER_LIB_DIR}/ggml-quants.c
|
${WHISPER_LIB_DIR}/src/whisper.cpp
|
||||||
${WHISPER_LIB_DIR}/whisper.cpp
|
${CMAKE_SOURCE_DIR}/jni.c
|
||||||
${CMAKE_SOURCE_DIR}/jni.c
|
)
|
||||||
)
|
|
||||||
|
|
||||||
find_library(LOG_LIB log)
|
find_library(LOG_LIB log)
|
||||||
|
|
||||||
@ -41,7 +40,6 @@ function(build_library target_name)
|
|||||||
#target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
|
#target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
|
||||||
#target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
|
#target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
|
||||||
#target_link_options(${target_name} PRIVATE -flto)
|
#target_link_options(${target_name} PRIVATE -flto)
|
||||||
|
|
||||||
endif ()
|
endif ()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
@ -54,3 +52,7 @@ elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include_directories(${WHISPER_LIB_DIR})
|
include_directories(${WHISPER_LIB_DIR})
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/src)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/include)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/ggml/include)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/ggml/src)
|
||||||
|
@ -10,7 +10,7 @@ option(GGML_HOME "whisper: Path to external GGML source" OFF)
|
|||||||
|
|
||||||
set(
|
set(
|
||||||
SOURCE_FILES
|
SOURCE_FILES
|
||||||
${WHISPER_LIB_DIR}/whisper.cpp
|
${WHISPER_LIB_DIR}/src/whisper.cpp
|
||||||
${CMAKE_SOURCE_DIR}/jni.c
|
${CMAKE_SOURCE_DIR}/jni.c
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -18,10 +18,10 @@ if (NOT GGML_HOME)
|
|||||||
set(
|
set(
|
||||||
SOURCE_FILES
|
SOURCE_FILES
|
||||||
${SOURCE_FILES}
|
${SOURCE_FILES}
|
||||||
${WHISPER_LIB_DIR}/ggml.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml.c
|
||||||
${WHISPER_LIB_DIR}/ggml-alloc.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-alloc.c
|
||||||
${WHISPER_LIB_DIR}/ggml-backend.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-backend.c
|
||||||
${WHISPER_LIB_DIR}/ggml-quants.c
|
${WHISPER_LIB_DIR}/ggml/src/ggml-quants.c
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -75,3 +75,7 @@ endif ()
|
|||||||
build_library("whisper") # Default target
|
build_library("whisper") # Default target
|
||||||
|
|
||||||
include_directories(${WHISPER_LIB_DIR})
|
include_directories(${WHISPER_LIB_DIR})
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/src)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/include)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/ggml/include)
|
||||||
|
include_directories(${WHISPER_LIB_DIR}/ggml/src)
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
|
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
|
||||||
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
|
||||||
184447212AB21B43007D6BFE /* ggml-metal.metal in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
|
||||||
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
|
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
|
||||||
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
|
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
|
||||||
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
|
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
|
||||||
@ -20,6 +19,8 @@
|
|||||||
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
|
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
|
||||||
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
|
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
|
||||||
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
|
||||||
|
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
||||||
|
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
|
||||||
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
18ABE15A2AF556340044A204 /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.c */; };
|
||||||
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
|
||||||
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */ = {isa = PBXBuildFile; fileRef = 7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */; };
|
||||||
@ -29,23 +30,24 @@
|
|||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
/* Begin PBXCopyFilesBuildPhase section */
|
/* Begin PBXCopyFilesBuildPhase section */
|
||||||
184447202AB21B25007D6BFE /* CopyFiles */ = {
|
184447202AB21B25007D6BFE /* Copy Files */ = {
|
||||||
isa = PBXCopyFilesBuildPhase;
|
isa = PBXCopyFilesBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
dstPath = "";
|
dstPath = "";
|
||||||
dstSubfolderSpec = 7;
|
dstSubfolderSpec = 7;
|
||||||
files = (
|
files = (
|
||||||
184447212AB21B43007D6BFE /* ggml-metal.metal in CopyFiles */,
|
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */,
|
||||||
);
|
);
|
||||||
|
name = "Copy Files";
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
/* End PBXCopyFilesBuildPhase section */
|
/* End PBXCopyFilesBuildPhase section */
|
||||||
|
|
||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml-alloc.c"; sourceTree = "<group>"; };
|
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
|
||||||
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml-alloc.h"; sourceTree = "<group>"; };
|
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
|
||||||
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml-metal.m"; sourceTree = "<group>"; };
|
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal.m"; sourceTree = "<group>"; };
|
||||||
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml-metal.metal"; sourceTree = "<group>"; };
|
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal.metal"; sourceTree = "<group>"; };
|
||||||
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
|
||||||
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
|
||||||
@ -58,17 +60,19 @@
|
|||||||
18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
|
18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
|
||||||
18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||||
18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
|
18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
|
||||||
18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../whisper.h; sourceTree = "<group>"; };
|
18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../include/whisper.h; sourceTree = "<group>"; };
|
||||||
18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../whisper.cpp; sourceTree = "<group>"; };
|
18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../src/whisper.cpp; sourceTree = "<group>"; };
|
||||||
18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml.c; sourceTree = "<group>"; };
|
18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml/src/ggml.c; sourceTree = "<group>"; };
|
||||||
18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml.h; sourceTree = "<group>"; };
|
18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml/include/ggml.h; sourceTree = "<group>"; };
|
||||||
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
|
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
|
||||||
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml-quants.h"; sourceTree = "<group>"; };
|
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../../ggml/include/ggml-metal.h"; sourceTree = "<group>"; };
|
||||||
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml-backend.h"; sourceTree = "<group>"; };
|
18A275FF2C2A9563001C8D37 /* ggml-common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-common.h"; path = "../../../ggml/src/ggml-common.h"; sourceTree = "<group>"; };
|
||||||
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml-backend-impl.h"; sourceTree = "<group>"; };
|
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
|
||||||
18ABE1572AF556340044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../../ggml-backend.c"; sourceTree = "<group>"; };
|
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
|
||||||
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml-impl.h"; sourceTree = "<group>"; };
|
18ABE1562AF556340044A204 /* ggml-backend-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend-impl.h"; path = "../../../ggml/src/ggml-backend-impl.h"; sourceTree = "<group>"; };
|
||||||
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml-quants.c"; sourceTree = "<group>"; };
|
18ABE1572AF556340044A204 /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../../ggml/src/ggml-backend.c"; sourceTree = "<group>"; };
|
||||||
|
18ABE1582AF556340044A204 /* ggml-impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-impl.h"; path = "../../../ggml/src/ggml-impl.h"; sourceTree = "<group>"; };
|
||||||
|
18ABE1592AF556340044A204 /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../../ggml/src/ggml-quants.c"; sourceTree = "<group>"; };
|
||||||
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
7FE342452A0C3FA20015A058 /* whisper-encoder-impl.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "whisper-encoder-impl.m"; sourceTree = "<group>"; };
|
||||||
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
7FE342462A0C3FA20015A058 /* whisper-encoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "whisper-encoder.h"; sourceTree = "<group>"; };
|
||||||
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
7FE342472A0C3FA20015A058 /* whisper-encoder.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = "whisper-encoder.mm"; sourceTree = "<group>"; };
|
||||||
@ -108,6 +112,8 @@
|
|||||||
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
18627C7829052BDF00BD2A04 /* whisper.objc */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
18A275FF2C2A9563001C8D37 /* ggml-common.h */,
|
||||||
|
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
|
||||||
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
|
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
|
||||||
18ABE1572AF556340044A204 /* ggml-backend.c */,
|
18ABE1572AF556340044A204 /* ggml-backend.c */,
|
||||||
18ABE1552AF556340044A204 /* ggml-backend.h */,
|
18ABE1552AF556340044A204 /* ggml-backend.h */,
|
||||||
@ -151,7 +157,7 @@
|
|||||||
7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */,
|
7FE3424A2A0C3FA20015A058 /* whisper-decoder-impl.m */,
|
||||||
);
|
);
|
||||||
name = coreml;
|
name = coreml;
|
||||||
path = ../../../coreml;
|
path = ../../../src/coreml;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
/* End PBXGroup section */
|
/* End PBXGroup section */
|
||||||
@ -164,7 +170,7 @@
|
|||||||
18627C7229052BDF00BD2A04 /* Sources */,
|
18627C7229052BDF00BD2A04 /* Sources */,
|
||||||
18627C7329052BDF00BD2A04 /* Frameworks */,
|
18627C7329052BDF00BD2A04 /* Frameworks */,
|
||||||
18627C7429052BDF00BD2A04 /* Resources */,
|
18627C7429052BDF00BD2A04 /* Resources */,
|
||||||
184447202AB21B25007D6BFE /* CopyFiles */,
|
184447202AB21B25007D6BFE /* Copy Files */,
|
||||||
);
|
);
|
||||||
buildRules = (
|
buildRules = (
|
||||||
);
|
);
|
||||||
@ -182,7 +188,7 @@
|
|||||||
isa = PBXProject;
|
isa = PBXProject;
|
||||||
attributes = {
|
attributes = {
|
||||||
BuildIndependentTargetsInParallel = 1;
|
BuildIndependentTargetsInParallel = 1;
|
||||||
LastUpgradeCheck = 1400;
|
LastUpgradeCheck = 1540;
|
||||||
TargetAttributes = {
|
TargetAttributes = {
|
||||||
18627C7529052BDF00BD2A04 = {
|
18627C7529052BDF00BD2A04 = {
|
||||||
CreatedOnToolsVersion = 14.0.1;
|
CreatedOnToolsVersion = 14.0.1;
|
||||||
@ -212,6 +218,7 @@
|
|||||||
isa = PBXResourcesBuildPhase;
|
isa = PBXResourcesBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
|
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */,
|
||||||
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
|
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
|
||||||
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
|
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
|
||||||
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
|
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
|
||||||
@ -301,6 +308,7 @@
|
|||||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||||
ENABLE_TESTABILITY = YES;
|
ENABLE_TESTABILITY = YES;
|
||||||
|
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||||
GCC_DYNAMIC_NO_PIC = NO;
|
GCC_DYNAMIC_NO_PIC = NO;
|
||||||
GCC_NO_COMMON_BLOCKS = YES;
|
GCC_NO_COMMON_BLOCKS = YES;
|
||||||
@ -359,6 +367,7 @@
|
|||||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||||
ENABLE_NS_ASSERTIONS = NO;
|
ENABLE_NS_ASSERTIONS = NO;
|
||||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||||
|
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||||
GCC_C_LANGUAGE_STANDARD = gnu11;
|
GCC_C_LANGUAGE_STANDARD = gnu11;
|
||||||
GCC_NO_COMMON_BLOCKS = YES;
|
GCC_NO_COMMON_BLOCKS = YES;
|
||||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||||
@ -400,6 +409,7 @@
|
|||||||
"@executable_path/Frameworks",
|
"@executable_path/Frameworks",
|
||||||
);
|
);
|
||||||
MARKETING_VERSION = 1.0;
|
MARKETING_VERSION = 1.0;
|
||||||
|
MTL_HEADER_SEARCH_PATHS = "";
|
||||||
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
@ -428,6 +438,7 @@
|
|||||||
"@executable_path/Frameworks",
|
"@executable_path/Frameworks",
|
||||||
);
|
);
|
||||||
MARKETING_VERSION = 1.0;
|
MARKETING_VERSION = 1.0;
|
||||||
|
MTL_HEADER_SEARCH_PATHS = "";
|
||||||
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
|
@ -15,7 +15,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
|||||||
private var audioPlayer: AVAudioPlayer?
|
private var audioPlayer: AVAudioPlayer?
|
||||||
|
|
||||||
private var modelUrl: URL? {
|
private var modelUrl: URL? {
|
||||||
Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models")
|
Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "models")
|
||||||
}
|
}
|
||||||
|
|
||||||
private var sampleUrl: URL? {
|
private var sampleUrl: URL? {
|
||||||
|
2138
ggml-cuda/mmq.cuh
2138
ggml-cuda/mmq.cuh
File diff suppressed because it is too large
Load Diff
13337
ggml-sycl.cpp
13337
ggml-sycl.cpp
File diff suppressed because it is too large
Load Diff
1
ggml/.gitignore
vendored
Normal file
1
ggml/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
src/ggml-metal-embed.metal
|
238
ggml/CMakeLists.txt
Normal file
238
ggml/CMakeLists.txt
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX)
include(CheckIncludeFileCXX)

# Always write compile_commands.json.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# When no build type was requested (and the generator is neither Xcode nor MSVC),
# force a Release build and advertise the usual choices in the cache.
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# GGML_STANDALONE is ON only when this directory is the top-level source directory.
set(GGML_STANDALONE OFF)
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(GGML_STANDALONE ON)

    # Only the standalone build picks the runtime output directory itself.
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

    # configure project version
    # TODO
endif()

# Shared libraries are the default everywhere except Emscripten and MinGW.
if (EMSCRIPTEN OR MINGW)
    set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
    set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()

# This option is only declared for Emscripten builds.
if (EMSCRIPTEN)
    option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||||
|
|
||||||
|
#
# option list
#
# Every user-facing switch of the ggml build is declared here. Defaults that
# depend on the platform or on another option are computed first and then
# passed to option()/set(... CACHE ...).
#

# TODO: mark all options as advanced when not GGML_STANDALONE

# Platform-dependent defaults: Metal and BLAS (vendor "Apple") are on by default on Apple only.
if (APPLE)
    set(GGML_METAL_DEFAULT       ON)
    set(GGML_BLAS_DEFAULT        ON)
    set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
    set(GGML_METAL_DEFAULT       OFF)
    set(GGML_BLAS_DEFAULT        OFF)
    set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

# general
option(GGML_STATIC "ggml: static link libraries"         OFF)
option(GGML_NATIVE "ggml: enable -march=native flag"     ON)
option(GGML_LTO    "ggml: enable link time optimization" OFF)
option(GGML_CCACHE "ggml: use ccache if available"       ON)

# debug
option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings"                   ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF                  "ggml: enable gprof"                                   OFF)

# build
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers
option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer"    OFF)
option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer"   OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

# instruction set specific
# INS_ENB is the default for the explicit AVX/AVX2/FMA/F16C switches below:
# OFF when GGML_NATIVE is enabled, ON otherwise.
if (NOT GGML_NATIVE)
    set(INS_ENB ON)
else()
    set(INS_ENB OFF)
endif()

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)

option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
option(GGML_AVX512      "ggml: enable AVX512"      OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
option(GGML_FMA         "ggml: enable FMA"         ${INS_ENB})
if (NOT MSVC)
    option(GGML_F16C "ggml: enable F16C" ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
endif()
option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX  "ggml: enable lsx"  ON)
option(GGML_SVE  "ggml: enable SVE"  OFF)

if (WIN32)
    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")

# 3rd party libs / backends
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
option(GGML_BLAS       "ggml: use BLAS"                    ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor")
option(GGML_LLAMAFILE  "ggml: use ggml SGEMM"              OFF)

option(GGML_CUDA            "ggml: use CUDA"                                OFF)
option(GGML_CUDA_FORCE_DMMV "ggml: use dmmv instead of mmvq CUDA kernels"   OFF)
option(GGML_CUDA_FORCE_MMQ  "ggml: use mmq kernels instead of cuBLAS"       OFF)
set(GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
set(GGML_CUDA_MMV_Y  "1"  CACHE STRING "ggml: y block size for mmv CUDA kernels")
option(GGML_CUDA_F16        "ggml: use 16 bit floats for some calculations" OFF)
set(GGML_CUDA_KQUANTS_ITER        "2"   CACHE STRING "ggml: iters./thread per block for Q2_K/Q6_K")
set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY  "ggml: do not use peer to peer copies"        OFF)
option(GGML_CUDA_NO_VMM        "ggml: do not try to use CUDA VMM"            OFF)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)

option(GGML_CURL                 "ggml: use libcurl to download model from an URL" OFF)
option(GGML_HIPBLAS              "ggml: use hipBLAS"                               OFF)
option(GGML_HIP_UMA              "ggml: use HIP unified memory architecture"       OFF)
option(GGML_VULKAN               "ggml: use Vulkan"                                OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks"                      OFF)
option(GGML_VULKAN_DEBUG         "ggml: enable Vulkan debug output"                OFF)
option(GGML_VULKAN_MEMORY_DEBUG  "ggml: enable Vulkan memory debug output"         OFF)
option(GGML_VULKAN_VALIDATE      "ggml: enable Vulkan validation"                  OFF)
option(GGML_VULKAN_RUN_TESTS     "ggml: run Vulkan tests"                          OFF)
option(GGML_KOMPUTE              "ggml: use Kompute"                               OFF)
option(GGML_METAL                "ggml: use Metal"                                 ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG         "ggml: disable Metal debugging"                   OFF)
option(GGML_METAL_SHADER_DEBUG   "ggml: compile Metal with -fno-fast-math"         OFF)
# Embedding the Metal library defaults to whatever GGML_METAL resolved to above.
option(GGML_METAL_EMBED_LIBRARY  "ggml: embed Metal library"                       ${GGML_METAL})
set(GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version")
set(GGML_METAL_STD                "" CACHE STRING "ggml: metal standard version (-std flag)")
option(GGML_OPENMP   "ggml: use OpenMP"                                OFF_PLACEHOLDER_DO_NOT_USE)
|
||||||
|
|
||||||
|
#
# dependencies
#

# C11 is mandatory for the C sources.
set(CMAKE_C_STANDARD          11)
set(CMAKE_C_STANDARD_REQUIRED true)

# C++11 by default; the SYCL configuration raises this to C++17.
set(CMAKE_CXX_STANDARD 11)
if (GGML_SYCL)
    set(CMAKE_CXX_STANDARD 17)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED true)

set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)

#
# build the library
#

add_subdirectory(src)

#
# tests and examples
#

if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif()
|
||||||
|
|
||||||
|
#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# Headers shipped with the library. The GGML_HEADERS_* variables are not set in
# this file; they are quoted so that an unset variable contributes one empty
# element instead of breaking the list.
set(GGML_PUBLIC_HEADERS
    include/ggml.h
    include/ggml-alloc.h
    include/ggml-backend.h
    "${GGML_HEADERS_CUDA}"
    "${GGML_HEADERS_METAL}"
    "${GGML_HEADERS_EXTRA}")

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
#    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
install(TARGETS ggml PUBLIC_HEADER)

# The library artifact gets an install rule only for shared builds.
if (BUILD_SHARED_LIBS)
    install(TARGETS ggml LIBRARY)
endif()

if (GGML_METAL)
    # Install the Metal shader source next to the binaries (rw-r--r--).
    install(
        FILES       src/ggml-metal.metal
        PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
        DESTINATION ${CMAKE_INSTALL_BINDIR})

    # NOTE(review): CMAKE_RUNTIME_OUTPUT_DIRECTORY is only set by this file in
    # the standalone branch; when ggml is a subproject the parent presumably
    # has to define it for this path to be valid - confirm.
    if (NOT GGML_METAL_EMBED_LIBRARY)
        install(
            FILES       ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
            DESTINATION ${CMAKE_INSTALL_BINDIR})
    endif()
endif()

# The pkg-config file is generated and installed for standalone builds only.
if (GGML_STANDALONE)
    configure_file(
        ${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
        ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        @ONLY)

    install(
        FILES       ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        DESTINATION share/pkgconfig)
endif()
|
100
ggml/cmake/FindSIMD.cmake
Normal file
100
ggml/cmake/FindSIMD.cmake
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
include(CheckCSourceRuns)

# Each *_CODE variable holds a small C program that is compiled and run by
# check_sse() to find out whether the instruction set is usable.

set(AVX_CODE "
#include <immintrin.h>
int main()
{
__m256 a;
a = _mm256_set1_ps(0);
return 0;
}
")

set(AVX512_CODE "
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0);
__m512i b = a;
__mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
return 0;
}
")

set(AVX2_CODE "
#include <immintrin.h>
int main()
{
__m256i a = {0};
a = _mm256_abs_epi16(a);
__m256i x;
_mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
return 0;
}
")

set(FMA_CODE "
#include <immintrin.h>
int main()
{
__m256 acc = _mm256_setzero_ps();
const __m256 d = _mm256_setzero_ps();
const __m256 p = _mm256_setzero_ps();
acc = _mm256_fmadd_ps( d, p, acc );
return 0;
}
")

# check_sse(<type> <flags>)
#
# Runs ${<type>_CODE} once per entry of the ';'-separated <flags> list, in
# order, until one attempt succeeds. Results are cached as
#   <type>_FOUND - TRUE/FALSE
#   <type>_FLAGS - the flag string that worked ("" when none did)
# Each attempt is cached separately as HAS_<type>_<n> with n counting from 1.
# CMAKE_REQUIRED_FLAGS is restored afterwards.
macro(check_sse type flags)
    set(__FLAG_I 1)
    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})

    foreach (__FLAG ${flags})
        # Once a flag has worked the remaining candidates are skipped.
        if (NOT ${type}_FOUND)
            set(CMAKE_REQUIRED_FLAGS ${__FLAG})
            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})

            if (HAS_${type}_${__FLAG_I})
                set(${type}_FOUND TRUE        CACHE BOOL   "${type} support")
                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
            endif()

            math(EXPR __FLAG_I "${__FLAG_I}+1")
        endif()
    endforeach()

    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})

    # No candidate worked: record the negative result.
    if (NOT ${type}_FOUND)
        set(${type}_FOUND FALSE CACHE BOOL   "${type} support")
        set(${type}_FLAGS ""    CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
endmacro()

# flags are for MSVC only!
check_sse("AVX" " ;/arch:AVX")
if (AVX_FOUND)
    set(GGML_AVX ON)
else()
    set(GGML_AVX OFF)
endif()

# AVX2 is only switched on when the FMA probe succeeds as well.
check_sse("AVX2" " ;/arch:AVX2")
check_sse("FMA" " ;/arch:AVX2")
if (AVX2_FOUND AND FMA_FOUND)
    set(GGML_AVX2 ON)
else()
    set(GGML_AVX2 OFF)
endif()

check_sse("AVX512" " ;/arch:AVX512")
if (AVX512_FOUND)
    set(GGML_AVX512 ON)
else()
    set(GGML_AVX512 OFF)
endif()
|
220
ggml/ggml_vk_generate_shaders.py
Normal file
220
ggml/ggml_vk_generate_shaders.py
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from tempfile import gettempdir
|
||||||
|
|
||||||
|
logger = logging.getLogger("ggml-vk-generate-shaders")

# Compiler executable; the command-line entry point may replace it via --glslc.
GLSLC = "glslc"

# Tensor type suffixes for which shader variants are generated.
type_names = [
    "f32", "f16",
    "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
    "q2_k", "q3_k", "q4_k", "q5_k", "q6_k",
]

# Upper bound on shader compilations that run at the same time.
ASYNCIO_CONCURRENCY = 64

# Shader sources are read from input_dir; intermediate .spv files go to output_dir.
input_dir = "vulkan-shaders"
output_dir = gettempdir()

# Guards appends to shader_fnames, the list of (shader name, .spv path) results.
lock = asyncio.Lock()
shader_fnames = []
|
||||||
|
|
||||||
|
|
||||||
|
async def string_to_spv(name, in_fname, defines, fp16=True):
    """Compile one GLSL compute shader to SPIR-V and register the result.

    Args:
        name: Base name of the generated shader; ``_fp32`` is appended when
            ``fp16`` is false.
        in_fname: Shader source file name, resolved relative to ``input_dir``.
        defines: Mapping of preprocessor macro name to value; every entry is
            passed to the compiler as ``-D<key>=<value>``.
        fp16: Only influences the generated name (see ``name``).

    Returns:
        None. On success ``(name, output path)`` is appended to
        ``shader_fnames``. When the compiler exits with a non-zero status the
        failure is logged and nothing is registered.
    """
    if not fp16:
        name = f"{name}_fp32"

    out_fname = os.path.join(output_dir, f"{name}.spv")
    in_path = os.path.join(input_dir, in_fname)

    cmd = [
        GLSLC, "-fshader-stage=compute", "--target-env=vulkan1.2", "-O", in_path, "-o", out_fname,
        *(f"-D{key}={value}" for key, value in defines.items()),
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    # stdout is still captured (so it does not reach the console) but is not used.
    _, stderr = await proc.communicate()

    if proc.returncode:
        # Decode only when the text is needed, and leniently: undecodable bytes
        # in the compiler output must not turn a logged failure into a crash.
        error = stderr.decode(errors="replace")
        cmd_line = " ".join(cmd)
        logger.error(f"cannot compile {name}\n\n{cmd_line}\n\n{error}")
        return

    async with lock:
        shader_fnames.append((name, out_fname))
|
||||||
|
|
||||||
|
|
||||||
|
def matmul_shaders(tasks, fp16, matmul_id):
    """Queue the compile jobs for every variant of the ``mul_mm.comp`` shader.

    Args:
        tasks: List that receives the ``string_to_spv`` coroutines.
        fp16: Selects ``float16_t`` as FLOAT_TYPE, the wider vector load
            settings and the ``FLOAT16`` define; also forwarded to
            ``string_to_spv``.
        matmul_id: When true the shaders are named ``matmul_id_*`` and are
            compiled with ``MUL_MAT_ID`` defined.
    """
    if fp16:
        load_vec, aligned_b_type_f32, aligned_b_type_f16 = "8", "mat2x4", "f16mat2x4"
    else:
        load_vec, aligned_b_type_f32, aligned_b_type_f16 = "4", "vec4", "f16vec4"

    # Defines shared by all variants; the insertion order is kept because it
    # becomes the order of the -D arguments.
    base_dict = {"FLOAT_TYPE": "float16_t" if fp16 else "float"}
    shader_name = "matmul_id" if matmul_id else "matmul"
    if matmul_id:
        base_dict["MUL_MAT_ID"] = "1"
    if fp16:
        base_dict["FLOAT16"] = "1"

    def queue(suffix, extra):
        # base_dict entries first, then the variant-specific ones.
        tasks.append(string_to_spv(f"{shader_name}_{suffix}", "mul_mm.comp", {**base_dict, **extra}, fp16))

    # Shaders with f16 B_TYPE
    for a_type in ("f32", "f16"):
        data_a = f"DATA_A_{a_type.upper()}"
        suffix = "f32_f16" if a_type == "f32" else "f16"
        queue(suffix, {data_a: "1", "B_TYPE": "float16_t", "D_TYPE": "float"})
        queue(f"{suffix}_aligned", {data_a: "1", "LOAD_VEC_A": load_vec, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f16, "D_TYPE": "float"})

    # Shaders with f32 B_TYPE, one plain and one aligned variant per A type.
    for tname in type_names:
        data_a_key = f"DATA_A_{tname.upper()}"
        # Only f32/f16 A data uses the wide vector load; every other type loads 2 at a time.
        load_vec_a = load_vec if tname in ("f32", "f16") else "2"

        queue(f"{tname}_f32", {data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})
        # NOTE(review): the aligned variant defines DATA_A_* as "2" rather than "1";
        # reproduced as-is - confirm the shader only tests whether it is defined.
        queue(f"{tname}_f32_aligned", {data_a_key: "2", "LOAD_VEC_A": load_vec_a, "LOAD_VEC_B": load_vec, "B_TYPE": aligned_b_type_f32, "D_TYPE": "float"})
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Compile every Vulkan shader variant and write ``ggml-vulkan-shaders.hpp``.

    All compile jobs are created first, then awaited with at most
    ``ASYNCIO_CONCURRENCY`` of them inside the semaphore at a time. Each
    registered SPIR-V file is then embedded into the header as an
    ``unsigned char <name>_data[]`` array plus a ``<name>_len`` constant, and
    the intermediate file is deleted.
    """
    logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")

    tasks = []

    def queue(name, in_fname, defines):
        tasks.append(string_to_spv(name, in_fname, defines))

    for fp16 in (False, True):
        matmul_shaders(tasks, fp16, False)  # MUL_MAT
        matmul_shaders(tasks, fp16, True)   # MUL_MAT_ID

    base_dict = {"FLOAT_TYPE": "float"}

    for tname in type_names:
        data_a_key = f"DATA_A_{tname.upper()}"
        is_k_quant = tname.endswith("_k")

        # mul mat vec: the *_k types have a dedicated shader file each
        shader = f"mul_mat_vec_{tname}.comp" if is_k_quant else "mul_mat_vec.comp"
        queue(f"mul_mat_vec_{tname}_f32_f32", shader, {**base_dict, data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})
        queue(f"mul_mat_vec_{tname}_f16_f32", shader, {**base_dict, data_a_key: "1", "B_TYPE": "float16_t", "D_TYPE": "float"})
        queue(f"mul_mat_vec_id_{tname}_f32", shader, {**base_dict, "MUL_MAT_ID": "1", data_a_key: "1", "B_TYPE": "float", "D_TYPE": "float"})

        # Dequant shaders
        if tname != "f16":
            queue(f"dequant_{tname}", f"dequant_{tname}.comp", {**base_dict, data_a_key: "1", "D_TYPE": "float16_t"})

        # get_rows (not generated for the *_k types)
        if is_k_quant:
            continue
        shader = "get_rows.comp" if tname in ("f32", "f16") else "get_rows_quant.comp"
        f16_out_defines = {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float16_t"}
        if tname == "f16":
            f16_out_defines["OPTIMIZATION_ERROR_WORKAROUND"] = "1"
        queue(f"get_rows_{tname}", shader, f16_out_defines)
        queue(f"get_rows_{tname}_f32", shader, {data_a_key: "1", "B_TYPE": "int", "D_TYPE": "float"})

    # Shaders that are not generated per type: (name, source file, defines).
    fixed_shaders = (
        ("mul_mat_vec_p021_f16_f32", "mul_mat_vec_p021.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}),
        ("mul_mat_vec_nc_f16_f32", "mul_mat_vec_nc.comp", {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}),

        # Norms
        ("norm_f32", "norm.comp", {**base_dict, "A_TYPE": "float", "D_TYPE": "float"}),
        ("rms_norm_f32", "rms_norm.comp", {**base_dict, "A_TYPE": "float", "D_TYPE": "float"}),

        ("cpy_f32_f32", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float"}),
        ("cpy_f32_f16", "copy.comp", {"A_TYPE": "float", "D_TYPE": "float16_t"}),
        ("cpy_f16_f16", "copy.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}),

        ("add_f32", "add.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("split_k_reduce", "mul_mat_split_k_reduce.comp", {}),

        ("mul_f32", "mul.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("div_f32", "div.comp", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("scale_f32", "scale.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("sqr_f32", "square.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("clamp_f32", "clamp.comp", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}),

        ("gelu_f32", "gelu.comp", {"A_TYPE": "float", "D_TYPE": "float"}),
        ("silu_f32", "silu.comp", {"A_TYPE": "float", "D_TYPE": "float"}),
        ("relu_f32", "relu.comp", {"A_TYPE": "float", "D_TYPE": "float"}),

        ("diag_mask_inf_f32", "diag_mask_inf.comp", {"A_TYPE": "float", "D_TYPE": "float"}),

        ("soft_max_f32", "soft_max.comp", {**base_dict, "A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float"}),
        ("soft_max_f32_f16", "soft_max.comp", {**base_dict, "A_TYPE": "float", "B_TYPE": "float16_t", "D_TYPE": "float"}),

        ("rope_norm_f32", "rope_norm.comp", {"A_TYPE": "float", "D_TYPE": "float"}),
        ("rope_norm_f16", "rope_norm.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}),

        ("rope_neox_f32", "rope_neox.comp", {"A_TYPE": "float", "D_TYPE": "float"}),
        ("rope_neox_f16", "rope_neox.comp", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}),

        ("argsort_f32", "argsort.comp", {"A_TYPE": "float"}),

        ("sum_rows_f32", "sum_rows.comp", {**base_dict, "A_TYPE": "float", "D_TYPE": "float"}),
    )
    for shader_args in fixed_shaders:
        queue(*shader_args)

    # Run tasks concurrently guarded by a concurrency limit.
    gate = asyncio.Semaphore(ASYNCIO_CONCURRENCY)

    async def limited(job):
        async with gate:
            return await job

    await asyncio.gather(*(limited(job) for job in tasks))

    with open("ggml-vulkan-shaders.hpp", "w") as header:
        header.write("#include <cstdint>\n\n")
        for name, path in sorted(shader_fnames):
            with open(path, "rb") as spv:
                blob = spv.read()

            header.write(f"unsigned char {name}_data[] = {{\n")
            for position, val in enumerate(blob, start=1):
                header.write(f"0x{val:02x},")
                # line break after every 12th byte
                if position % 12 == 0:
                    header.write("\n")
            header.write("\n};\n")
            header.write(f"const uint64_t {name}_len = {len(blob)};\n\n")

            # The intermediate SPIR-V file is no longer needed once embedded.
            os.remove(path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, configure logging, pick the
    # compiler and run the generator.
    cli = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
    cli.add_argument("--glslc", help="Path to glslc")
    cli.add_argument("--verbose", action="store_true", help="increase output verbosity")
    args = cli.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    # An explicit --glslc replaces the default compiler executable.
    GLSLC = args.glslc or GLSLC

    asyncio.run(main())
|
@ -8,7 +8,9 @@
|
|||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
#include "ggml-sycl/presets.hpp"
|
|
||||||
|
#define GGML_SYCL_NAME "SYCL"
|
||||||
|
#define GGML_SYCL_MAX_DEVICES 48
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
@ -312,6 +312,12 @@
|
|||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
|
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
||||||
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||||
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||||
|
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
||||||
|
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
@ -585,11 +591,7 @@ extern "C" {
|
|||||||
struct ggml_tensor * grad;
|
struct ggml_tensor * grad;
|
||||||
struct ggml_tensor * src[GGML_MAX_SRC];
|
struct ggml_tensor * src[GGML_MAX_SRC];
|
||||||
|
|
||||||
// performance
|
// source tensor and offset for views
|
||||||
int perf_runs;
|
|
||||||
int64_t perf_cycles;
|
|
||||||
int64_t perf_time_us;
|
|
||||||
|
|
||||||
struct ggml_tensor * view_src;
|
struct ggml_tensor * view_src;
|
||||||
size_t view_offs;
|
size_t view_offs;
|
||||||
|
|
||||||
@ -599,7 +601,7 @@ extern "C" {
|
|||||||
|
|
||||||
void * extra; // extra things e.g. for ggml-cuda.cu
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
||||||
|
|
||||||
char padding[8];
|
// char padding[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||||
@ -646,11 +648,6 @@ extern "C" {
|
|||||||
struct ggml_hash_set visited_hash_table;
|
struct ggml_hash_set visited_hash_table;
|
||||||
|
|
||||||
enum ggml_cgraph_eval_order order;
|
enum ggml_cgraph_eval_order order;
|
||||||
|
|
||||||
// performance
|
|
||||||
int perf_runs;
|
|
||||||
int64_t perf_cycles;
|
|
||||||
int64_t perf_time_us;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// scratch buffer
|
// scratch buffer
|
||||||
@ -667,28 +664,6 @@ extern "C" {
|
|||||||
bool no_alloc; // don't allocate memory for the tensor data
|
bool no_alloc; // don't allocate memory for the tensor data
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// compute types
|
|
||||||
|
|
||||||
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
|
||||||
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
|
||||||
enum ggml_task_type {
|
|
||||||
GGML_TASK_TYPE_INIT = 0,
|
|
||||||
GGML_TASK_TYPE_COMPUTE,
|
|
||||||
GGML_TASK_TYPE_FINALIZE,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ggml_compute_params {
|
|
||||||
enum ggml_task_type type;
|
|
||||||
|
|
||||||
// ith = thread index, nth = number of threads
|
|
||||||
int ith, nth;
|
|
||||||
|
|
||||||
// work buffer for all threads
|
|
||||||
size_t wsize;
|
|
||||||
void * wdata;
|
|
||||||
};
|
|
||||||
|
|
||||||
// numa strategies
|
// numa strategies
|
||||||
enum ggml_numa_strategy {
|
enum ggml_numa_strategy {
|
||||||
GGML_NUMA_STRATEGY_DISABLED = 0,
|
GGML_NUMA_STRATEGY_DISABLED = 0,
|
1171
ggml/src/CMakeLists.txt
Normal file
1171
ggml/src/CMakeLists.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -1172,7 +1172,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
|||||||
// check if a backend with higher prio wants to offload the op
|
// check if a backend with higher prio wants to offload the op
|
||||||
if (src_backend_id == sched->n_backends - 1) {
|
if (src_backend_id == sched->n_backends - 1) {
|
||||||
for (int b = 0; b < src_backend_id; b++) {
|
for (int b = 0; b < src_backend_id; b++) {
|
||||||
if (ggml_backend_offload_op(sched->backends[b], tensor)) {
|
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
|
||||||
SET_CAUSE(tensor, "1.off");
|
SET_CAUSE(tensor, "1.off");
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
@ -152,16 +152,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|||||||
GGML_ASSERT(info.device_count <= GGML_CUDA_MAX_DEVICES);
|
GGML_ASSERT(info.device_count <= GGML_CUDA_MAX_DEVICES);
|
||||||
|
|
||||||
int64_t total_vram = 0;
|
int64_t total_vram = 0;
|
||||||
#if defined(GGML_CUDA_FORCE_MMQ)
|
#ifdef GGML_CUDA_FORCE_MMQ
|
||||||
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
|
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: yes\n", __func__);
|
||||||
#else
|
#else
|
||||||
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
|
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_MMQ: no\n", __func__);
|
||||||
#endif
|
#endif // GGML_CUDA_FORCE_MMQ
|
||||||
#if defined(CUDA_USE_TENSOR_CORES)
|
#ifdef GGML_CUDA_FORCE_CUBLAS
|
||||||
GGML_CUDA_LOG_INFO("%s: CUDA_USE_TENSOR_CORES: yes\n", __func__);
|
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: yes\n", __func__);
|
||||||
#else
|
#else
|
||||||
GGML_CUDA_LOG_INFO("%s: CUDA_USE_TENSOR_CORES: no\n", __func__);
|
GGML_CUDA_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
|
||||||
#endif
|
#endif // GGML_CUDA_FORCE_CUBLAS
|
||||||
GGML_CUDA_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
|
GGML_CUDA_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
|
||||||
for (int id = 0; id < info.device_count; ++id) {
|
for (int id = 0; id < info.device_count; ++id) {
|
||||||
int device_vmm = 0;
|
int device_vmm = 0;
|
||||||
@ -635,7 +635,7 @@ static int64_t get_row_rounding(const std::array<float, GGML_CUDA_MAX_DEVICES> &
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int cc = ggml_cuda_info().devices[id].cc;
|
const int cc = ggml_cuda_info().devices[id].cc;
|
||||||
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc, get_mmq_x_max_host(cc)));
|
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc));
|
||||||
}
|
}
|
||||||
return row_rounding;
|
return row_rounding;
|
||||||
}
|
}
|
||||||
@ -1873,9 +1873,17 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
|
|||||||
static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
const bool split = ggml_backend_buffer_is_cuda_split(src0->buffer);
|
const bool split = ggml_backend_buffer_is_cuda_split(src0->buffer);
|
||||||
|
|
||||||
int64_t min_compute_capability = INT_MAX;
|
bool use_dequantize_mul_mat_vec = (ggml_is_quantized(src0->type) || src0->type == GGML_TYPE_F16)
|
||||||
|
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
|
||||||
|
&& src0->ne[0] % GGML_CUDA_DMMV_X == 0 && src1->ne[1] == 1;
|
||||||
|
bool use_mul_mat_vec_q = ggml_is_quantized(src0->type)
|
||||||
|
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
|
||||||
|
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE;
|
||||||
|
bool use_mul_mat_q = ggml_is_quantized(src0->type)
|
||||||
|
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;
|
||||||
|
|
||||||
|
bool any_gpus_with_slow_fp16 = false;
|
||||||
|
|
||||||
bool any_pascal_with_slow_fp16 = false;
|
|
||||||
if (split) {
|
if (split) {
|
||||||
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) src0->buffer->buft->context;
|
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) src0->buffer->buft->context;
|
||||||
auto & tensor_split = buft_ctx->tensor_split;
|
auto & tensor_split = buft_ctx->tensor_split;
|
||||||
@ -1885,55 +1893,18 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_compute_capability > ggml_cuda_info().devices[id].cc) {
|
const int cc = ggml_cuda_info().devices[id].cc;
|
||||||
min_compute_capability = ggml_cuda_info().devices[id].cc;
|
use_mul_mat_vec_q = use_mul_mat_vec_q && cc >= MIN_CC_DP4A;
|
||||||
}
|
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
|
||||||
if (ggml_cuda_info().devices[id].cc == 610) {
|
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
|
||||||
any_pascal_with_slow_fp16 = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
min_compute_capability = ggml_cuda_info().devices[ctx.device].cc;
|
const int cc = ggml_cuda_info().devices[ctx.device].cc;
|
||||||
any_pascal_with_slow_fp16 = ggml_cuda_info().devices[ctx.device].cc == 610;
|
use_mul_mat_vec_q = use_mul_mat_vec_q && cc >= MIN_CC_DP4A;
|
||||||
|
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
|
||||||
|
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check data types and tensor shapes for custom matrix multiplication kernels:
|
|
||||||
bool use_dequantize_mul_mat_vec = (ggml_is_quantized(src0->type) || src0->type == GGML_TYPE_F16)
|
|
||||||
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
|
|
||||||
&& src0->ne[0] % GGML_CUDA_DMMV_X == 0 && src1->ne[1] == 1;
|
|
||||||
|
|
||||||
bool use_mul_mat_vec_q = ggml_is_quantized(src0->type)
|
|
||||||
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
|
|
||||||
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE;
|
|
||||||
|
|
||||||
bool use_mul_mat_q = ggml_cuda_supports_mmq(src0->type)
|
|
||||||
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;
|
|
||||||
|
|
||||||
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
|
||||||
|
|
||||||
const bool fp16_performance_good = min_compute_capability >= CC_RDNA1;
|
|
||||||
|
|
||||||
#ifdef CUDA_USE_TENSOR_CORES
|
|
||||||
use_mul_mat_q = use_mul_mat_q && min_compute_capability < CC_RDNA3;
|
|
||||||
#endif // CUDA_USE_TENSOR_CORES
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
// fp16 performance is good on Volta or newer and on P100 (compute capability 6.0)
|
|
||||||
const bool fp16_performance_good = min_compute_capability >= CC_PASCAL && !any_pascal_with_slow_fp16;
|
|
||||||
|
|
||||||
// mmvq and mmq need the __dp4a instruction which on NVIDIA is only available for CC >= 6.1
|
|
||||||
use_mul_mat_vec_q = use_mul_mat_vec_q && min_compute_capability >= MIN_CC_DP4A;
|
|
||||||
use_mul_mat_q = use_mul_mat_q && min_compute_capability >= MIN_CC_DP4A;
|
|
||||||
|
|
||||||
#ifdef CUDA_USE_TENSOR_CORES
|
|
||||||
// when tensor cores are available, use them for large batch size
|
|
||||||
// ref: https://github.com/ggerganov/llama.cpp/pull/3776
|
|
||||||
use_mul_mat_q = use_mul_mat_q && (!fp16_performance_good || src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
|
|
||||||
#endif // CUDA_USE_TENSOR_CORES
|
|
||||||
|
|
||||||
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
|
||||||
|
|
||||||
// if mmvq is available it's a better choice than dmmv:
|
// if mmvq is available it's a better choice than dmmv:
|
||||||
#ifndef GGML_CUDA_FORCE_DMMV
|
#ifndef GGML_CUDA_FORCE_DMMV
|
||||||
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
|
use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
|
||||||
@ -1947,14 +1918,15 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
|
|||||||
//printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
|
//printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
|
||||||
//printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
|
//printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
|
||||||
|
|
||||||
if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
if (!split && any_gpus_with_slow_fp16 && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
||||||
// KQ single-batch
|
// FP32 precision KQ single-batch for batch size 1 without FlashAttention
|
||||||
ggml_cuda_mul_mat_vec_p021(ctx, src0, src1, dst);
|
ggml_cuda_mul_mat_vec_p021(ctx, src0, src1, dst);
|
||||||
} else if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
|
} else if (!split && any_gpus_with_slow_fp16 && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
|
||||||
// KQV single-batch
|
// FP32 precision KQV single-batch for batch size 1 without FlashAttention
|
||||||
ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst);
|
ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst);
|
||||||
} else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || fp16_performance_good) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
|
} else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16)
|
||||||
// KQ + KQV multi-batch
|
&& !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
|
||||||
|
// KQ + KQV multi-batch without FlashAttention
|
||||||
ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst);
|
ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst);
|
||||||
} else if (use_dequantize_mul_mat_vec) {
|
} else if (use_dequantize_mul_mat_vec) {
|
||||||
ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, nullptr);
|
ggml_cuda_op_mul_mat(ctx, src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, nullptr);
|
||||||
@ -2267,6 +2239,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
|||||||
case GGML_OP_SQR:
|
case GGML_OP_SQR:
|
||||||
ggml_cuda_op_sqr(ctx, dst);
|
ggml_cuda_op_sqr(ctx, dst);
|
||||||
break;
|
break;
|
||||||
|
case GGML_OP_SQRT:
|
||||||
|
ggml_cuda_op_sqrt(ctx, dst);
|
||||||
|
break;
|
||||||
case GGML_OP_CLAMP:
|
case GGML_OP_CLAMP:
|
||||||
ggml_cuda_op_clamp(ctx, dst);
|
ggml_cuda_op_clamp(ctx, dst);
|
||||||
break;
|
break;
|
||||||
@ -2830,6 +2805,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
|
|||||||
case GGML_OP_RMS_NORM:
|
case GGML_OP_RMS_NORM:
|
||||||
case GGML_OP_SCALE:
|
case GGML_OP_SCALE:
|
||||||
case GGML_OP_SQR:
|
case GGML_OP_SQR:
|
||||||
|
case GGML_OP_SQRT:
|
||||||
case GGML_OP_CLAMP:
|
case GGML_OP_CLAMP:
|
||||||
case GGML_OP_CONT:
|
case GGML_OP_CONT:
|
||||||
case GGML_OP_DIAG_MASK_INF:
|
case GGML_OP_DIAG_MASK_INF:
|
@ -146,23 +146,6 @@
|
|||||||
#define CC_RDNA2 (CC_OFFSET_AMD + 1030)
|
#define CC_RDNA2 (CC_OFFSET_AMD + 1030)
|
||||||
#define CC_RDNA3 (CC_OFFSET_AMD + 1100)
|
#define CC_RDNA3 (CC_OFFSET_AMD + 1100)
|
||||||
|
|
||||||
// define this if you want to always fallback to MMQ kernels and not use cuBLAS for matrix multiplication
|
|
||||||
// on modern hardware, using cuBLAS is recommended as it utilizes F16 tensor cores which are very performant
|
|
||||||
// for large computational tasks. the drawback is that this requires some extra amount of VRAM:
|
|
||||||
// - 7B quantum model: +100-200 MB
|
|
||||||
// - 13B quantum model: +200-400 MB
|
|
||||||
//
|
|
||||||
//#define GGML_CUDA_FORCE_MMQ
|
|
||||||
|
|
||||||
// TODO: improve this to be correct for more hardware
|
|
||||||
// for example, currently fails for GeForce GTX 1660 which is TURING arch (> VOLTA) but does not have tensor cores
|
|
||||||
#if !defined(GGML_CUDA_FORCE_MMQ)
|
|
||||||
#define CUDA_USE_TENSOR_CORES
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MMVQ_MAX_BATCH_SIZE 8 // max batch size to use MMVQ kernels
|
|
||||||
#define MMQ_MAX_BATCH_SIZE 64 // max batch size to use MMQ kernels when tensor cores are available
|
|
||||||
|
|
||||||
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
@ -343,15 +326,15 @@ static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int
|
|||||||
#define INT8_MMA_AVAILABLE
|
#define INT8_MMA_AVAILABLE
|
||||||
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_TURING
|
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_TURING
|
||||||
|
|
||||||
static bool fast_fp16_available(const int cc) {
|
static constexpr bool fast_fp16_available(const int cc) {
|
||||||
return cc >= CC_PASCAL && cc != 610;
|
return cc >= CC_PASCAL && cc != 610;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool fp16_mma_available(const int cc) {
|
static constexpr bool fp16_mma_available(const int cc) {
|
||||||
return cc < CC_OFFSET_AMD && cc >= CC_VOLTA;
|
return cc < CC_OFFSET_AMD && cc >= CC_VOLTA;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool int8_mma_available(const int cc) {
|
static constexpr bool int8_mma_available(const int cc) {
|
||||||
return cc < CC_OFFSET_AMD && cc >= CC_TURING;
|
return cc < CC_OFFSET_AMD && cc >= CC_TURING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -643,19 +626,6 @@ struct ggml_cuda_type_traits<GGML_TYPE_IQ3_S> {
|
|||||||
static constexpr int qi = QI3_S;
|
static constexpr int qi = QI3_S;
|
||||||
};
|
};
|
||||||
|
|
||||||
static int get_mmq_x_max_host(const int cc) {
|
|
||||||
#ifdef CUDA_USE_TENSOR_CORES
|
|
||||||
return cc >= CC_VOLTA && cc < CC_OFFSET_AMD ? MMQ_MAX_BATCH_SIZE : 64;
|
|
||||||
#else
|
|
||||||
return cc >= CC_VOLTA && cc < CC_OFFSET_AMD ? 128 : 64;
|
|
||||||
#endif // CUDA_USE_TENSOR_CORES
|
|
||||||
}
|
|
||||||
|
|
||||||
// Round rows to this value for --split-mode row:
|
|
||||||
static int get_mmq_y_host(const int cc, const int mmq_x) {
|
|
||||||
return cc >= CC_VOLTA && mmq_x >= 32 ? 128 : 64;
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
|
||||||
struct ggml_cuda_device_info {
|
struct ggml_cuda_device_info {
|
@ -603,7 +603,7 @@ static void on_no_fattn_vec_case(const int D) {
|
|||||||
if (D == 64) {
|
if (D == 64) {
|
||||||
fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
|
fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
|
||||||
fprintf(stderr, "By default only f16 KV cache is supported.\n");
|
fprintf(stderr, "By default only f16 KV cache is supported.\n");
|
||||||
fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
|
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
} else if (D == 128) {
|
} else if (D == 128) {
|
||||||
fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
|
fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
|
||||||
@ -611,7 +611,7 @@ static void on_no_fattn_vec_case(const int D) {
|
|||||||
fprintf(stderr, " - K == q4_0, V == q4_0, 4.50 BPV\n");
|
fprintf(stderr, " - K == q4_0, V == q4_0, 4.50 BPV\n");
|
||||||
fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n");
|
fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n");
|
||||||
fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n");
|
fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n");
|
||||||
fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
|
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
|
fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
|
@ -20,6 +20,20 @@ struct mma_int_A_I16K4 {
|
|||||||
GGML_CUDA_ASSUME(ret < K);
|
GGML_CUDA_ASSUME(ret < K);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
|
||||||
|
#if defined(INT8_MMA_AVAILABLE)
|
||||||
|
const int * xs = xs0 + (threadIdx.x%I)*stride;
|
||||||
|
asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
|
||||||
|
: "+r"(x[0]), "+r"(x[1])
|
||||||
|
: "l"(xs));
|
||||||
|
#else
|
||||||
|
#pragma unroll
|
||||||
|
for (int l = 0; l < ne; ++l) {
|
||||||
|
x[l] = xs0[get_i(l)*stride + get_k(l)];
|
||||||
|
}
|
||||||
|
#endif // defined(INT8_MMA_AVAILABLE)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mma_int_A_I16K8 {
|
struct mma_int_A_I16K8 {
|
||||||
@ -42,6 +56,20 @@ struct mma_int_A_I16K8 {
|
|||||||
GGML_CUDA_ASSUME(ret < K);
|
GGML_CUDA_ASSUME(ret < K);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
|
||||||
|
#if defined(INT8_MMA_AVAILABLE)
|
||||||
|
const int * xs = xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2);
|
||||||
|
asm("ldmatrix.sync.aligned.m8n8.x4.b16 {%0, %1, %2, %3}, [%4];"
|
||||||
|
: "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3])
|
||||||
|
: "l"(xs));
|
||||||
|
#else
|
||||||
|
#pragma unroll
|
||||||
|
for (int l = 0; l < ne; ++l) {
|
||||||
|
x[l] = xs0[get_i(l)*stride + get_k(l)];
|
||||||
|
}
|
||||||
|
#endif // defined(INT8_MMA_AVAILABLE)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mma_int_B_J8K4 {
|
struct mma_int_B_J8K4 {
|
||||||
@ -64,6 +92,20 @@ struct mma_int_B_J8K4 {
|
|||||||
GGML_CUDA_ASSUME(ret < K);
|
GGML_CUDA_ASSUME(ret < K);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
|
||||||
|
#if defined(INT8_MMA_AVAILABLE) && false // Loading as 4 byte values is faster
|
||||||
|
const int * xs = xs0 + (threadIdx.x%J)*stride;
|
||||||
|
asm("ldmatrix.sync.aligned.m8n8.x1.b16 {%0}, [%1];"
|
||||||
|
: "+r"(x[0])
|
||||||
|
: "l"(xs));
|
||||||
|
#else
|
||||||
|
#pragma unroll
|
||||||
|
for (int l = 0; l < ne; ++l) {
|
||||||
|
x[l] = xs0[get_j(l)*stride + get_k(l)];
|
||||||
|
}
|
||||||
|
#endif // defined(INT8_MMA_AVAILABLE)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mma_int_B_J8K8 {
|
struct mma_int_B_J8K8 {
|
||||||
@ -86,6 +128,20 @@ struct mma_int_B_J8K8 {
|
|||||||
GGML_CUDA_ASSUME(ret < K);
|
GGML_CUDA_ASSUME(ret < K);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
|
||||||
|
#if defined(INT8_MMA_AVAILABLE) && false // Loading as 4 byte values is faster
|
||||||
|
const int * xs = xs0 + (threadIdx.x%J)*stride + ((threadIdx.x/J)*(K/2)) % K;
|
||||||
|
asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
|
||||||
|
: "+r"(x[0]), "+r"(x[1])
|
||||||
|
: "l"(xs));
|
||||||
|
#else
|
||||||
|
#pragma unroll
|
||||||
|
for (int l = 0; l < ne; ++l) {
|
||||||
|
x[l] = xs0[get_j(l)*stride + get_k(l)];
|
||||||
|
}
|
||||||
|
#endif // defined(INT8_MMA_AVAILABLE)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mma_int_C_I16J8 {
|
struct mma_int_C_I16J8 {
|
@ -30,34 +30,34 @@ void ggml_cuda_op_mul_mat_q(
|
|||||||
|
|
||||||
switch (src0->type) {
|
switch (src0->type) {
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
mul_mat_q_case<GGML_TYPE_Q4_0>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q4_1:
|
case GGML_TYPE_Q4_1:
|
||||||
mul_mat_q_case<GGML_TYPE_Q4_1>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q5_0:
|
case GGML_TYPE_Q5_0:
|
||||||
mul_mat_q_case<GGML_TYPE_Q5_0>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q5_0>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q5_1:
|
case GGML_TYPE_Q5_1:
|
||||||
mul_mat_q_case<GGML_TYPE_Q5_1>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q5_1>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
mul_mat_q_case<GGML_TYPE_Q8_0>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q8_0>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q2_K:
|
case GGML_TYPE_Q2_K:
|
||||||
mul_mat_q_case<GGML_TYPE_Q2_K>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q2_K>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q3_K:
|
case GGML_TYPE_Q3_K:
|
||||||
mul_mat_q_case<GGML_TYPE_Q3_K>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q3_K>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
mul_mat_q_case<GGML_TYPE_Q4_K>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q4_K>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
mul_mat_q_case<GGML_TYPE_Q5_K>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q5_K>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
mul_mat_q_case<GGML_TYPE_Q6_K>(args, stream);
|
mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
@ -69,7 +69,13 @@ void ggml_cuda_op_mul_mat_q(
|
|||||||
GGML_UNUSED(src1_ddf_i);
|
GGML_UNUSED(src1_ddf_i);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
|
||||||
|
#ifdef GGML_CUDA_FORCE_CUBLAS
|
||||||
|
return false;
|
||||||
|
#endif // GGML_CUDA_FORCE_CUBLAS
|
||||||
|
|
||||||
|
bool mmq_supported;
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
case GGML_TYPE_Q4_1:
|
case GGML_TYPE_Q4_1:
|
||||||
@ -81,8 +87,32 @@ bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
|||||||
case GGML_TYPE_Q4_K:
|
case GGML_TYPE_Q4_K:
|
||||||
case GGML_TYPE_Q5_K:
|
case GGML_TYPE_Q5_K:
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
return true;
|
mmq_supported = true;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
mmq_supported = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!mmq_supported) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (int8_mma_available(cc)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cc < MIN_CC_DP4A) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef GGML_CUDA_FORCE_MMQ
|
||||||
|
return true;
|
||||||
|
#endif //GGML_CUDA_FORCE_MMQ
|
||||||
|
|
||||||
|
if (cc < CC_OFFSET_AMD) {
|
||||||
|
return cc < CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cc < CC_RDNA3 || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
|
||||||
}
|
}
|
2610
ggml/src/ggml-cuda/mmq.cuh
Normal file
2610
ggml/src/ggml-cuda/mmq.cuh
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,7 @@
|
|||||||
#include "common.cuh"
|
#include "common.cuh"
|
||||||
|
|
||||||
|
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
|
||||||
|
|
||||||
void ggml_cuda_op_mul_mat_vec_q(
|
void ggml_cuda_op_mul_mat_vec_q(
|
||||||
ggml_backend_cuda_context & ctx,
|
ggml_backend_cuda_context & ctx,
|
||||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
|
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user