mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-18 12:26:22 +00:00
whisper : add SYCL support (#1863)
* add changes from llama upstream * add sycl abstraction * add sycl build * update cmake * add sycl build config * fix bug * fix bug * refactor build * fix bug * update build * call build * use sycl header * add examples * add target * fix typecast in quant.c * readd fp16 and readme * fix quant typecast * add sample * add readme * remove cxx file check
This commit is contained in:
parent
a2506909b1
commit
a0ddd8392c
100
.github/workflows/build.yml
vendored
100
.github/workflows/build.yml
vendored
@ -150,6 +150,106 @@ jobs:
|
|||||||
make
|
make
|
||||||
ctest -L gh --output-on-failure'
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
|
ubuntu-22-cmake-sycl:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
dwhisper_sycl: [ON]
|
||||||
|
dcmake_c_compiler: [icx]
|
||||||
|
dcmake_cxx_compiler: [icpx]
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: add oneAPI to apt
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd /tmp
|
||||||
|
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||||
|
|
||||||
|
- name: install oneAPI dpcpp compiler
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||||
|
|
||||||
|
- name: install oneAPI MKL library
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt install intel-oneapi-mkl-devel
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||||
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
|
ubuntu-22-cmake-sycl-fp16:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
dwhisper_sycl: [ON]
|
||||||
|
dcmake_c_compiler: [icx]
|
||||||
|
dcmake_cxx_compiler: [icpx]
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: add oneAPI to apt
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd /tmp
|
||||||
|
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||||
|
|
||||||
|
- name: install oneAPI dpcpp compiler
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
||||||
|
|
||||||
|
- name: install oneAPI MKL library
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt install intel-oneapi-mkl-devel
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -DWHISPER_SYCL=ON -DWHISPER_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. # F16 is only honoured when WHISPER_SYCL=ON
|
||||||
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
windows:
|
windows:
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
|
|
||||||
|
@ -70,12 +70,14 @@ if (APPLE)
|
|||||||
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||||
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
|
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
|
||||||
else()
|
else()
|
||||||
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
|
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
|
||||||
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
|
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
|
||||||
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
|
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
|
||||||
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
|
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
|
||||||
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
|
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
|
||||||
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
|
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
|
||||||
|
option(WHISPER_SYCL "whisper: use SYCL" OFF)
|
||||||
|
option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
||||||
@ -106,6 +108,13 @@ endif()
|
|||||||
|
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
|
|
||||||
|
# C++ standard selection: the SYCL backend needs C++17, every other
# configuration keeps the C++11 baseline
if (WHISPER_SYCL)
    set(CMAKE_CXX_STANDARD 17)
    # fail at configure time rather than silently decaying to an older standard
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
else()
    set(CMAKE_CXX_STANDARD 11)
endif()
|
||||||
|
|
||||||
# on APPLE
|
# on APPLE
|
||||||
if (APPLE)
|
if (APPLE)
|
||||||
# include Accelerate framework
|
# include Accelerate framework
|
||||||
@ -309,6 +318,30 @@ if( WHISPER_OPENVINO )
|
|||||||
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
|
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# SYCL backend (Intel oneAPI): checks that the oneAPI environment is active,
# then adds the compile definitions, compiler/linker flags, ggml-sycl sources
# and the SYCL/MKL link libraries.
if (WHISPER_SYCL)
    if ( NOT DEFINED ENV{ONEAPI_ROOT})
        message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
    endif()
    #todo: AOT

    find_package(IntelSYCL REQUIRED)
    if (WHISPER_SYCL_F16)
        add_compile_definitions(GGML_SYCL_F16)
    endif()
    add_compile_definitions(GGML_USE_SYCL)

    add_compile_options(-I./) #include DPCT
    # no extra '/' in front of the path, and no bare "-I" when the variable is empty
    if (SYCL_INCLUDE_DIR)
        add_compile_options("-I${SYCL_INCLUDE_DIR}")
    endif()

    # MKLROOT is normally only an environment variable (set when the oneAPI
    # environment is sourced); import it so the -L path below is not just "/lib"
    if (NOT DEFINED MKLROOT AND DEFINED ENV{MKLROOT})
        set(MKLROOT "$ENV{MKLROOT}")
    endif()

    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")

    set(GGML_HEADERS_SYCL ggml-sycl.h)
    set(GGML_SOURCES_SYCL ggml-sycl.cpp)

    set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (WHISPER_SYCL_F16)
    # the FP16 switch is only evaluated inside the SYCL branch above
    message(WARNING "WHISPER_SYCL_F16 has no effect unless WHISPER_SYCL=ON")
endif()
|
||||||
# compiler flags
|
# compiler flags
|
||||||
|
|
||||||
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||||
@ -503,6 +536,8 @@ add_library(${TARGET}
|
|||||||
${GGML_SOURCES_METAL}
|
${GGML_SOURCES_METAL}
|
||||||
${GGML_SOURCES_CUDA}
|
${GGML_SOURCES_CUDA}
|
||||||
${GGML_SOURCES_OPENCL}
|
${GGML_SOURCES_OPENCL}
|
||||||
|
${GGML_SOURCES_SYCL}
|
||||||
|
${GGML_HEADERS_SYCL}
|
||||||
whisper.h
|
whisper.h
|
||||||
whisper.cpp
|
whisper.cpp
|
||||||
)
|
)
|
||||||
|
249
README_sycl.md
Normal file
249
README_sycl.md
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
# whisper.cpp for SYCL
|
||||||
|
|
||||||
|
[Background](#background)
|
||||||
|
|
||||||
|
[OS](#os)
|
||||||
|
|
||||||
|
[Intel GPU](#intel-gpu)
|
||||||
|
|
||||||
|
[Linux](#linux)
|
||||||
|
|
||||||
|
[Environment Variable](#environment-variable)
|
||||||
|
|
||||||
|
[Known Issue](#known-issue)
|
||||||
|
|
||||||
|
[Todo](#todo)
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
||||||
|
|
||||||
|
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
||||||
|
|
||||||
|
Intel uses SYCL as its direct programming language to support CPUs, GPUs, and FPGAs.
|
||||||
|
|
||||||
|
To avoid re-inventing the wheel, this code follows the other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We used the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate the code to SYCL.
|
||||||
|
|
||||||
|
The whisper.cpp for SYCL is used to support Intel GPUs.
|
||||||
|
|
||||||
|
For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
|
||||||
|
|
||||||
|
## OS
|
||||||
|
|
||||||
|
|OS|Status|Verified|
|
||||||
|
|-|-|-|
|
||||||
|
|Linux|Support|Ubuntu 22.04|
|
||||||
|
|Windows|Ongoing| |
|
||||||
|
|
||||||
|
|
||||||
|
## Intel GPU
|
||||||
|
|
||||||
|
|Intel GPU| Status | Verified Model|
|
||||||
|
|-|-|-|
|
||||||
|
|Intel Data Center Max Series| Support| Max 1550|
|
||||||
|
|Intel Data Center Flex Series| Support| Flex 170|
|
||||||
|
|Intel Arc Series| Support| Arc 770|
|
||||||
|
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
||||||
|
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
||||||
|
|
||||||
|
|
||||||
|
## Linux
|
||||||
|
|
||||||
|
### Setup Environment
|
||||||
|
|
||||||
|
1. Install Intel GPU driver.
|
||||||
|
|
||||||
|
a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
||||||
|
|
||||||
|
Note: for iGPU, please install the client GPU driver.
|
||||||
|
|
||||||
|
b. Add user to group: video, render.
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo usermod -aG render username
|
||||||
|
sudo usermod -aG video username
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: re-login to enable it.
|
||||||
|
|
||||||
|
c. Check
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo apt install clinfo
|
||||||
|
sudo clinfo -l
|
||||||
|
```
|
||||||
|
|
||||||
|
Output (example):
|
||||||
|
|
||||||
|
```
|
||||||
|
Platform #0: Intel(R) OpenCL Graphics
|
||||||
|
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
||||||
|
|
||||||
|
|
||||||
|
Platform #0: Intel(R) OpenCL HD Graphics
|
||||||
|
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install Intel® oneAPI Base toolkit.
|
||||||
|
|
||||||
|
|
||||||
|
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
||||||
|
|
||||||
|
We recommend installing to the default folder: **/opt/intel/oneapi**.
|
||||||
|
|
||||||
|
The following guide uses the default folder as an example. If you installed to a different folder, adjust the paths in the commands below accordingly.
|
||||||
|
|
||||||
|
b. Check
|
||||||
|
|
||||||
|
```
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
|
sycl-ls
|
||||||
|
```
|
||||||
|
|
||||||
|
There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
|
||||||
|
|
||||||
|
Output (example):
|
||||||
|
```
|
||||||
|
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
||||||
|
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
||||||
|
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
||||||
|
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Build locally:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir -p build
|
||||||
|
cd build
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
|
#for FP16
|
||||||
|
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
||||||
|
|
||||||
|
#for FP32
|
||||||
|
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
#build example/main only
|
||||||
|
#cmake --build . --config Release --target main
|
||||||
|
|
||||||
|
#build all binary
|
||||||
|
cmake --build . --config Release -v
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```
|
||||||
|
./examples/sycl/build.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.
|
||||||
|
|
||||||
|
### Run
|
||||||
|
|
||||||
|
1. Put model file to folder **models**
|
||||||
|
|
||||||
|
2. Enable oneAPI running environment
|
||||||
|
|
||||||
|
```
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
3. List device ID
|
||||||
|
|
||||||
|
Run without parameter:
|
||||||
|
|
||||||
|
```
|
||||||
|
./build/bin/ls-sycl-device
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
./build/bin/main
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the ID in startup log, like:
|
||||||
|
|
||||||
|
```
|
||||||
|
found 4 SYCL devices:
|
||||||
|
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||||
|
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||||
|
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|Attribute|Note|
|
||||||
|
|-|-|
|
||||||
|
|compute capability 1.3|Level Zero runtime, recommended |
|
||||||
|
|compute capability 3.0|OpenCL runtime, slower than Level Zero in most cases|
|
||||||
|
|
||||||
|
4. Set device ID and execute whisper.cpp
|
||||||
|
|
||||||
|
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
||||||
|
|
||||||
|
```
|
||||||
|
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||||
|
```
|
||||||
|
or run by script:
|
||||||
|
|
||||||
|
```
|
||||||
|
./examples/sycl/run-whisper.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
5. Check the device ID in output
|
||||||
|
|
||||||
|
Like:
|
||||||
|
```
|
||||||
|
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Environment Variable
|
||||||
|
|
||||||
|
#### Build
|
||||||
|
|
||||||
|
|Name|Value|Function|
|
||||||
|
|-|-|-|
|
||||||
|
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
||||||
|
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
||||||
|
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
||||||
|
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
||||||
|
|
||||||
|
#### Running
|
||||||
|
|
||||||
|
|
||||||
|
|Name|Value|Function|
|
||||||
|
|-|-|-|
|
||||||
|
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
||||||
|
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
||||||
|
|
||||||
|
## Known Issue
|
||||||
|
|
||||||
|
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||||
|
|
||||||
|
The oneAPI runtime environment has not been enabled.
|
||||||
|
|
||||||
|
Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
|
||||||
|
|
||||||
|
|
||||||
|
- Hang during startup
|
||||||
|
|
||||||
|
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy behaves abnormally and blocks.
|
||||||
|
|
||||||
|
Solution: add **--no-mmap**.
|
||||||
|
|
||||||
|
## Todo
|
||||||
|
|
||||||
|
- Support to build in Windows.
|
||||||
|
|
||||||
|
- Support multiple cards.
|
@ -79,6 +79,9 @@ else()
|
|||||||
add_subdirectory(talk)
|
add_subdirectory(talk)
|
||||||
add_subdirectory(talk-llama)
|
add_subdirectory(talk-llama)
|
||||||
add_subdirectory(lsp)
|
add_subdirectory(lsp)
|
||||||
|
# the SYCL device-listing example is only built when the SYCL backend is enabled;
# the option defined by this project is WHISPER_SYCL (LLAMA_SYCL is never set here)
if (WHISPER_SYCL)
    add_subdirectory(sycl)
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(wchess)
|
add_subdirectory(wchess)
|
||||||
|
9
examples/sycl/CMakeLists.txt
Normal file
9
examples/sycl/CMakeLists.txt
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# MIT license
|
||||||
|
# Copyright (C) 2024 Intel Corporation
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
# ls-sycl-device: small tool built from ls-sycl-device.cpp that lists SYCL devices
set(TARGET ls-sycl-device)
add_executable(${TARGET} ls-sycl-device.cpp)
install(TARGETS ${TARGET} RUNTIME)
# link against this project's library target (whisper) instead of "llama",
# which is a target name carried over from llama.cpp
target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
47
examples/sycl/README.md
Normal file
47
examples/sycl/README.md
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# whisper.cpp/examples/sycl
|
||||||
|
|
||||||
|
This example program provides tools for whisper.cpp for SYCL on Intel GPUs.
|
||||||
|
|
||||||
|
## Tool
|
||||||
|
|
||||||
|
|Tool Name| Function|Status|
|
||||||
|
|-|-|-|
|
||||||
|
|ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, etc.|Support|
|
||||||
|
|
||||||
|
### ls-sycl-device
|
||||||
|
|
||||||
|
List all SYCL devices with ID, compute capability, max work group size, etc.
|
||||||
|
|
||||||
|
1. Build whisper.cpp for SYCL for all targets.
|
||||||
|
|
||||||
|
2. Enable oneAPI running environment
|
||||||
|
|
||||||
|
```
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Execute
|
||||||
|
|
||||||
|
```
|
||||||
|
./build/bin/ls-sycl-device
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the ID in startup log, like:
|
||||||
|
|
||||||
|
```
|
||||||
|
found 4 SYCL devices:
|
||||||
|
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||||
|
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||||
|
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|Attribute|Note|
|
||||||
|
|-|-|
|
||||||
|
|compute capability 1.3|Level Zero runtime, recommended |
|
||||||
|
|compute capability 3.0|OpenCL runtime, slower than Level Zero in most cases|
|
19
examples/sycl/build.sh
Normal file
19
examples/sycl/build.sh
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#!/bin/bash

#  MIT license
#  Copyright (C) 2024 Intel Corporation
#  SPDX-License-Identifier: MIT

# Configures and builds the project with the SYCL backend.
# The build tree is created in ./build relative to the current directory,
# and cmake is pointed at the parent directory ("..").

mkdir -p build
# stop here if the build directory cannot be entered, so cmake never runs elsewhere
cd build || exit 1
source /opt/intel/oneapi/setvars.sh

#for FP16
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON # faster for long-prompt inference

#for FP32
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

#build example/main only
#cmake --build . --config Release --target main

#build all binary
cmake --build . --config Release -v
|
11
examples/sycl/ls-sycl-device.cpp
Normal file
11
examples/sycl/ls-sycl-device.cpp
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
/*MIT license
|
||||||
|
Copyright (C) 2024 Intel Corporation
|
||||||
|
SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ggml-sycl.h"
|
||||||
|
|
||||||
|
// Entry point: prints the SYCL devices reported by the ggml SYCL backend.
// Takes no command-line arguments, so the unused argc/argv parameters are omitted.
int main() {
    ggml_backend_sycl_print_sycl_devices();
    return 0;
}
|
17
examples/sycl/run-whisper.sh
Normal file
17
examples/sycl/run-whisper.sh
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash

#  MIT license
#  Copyright (C) 2024 Intel Corporation
#  SPDX-License-Identifier: MIT

# Usage: run-whisper.sh [device-id]
# Runs ./build/bin/main on samples/jfk.wav with the selected SYCL device.

source /opt/intel/oneapi/setvars.sh

# the optional first argument selects the SYCL device id; default is device 0
if [ $# -gt 0 ]; then
    export GGML_SYCL_DEVICE="$1"
else
    export GGML_SYCL_DEVICE=0
fi
echo "GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE"
#export GGML_SYCL_DEBUG=1
./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
14
whisper.cpp
14
whisper.cpp
@ -12,6 +12,10 @@
|
|||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
#include "ggml-sycl.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef WHISPER_USE_OPENVINO
|
#ifdef WHISPER_USE_OPENVINO
|
||||||
#include "openvino/whisper-openvino-encoder.h"
|
#include "openvino/whisper-openvino-encoder.h"
|
||||||
#endif
|
#endif
|
||||||
@ -1052,6 +1056,16 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
if (params.use_gpu) {
|
||||||
|
WHISPER_LOG_INFO("%s: using SYCL backend\n", __func__);
|
||||||
|
backend_gpu = ggml_backend_sycl_init(params.gpu_device);
|
||||||
|
if (!backend_gpu) {
|
||||||
|
WHISPER_LOG_ERROR("%s: ggml_backend_sycl_init() failed\n", __func__);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (backend_gpu) {
|
if (backend_gpu) {
|
||||||
return backend_gpu;
|
return backend_gpu;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user