refactoring : move main + stream in examples + other stuff

commit c6710efde2 (parent 4c68f4cac0)
Author: Georgi Gerganov
Date: 2022-10-25 19:13:08 +03:00

18 changed files with 205 additions and 102 deletions

.gitignore (vendored)

@@ -1,17 +1,21 @@
-sync.sh
-main
-stream
 *.o
-.cache
+.cache/
+.vs/
+.vscode/
+.DS_Store
 build/
 build-em/
 build-debug/
 build-release/
-out/
-.vs/
-.vscode/
+build-sanitize-addr/
+build-sanitize-thread/
+main
+stream
+bench
+sync.sh
 compile_commands.json
-.DS_Store
 examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
 examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/

CMakeLists.txt

@@ -48,7 +48,7 @@ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" O
 if (NOT MSVC)
     if (WHISPER_SANITIZE_THREAD)
         set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
     endif()
@@ -133,7 +133,9 @@ else()
 endif()
 endif()

+#
 # whisper - this is the main library of the project
+#

 set(TARGET whisper)
@@ -167,40 +169,17 @@ install(TARGETS ${TARGET}
     ARCHIVE DESTINATION lib/static
     )

+#
 # bindings
+#

 add_subdirectory(bindings)

+#
 # programs, examples and tests
+#

 if (WHISPER_STANDALONE)
-    if (NOT EMSCRIPTEN)
-        # TODO: move to examples
-        # main
-        set(TARGET main)
-        add_executable(${TARGET} main.cpp)
-        target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
-
-        # TODO: move to examples
-        if (WHISPER_SUPPORT_SDL2)
-        if (WHISPER_SUPPORT_SDL2)
-            # SDL2
-            find_package(SDL2 REQUIRED)
-
-            string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
-
-            message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
-            message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
-        endif()
-
-            # stream
-            set(TARGET stream)
-            add_executable(${TARGET} stream.cpp)
-            target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
-            target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-        endif ()
-    endif()

     if (WHISPER_BUILD_TESTS)
         enable_testing()
         add_subdirectory(tests)

Makefile

@@ -19,13 +19,10 @@ endif
 # Compile flags
 #
-CFLAGS   = -O3 -std=c11
-CXXFLAGS = -O3 -std=c++11
+CFLAGS   = -I.              -O3 -std=c11
+CXXFLAGS = -I. -I./examples -O3 -std=c++11
 LDFLAGS  =
-CFLAGS   += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
-CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function

 # OS specific
 # TODO: support Windows
 ifeq ($(UNAME_S),Linux)

@@ -76,8 +73,8 @@ endif
 # Build library + main
 #
-main: main.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main $(LDFLAGS)
+main: examples/main/main.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp whisper.o ggml.o -o main $(LDFLAGS)
 	./main -h

 ggml.o: ggml.c ggml.h

@@ -90,7 +87,7 @@ libwhisper.a: ggml.o whisper.o
 	ar rcs libwhisper.a ggml.o whisper.o

 clean:
-	rm -f *.o main stream libwhisper.a
+	rm -f *.o main stream bench libwhisper.a

 #
 # Examples

@@ -98,8 +95,11 @@ clean:
 CC_SDL=`sdl2-config --cflags --libs`

-stream: stream.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+stream: examples/stream/stream.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+
+bench: examples/bench/bench.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

 #
 # Audio samples

@@ -139,7 +139,7 @@ samples:
 .PHONY: large

 tiny.en tiny base.en base small.en small medium.en medium large: main
-	bash ./download-ggml-model.sh $@
+	bash ./models/download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
 	@echo "Running $@ on all samples in ./samples ..."

README.md

@@ -24,23 +24,32 @@ Supported platforms:
 - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
 - [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)

+The entire implementation of the model is contained in 2 source files:
+
+- [ggml.h](ggml.h) / [ggml.c](ggml.c)
+- [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
+
 Having such a lightweight implementation of the model makes it easy to integrate it in different platforms and applications.

 As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:

 https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4

-## Usage
+## Quick start

-To build the main program, run `make`. You can then transcribe a `.wav` file like this:
+First, download one of the Whisper models converted in [ggml format](models). For example:

 ```bash
-./main -f input.wav
+bash ./models/download-ggml-model.sh base.en
 ```

-Before running the program, make sure to download one of the ggml Whisper models. For example:
+Now build the [main](examples/main) example and transcribe an audio file like this:

 ```bash
-bash ./download-ggml-model.sh base.en
+# build the main example
+make
+
+# transcribe an audio file
+./main -f input.wav
 ```

 ---

@@ -73,7 +82,7 @@ options:
   -m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
   -f FNAME, --file FNAME  input WAV file path

-bash ./download-ggml-model.sh base.en
+bash ./models/download-ggml-model.sh base.en
 Downloading ggml model base.en ...
 models/ggml-base.en.bin 100%[=============================================>] 141.11M 3.13MB/s in 79s
 Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
@@ -232,7 +241,7 @@ whisper_print_timings: total time = 33686.27 ms
 ## Real-time audio input example

 This is a naive example of performing real-time inference on audio from your microphone.
-The `stream` tool samples the audio every half a second and runs the transcription continuously.
+The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
 More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).

 ```java

@@ -241,7 +250,7 @@ More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/i
 https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4

-The `stream` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
+The [stream](examples/stream) tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:

 ```bash
 # Install SDL2 on Linux
@@ -264,8 +273,9 @@ to highlight words with high or low confidence:
 - The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
 - The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
-- Simple usage is demonstrated in [main.cpp](main.cpp)
-- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)
+- Sample usage is demonstrated in [main.cpp](examples/main)
+- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
+- Various other examples are available in the [examples](examples) folder

 The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
 intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since

@@ -279,11 +289,11 @@ the Accelerate framework utilizes the special-purpose AMX coprocessor available
 This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
 from the original Python implementation, so in order to make a fair comparison between the 2 implementations, make sure
 to run the Python code with the following parameters:

 ```
 whisper --best_of None --beam_size None ...
 ```

 In the future, `whisper.cpp` will support more sampling strategies.

 ## Memory usage
@@ -306,7 +316,7 @@ The original models are converted to a custom binary format. This allows to pack
 - vocabulary
 - weights

-You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script or from here:
+You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script or from here:

 https://ggml.ggerganov.com

examples/CMakeLists.txt

@@ -4,11 +4,24 @@ find_package(Threads REQUIRED)
 # third-party

-#add_subdirectory(third-party)
+if (WHISPER_SUPPORT_SDL2)
+    # SDL2
+    find_package(SDL2 REQUIRED)
+
+    string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
+
+    message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
+    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
+endif()

 # examples

+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
 if (EMSCRIPTEN)
     add_subdirectory(whisper.wasm)
 else()
+    add_subdirectory(main)
+    add_subdirectory(stream)
+    add_subdirectory(bench)
 endif()
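
Since SDL2 discovery now lives in `examples/CMakeLists.txt`, a CMake build only picks up the SDL2-dependent `stream` example when the corresponding option is enabled. A minimal sketch of such a configuration (assuming the SDL2 development package is already installed):

```bash
mkdir build && cd build
cmake -DWHISPER_SUPPORT_SDL2=ON ..
make
```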

examples/bench/CMakeLists.txt (new file)

@@ -0,0 +1,3 @@
set(TARGET bench)
add_executable(${TARGET} bench.cpp)
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

examples/bench/README.md (new file)

@@ -0,0 +1,3 @@
# bench

TODO

examples/bench/bench.cpp (new file)

@@ -0,0 +1,78 @@
#include "whisper.h"
#include <cstdio>
#include <string>
#include <thread>
// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
std::string model = "models/ggml-base.en.bin";
};
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-m" || arg == "--model") {
params.model = argv[++i];
} else if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params);
exit(0);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
exit(0);
}
}
return true;
}
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME model path (default: %s)\n", params.model.c_str());
fprintf(stderr, "\n");
}
int main(int argc, char ** argv) {
whisper_params params;
if (whisper_params_parse(argc, argv, params) == false) {
return 1;
}
// whisper init
struct whisper_context * ctx = whisper_init(params.model.c_str());
if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
return 2;
}
if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
fprintf(stderr, "error: failed to set mel: %d\n", ret);
return 3;
}
if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
fprintf(stderr, "error: failed to encode model: %d\n", ret);
return 4;
}
whisper_print_timings(ctx);
whisper_free(ctx);
return 0;
}
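
Once built, `bench` runs a single encoder pass over the chosen model and prints the usual timing summary; for example (assuming the base.en model has been downloaded as shown in the README):

```bash
# measure encoder performance of the base.en model with 4 threads
./bench -m models/ggml-base.en.bin -t 4
```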

examples/main/CMakeLists.txt (new file)

@@ -0,0 +1,3 @@
set(TARGET main)
add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

examples/main/README.md (new file, empty)

examples/main/main.cpp (moved from main.cpp)

@@ -290,6 +290,11 @@ int main(int argc, char ** argv) {
     struct whisper_context * ctx = whisper_init(params.model.c_str());

+    if (ctx == nullptr) {
+        fprintf(stderr, "error: failed to initialize whisper context\n");
+        return 3;
+    }
+
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];

@@ -300,22 +305,22 @@ int main(int argc, char ** argv) {
         if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
             fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
             whisper_print_usage(argc, argv, {});
-            return 3;
+            return 4;
         }

         if (wav.channels != 1 && wav.channels != 2) {
             fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
-            return 4;
+            return 5;
         }

         if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
             fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
-            return 5;
+            return 6;
         }

         if (wav.bitsPerSample != 16) {
             fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
-            return 6;
+            return 7;
         }

         int n = wav.totalPCMFrameCount;

@@ -379,7 +384,7 @@ int main(int argc, char ** argv) {
         if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
             fprintf(stderr, "%s: failed to process audio\n", argv[0]);
-            return 7;
+            return 8;
         }

         printf("\n");

examples/stream/CMakeLists.txt (new file)

@@ -0,0 +1,7 @@
if (WHISPER_SUPPORT_SDL2)
    # stream
    set(TARGET stream)
    add_executable(${TARGET} stream.cpp)
    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endif ()


ggml.c

@@ -15,10 +15,39 @@
 #include <stdio.h>

 #if defined _MSC_VER
-#include "msvc_thread_atomic.h"
+#include <Windows.h>
+
+typedef volatile LONG atomic_int;
+typedef atomic_int atomic_bool;
+
+static void atomic_store(atomic_int* ptr, LONG val) {
+    InterlockedExchange(ptr, val);
+}
+static LONG atomic_load(atomic_int* ptr) {
+    return InterlockedCompareExchange(ptr, 0, 0);
+}
+static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
+    return InterlockedExchangeAdd(ptr, inc);
+}
+static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
+    return atomic_fetch_add(ptr, -(dec));
+}
+
+typedef HANDLE pthread_t;
+typedef DWORD thread_ret_t;
+
+static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
+    *out = CreateThread(NULL, 0, func, arg, 0, NULL);
+    return *out != NULL;
+}
+static int pthread_join(pthread_t thread, void* unused) {
+    return (int) WaitForSingleObject(thread, INFINITE);
+}
 #else
 #include <pthread.h>
 #include <stdatomic.h>

 typedef void* thread_ret_t;
 #endif
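
The idea behind this block is to alias just enough of the pthread and C11-atomics surface onto Win32 primitives that the threading code in ggml compiles unchanged under MSVC. A minimal stand-alone sketch of the same pattern (hypothetical demo code, not part of this commit; under MSVC it assumes the shim above is in scope):

```c
// demo.c - illustration of the portability pattern used above.
// On MSVC the shim supplies atomic_* / pthread_* via Win32; elsewhere
// the real <pthread.h> and <stdatomic.h> are used. The portable code
// below is identical in both cases.
#include <stdio.h>

#if !defined(_MSC_VER)
#include <pthread.h>
#include <stdatomic.h>
typedef void* thread_ret_t;
#endif

static atomic_int g_counter;

static thread_ret_t worker(void * arg) {
    (void) arg;
    atomic_fetch_add(&g_counter, 1); // InterlockedExchangeAdd on Windows
    return 0;
}

int main(void) {
    atomic_store(&g_counter, 0);

    pthread_t th;
    pthread_create(&th, NULL, worker, NULL); // CreateThread on Windows
    pthread_join(th, NULL);                  // WaitForSingleObject on Windows

    printf("counter = %d\n", (int) atomic_load(&g_counter));
    return 0;
}
```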

models/download-ggml-model.sh (moved from download-ggml-model.sh)

@@ -3,7 +3,7 @@
 # This script downloads Whisper model files that have already been converted to ggml format.
 # This way you don't have to convert them yourself.

-ggml_path=$(dirname $(realpath $0))
+models_path=$(dirname $(realpath $0))

 # Whisper models
 models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )

@@ -38,14 +38,14 @@ fi
 printf "Downloading ggml model $model ...\n"

-mkdir -p models
+cd $models_path

-if [ -f "models/ggml-$model.bin" ]; then
+if [ -f "ggml-$model.bin" ]; then
     printf "Model $model already exists. Skipping download.\n"
     exit 0
 fi

-wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
+wget --quiet --show-progress -O ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin

 if [ $? -ne 0 ]; then
     printf "Failed to download ggml model $model \n"

msvc_thread_atomic.h (deleted; its contents were folded into ggml.c)

@@ -1,31 +0,0 @@
#pragma once

#include <Windows.h>

typedef volatile LONG atomic_int;
typedef atomic_int atomic_bool;

static void atomic_store(atomic_int* ptr, LONG val) {
    InterlockedExchange(ptr, val);
}
static LONG atomic_load(atomic_int* ptr) {
    return InterlockedCompareExchange(ptr, 0, 0);
}
static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
    return InterlockedExchangeAdd(ptr, inc);
}
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
    return atomic_fetch_add(ptr, -(dec));
}

typedef HANDLE pthread_t;
typedef DWORD thread_ret_t;

static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
    out = CreateThread(NULL, 0, func, arg, 0, NULL);
    return out != NULL;
}
static int pthread_join(pthread_t thread, void* unused) {
    return (int) WaitForSingleObject(thread, INFINITE);
}