mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-19 20:57:52 +00:00
refactoring : move main + stream in examples + other stuff
This commit is contained in:
parent
4c68f4cac0
commit
c6710efde2
20
.gitignore
vendored
20
.gitignore
vendored
@ -1,17 +1,21 @@
|
||||
sync.sh
|
||||
main
|
||||
stream
|
||||
*.o
|
||||
.cache
|
||||
.cache/
|
||||
.vs/
|
||||
.vscode/
|
||||
.DS_Store
|
||||
|
||||
build/
|
||||
build-em/
|
||||
build-debug/
|
||||
build-release/
|
||||
out/
|
||||
.vs/
|
||||
.vscode/
|
||||
build-sanitize-addr/
|
||||
build-sanitize-thread/
|
||||
|
||||
main
|
||||
stream
|
||||
bench
|
||||
sync.sh
|
||||
compile_commands.json
|
||||
.DS_Store
|
||||
|
||||
examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
|
||||
examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
|
||||
|
@ -133,7 +133,9 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#
|
||||
# whisper - this is the main library of the project
|
||||
#
|
||||
|
||||
set(TARGET whisper)
|
||||
|
||||
@ -167,40 +169,17 @@ install(TARGETS ${TARGET}
|
||||
ARCHIVE DESTINATION lib/static
|
||||
)
|
||||
|
||||
#
|
||||
# bindings
|
||||
#
|
||||
|
||||
add_subdirectory(bindings)
|
||||
|
||||
#
|
||||
# programs, examples and tests
|
||||
#
|
||||
|
||||
if (WHISPER_STANDALONE)
|
||||
if (NOT EMSCRIPTEN)
|
||||
# TODO: move to examples
|
||||
# main
|
||||
set(TARGET main)
|
||||
add_executable(${TARGET} main.cpp)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
||||
# TODO: move to examples
|
||||
if (WHISPER_SUPPORT_SDL2)
|
||||
if (WHISPER_SUPPORT_SDL2)
|
||||
# SDL2
|
||||
find_package(SDL2 REQUIRED)
|
||||
|
||||
string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
|
||||
|
||||
message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
|
||||
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
|
||||
endif()
|
||||
|
||||
# stream
|
||||
set(TARGET stream)
|
||||
add_executable(${TARGET} stream.cpp)
|
||||
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (WHISPER_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
|
22
Makefile
22
Makefile
@ -19,13 +19,10 @@ endif
|
||||
# Compile flags
|
||||
#
|
||||
|
||||
CFLAGS = -O3 -std=c11
|
||||
CXXFLAGS = -O3 -std=c++11
|
||||
CFLAGS = -I. -O3 -std=c11
|
||||
CXXFLAGS = -I. -I./examples -O3 -std=c++11
|
||||
LDFLAGS =
|
||||
|
||||
CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
|
||||
CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
|
||||
|
||||
# OS specific
|
||||
# TODO: support Windows
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
@ -76,8 +73,8 @@ endif
|
||||
# Build library + main
|
||||
#
|
||||
|
||||
main: main.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main $(LDFLAGS)
|
||||
main: examples/main/main.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp whisper.o ggml.o -o main $(LDFLAGS)
|
||||
./main -h
|
||||
|
||||
ggml.o: ggml.c ggml.h
|
||||
@ -90,7 +87,7 @@ libwhisper.a: ggml.o whisper.o
|
||||
ar rcs libwhisper.a ggml.o whisper.o
|
||||
|
||||
clean:
|
||||
rm -f *.o main stream libwhisper.a
|
||||
rm -f *.o main stream bench libwhisper.a
|
||||
|
||||
#
|
||||
# Examples
|
||||
@ -98,8 +95,11 @@ clean:
|
||||
|
||||
CC_SDL=`sdl2-config --cflags --libs`
|
||||
|
||||
stream: stream.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
||||
stream: examples/stream/stream.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
||||
|
||||
bench: examples/bench/bench.cpp ggml.o whisper.o
|
||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
|
||||
|
||||
#
|
||||
# Audio samples
|
||||
@ -139,7 +139,7 @@ samples:
|
||||
.PHONY: large
|
||||
|
||||
tiny.en tiny base.en base small.en small medium.en medium large: main
|
||||
bash ./download-ggml-model.sh $@
|
||||
bash ./models/download-ggml-model.sh $@
|
||||
@echo ""
|
||||
@echo "==============================================="
|
||||
@echo "Running $@ on all samples in ./samples ..."
|
||||
|
32
README.md
32
README.md
@ -24,23 +24,32 @@ Supported platforms:
|
||||
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
|
||||
- [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)
|
||||
|
||||
The entire implementation of the model is contained in 2 source files:
|
||||
|
||||
- [ggml.h](ggml.h) / [ggml.c](ggml.c)
|
||||
- [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
|
||||
|
||||
Having such a lightweight implementation of the model makes it easy to integrate into different platforms and applications.
|
||||
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
|
||||
|
||||
## Usage
|
||||
## Quick start
|
||||
|
||||
To build the main program, run `make`. You can then transcribe a `.wav` file like this:
|
||||
First, download one of the Whisper models converted in [ggml format](models). For example:
|
||||
|
||||
```bash
|
||||
./main -f input.wav
|
||||
bash ./models/download-ggml-model.sh base.en
|
||||
```
|
||||
|
||||
Before running the program, make sure to download one of the ggml Whisper models. For example:
|
||||
Now build the [main](examples/main) example and transcribe an audio file like this:
|
||||
|
||||
```bash
|
||||
bash ./download-ggml-model.sh base.en
|
||||
# build the main example
|
||||
make
|
||||
|
||||
# transcribe an audio file
|
||||
./main -f input.wav
|
||||
```
|
||||
|
||||
---
|
||||
@ -73,7 +82,7 @@ options:
|
||||
-m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
|
||||
-f FNAME, --file FNAME input WAV file path
|
||||
|
||||
bash ./download-ggml-model.sh base.en
|
||||
bash ./models/download-ggml-model.sh base.en
|
||||
Downloading ggml model base.en ...
|
||||
models/ggml-base.en.bin 100%[=============================================>] 141.11M 3.13MB/s in 79s
|
||||
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
||||
@ -232,7 +241,7 @@ whisper_print_timings: total time = 33686.27 ms
|
||||
## Real-time audio input example
|
||||
|
||||
This is a naive example of performing real-time inference on audio from your microphone.
|
||||
The `stream` tool samples the audio every half a second and runs the transcription continously.
|
||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||
|
||||
```java
|
||||
@ -241,7 +250,7 @@ More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/i
|
||||
|
||||
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
||||
|
||||
The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
|
||||
The [stream](examples/stream) tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
|
||||
|
||||
```bash
|
||||
# Install SDL2 on Linux
|
||||
@ -264,8 +273,9 @@ to highlight words with high or low confidence:
|
||||
|
||||
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
|
||||
- The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
|
||||
- Simple usage is demonstrated in [main.cpp](main.cpp)
|
||||
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)
|
||||
- Sample usage is demonstrated in [main.cpp](examples/main)
|
||||
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
||||
- Various other examples are available in the [examples](examples) folder
|
||||
|
||||
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
|
||||
intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
|
||||
@ -306,7 +316,7 @@ The original models are converted to a custom binary format. This allows to pack
|
||||
- vocabulary
|
||||
- weights
|
||||
|
||||
You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script or from here:
|
||||
You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script or from here:
|
||||
|
||||
https://ggml.ggerganov.com
|
||||
|
||||
|
@ -4,11 +4,24 @@ find_package(Threads REQUIRED)
|
||||
|
||||
# third-party
|
||||
|
||||
#add_subdirectory(third-party)
|
||||
if (WHISPER_SUPPORT_SDL2)
|
||||
# SDL2
|
||||
find_package(SDL2 REQUIRED)
|
||||
|
||||
string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
|
||||
|
||||
message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
|
||||
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
|
||||
endif()
|
||||
|
||||
# examples
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
if (EMSCRIPTEN)
|
||||
add_subdirectory(whisper.wasm)
|
||||
else()
|
||||
add_subdirectory(main)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(bench)
|
||||
endif()
|
||||
|
3
examples/bench/CMakeLists.txt
Normal file
3
examples/bench/CMakeLists.txt
Normal file
@ -0,0 +1,3 @@
|
||||
set(TARGET bench)
|
||||
add_executable(${TARGET} bench.cpp)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
|
3
examples/bench/README.md
Normal file
3
examples/bench/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# bench
|
||||
|
||||
TODO
|
78
examples/bench/bench.cpp
Normal file
78
examples/bench/bench.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
#include "whisper.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
// command-line parameters
|
||||
// Options accepted by the bench tool. The member initializers are the defaults
// that are shown in the usage text.
struct whisper_params {
    // Number of threads passed to whisper_encode(). Capped at 4 and floored at 1:
    // std::thread::hardware_concurrency() is allowed to return 0 when the value
    // cannot be determined, which would otherwise produce a default of 0 threads.
    int32_t n_threads = std::min(4, std::max(1, (int32_t) std::thread::hardware_concurrency()));

    // Path of the model file handed to whisper_init().
    std::string model = "models/ggml-base.en.bin";
};
|
||||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
|
||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-t" || arg == "--threads") {
|
||||
params.n_threads = std::stoi(argv[++i]);
|
||||
} else if (arg == "-m" || arg == "--model") {
|
||||
params.model = argv[++i];
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
} else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write the command-line help for the bench tool to stderr.
//
// argv[0] is used as the program name; the values currently stored in
// `params` are printed as the defaults. `argc` is not used.
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
    FILE * out = stderr;
    const char * program  = argv[0];
    const char * model    = params.model.c_str();
    const int    nthreads = params.n_threads;

    fprintf(out, "\n");
    fprintf(out, "usage: %s [options]\n", program);
    fprintf(out, "\n");
    fprintf(out, "options:\n");
    fprintf(out, " -h, --help show this help message and exit\n");
    fprintf(out, " -t N, --threads N number of threads to use during computation (default: %d)\n", nthreads);
    fprintf(out, " -m FNAME, --model FNAME model path (default: %s)\n", model);
    fprintf(out, "\n");
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
whisper_params params;
|
||||
|
||||
if (whisper_params_parse(argc, argv, params) == false) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
|
||||
fprintf(stderr, "error: failed to set mel: %d\n", ret);
|
||||
return 3;
|
||||
}
|
||||
|
||||
if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to encode model: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
3
examples/main/CMakeLists.txt
Normal file
3
examples/main/CMakeLists.txt
Normal file
@ -0,0 +1,3 @@
|
||||
set(TARGET main)
|
||||
add_executable(${TARGET} main.cpp)
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
|
0
examples/main/README.md
Normal file
0
examples/main/README.md
Normal file
@ -290,6 +290,11 @@ int main(int argc, char ** argv) {
|
||||
|
||||
struct whisper_context * ctx = whisper_init(params.model.c_str());
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
||||
const auto fname_inp = params.fname_inp[f];
|
||||
|
||||
@ -300,22 +305,22 @@ int main(int argc, char ** argv) {
|
||||
if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
|
||||
fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
|
||||
whisper_print_usage(argc, argv, {});
|
||||
return 3;
|
||||
return 4;
|
||||
}
|
||||
|
||||
if (wav.channels != 1 && wav.channels != 2) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
|
||||
return 4;
|
||||
return 5;
|
||||
}
|
||||
|
||||
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
|
||||
return 5;
|
||||
return 6;
|
||||
}
|
||||
|
||||
if (wav.bitsPerSample != 16) {
|
||||
fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
|
||||
return 6;
|
||||
return 7;
|
||||
}
|
||||
|
||||
int n = wav.totalPCMFrameCount;
|
||||
@ -379,7 +384,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
||||
fprintf(stderr, "%s: failed to process audio\n", argv[0]);
|
||||
return 7;
|
||||
return 8;
|
||||
}
|
||||
|
||||
printf("\n");
|
7
examples/stream/CMakeLists.txt
Normal file
7
examples/stream/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
if (WHISPER_SUPPORT_SDL2)
|
||||
# stream
|
||||
set(TARGET stream)
|
||||
add_executable(${TARGET} stream.cpp)
|
||||
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif ()
|
0
examples/stream/README.md
Normal file
0
examples/stream/README.md
Normal file
31
ggml.c
31
ggml.c
@ -15,10 +15,39 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined _MSC_VER
|
||||
#include "msvc_thread_atomic.h"
|
||||
#include <Windows.h>
|
||||
|
||||
// Stand-ins for the <stdatomic.h> names used by this file when building with
// MSVC, implemented with the Win32 Interlocked* functions. Both atomic types
// share the same underlying representation.
typedef volatile LONG atomic_int;
typedef atomic_int atomic_bool;

// Atomically write `val` to `*ptr`. The value returned by
// InterlockedExchange is intentionally discarded.
static void atomic_store(atomic_int* ptr, LONG val) {
    (void) InterlockedExchange(ptr, val);
}

// Atomically read `*ptr`. The compare-exchange of 0 against 0 never changes
// the stored value; its return value is handed back to the caller.
static LONG atomic_load(atomic_int* ptr) {
    const LONG current = InterlockedCompareExchange(ptr, 0, 0);
    return current;
}

// Atomically add `inc` to `*ptr` and return the result of
// InterlockedExchangeAdd (per the Win32 documentation, the value held before
// the addition).
static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
    const LONG previous = InterlockedExchangeAdd(ptr, inc);
    return previous;
}

// Atomically subtract `dec` from `*ptr`, expressed as an addition of the
// negated amount.
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
    const LONG negated = -(dec);
    return atomic_fetch_add(ptr, negated);
}
|
||||
|
||||
typedef HANDLE pthread_t;
|
||||
|
||||
typedef DWORD thread_ret_t;
|
||||
// Win32 replacement for pthread_create(): starts `func(arg)` on a new thread
// and stores the resulting thread handle in `*out`. `unused` takes the place
// of the pthread attributes argument and is ignored.
//
// Returns 0 on success and a non-zero value on failure, following the POSIX
// convention that code written against <pthread.h> relies on.
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
    (void) unused;

    // CreateThread takes an LPTHREAD_START_ROUTINE; the cast makes the
    // conversion explicit.
    // NOTE(review): on 32-bit x86 `func` would additionally need the WINAPI
    // calling convention - confirm if that target matters.
    HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
    if (handle == NULL) {
        return 1;
    }

    // Write through the pointer so the caller actually receives the handle
    // it later passes to pthread_join().
    *out = handle;

    return 0;
}
|
||||
|
||||
// Win32 replacement for pthread_join(): blocks without a timeout until
// `thread` is signaled and returns the WaitForSingleObject result converted to
// int. `unused` takes the place of the pthread return-value pointer and is
// ignored.
// NOTE(review): the thread handle is not closed here - confirm whether the
// caller releases it or whether a CloseHandle() call is missing.
static int pthread_join(pthread_t thread, void* unused) {
    const DWORD wait_result = WaitForSingleObject(thread, INFINITE);

    return (int) wait_result;
}
|
||||
#else
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
|
||||
typedef void* thread_ret_t;
|
||||
#endif
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
# This script downloads Whisper model files that have already been converted to ggml format.
|
||||
# This way you don't have to convert them yourself.
|
||||
|
||||
ggml_path=$(dirname $(realpath $0))
|
||||
models_path=$(dirname $(realpath $0))
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
||||
@ -38,14 +38,14 @@ fi
|
||||
|
||||
printf "Downloading ggml model $model ...\n"
|
||||
|
||||
mkdir -p models
|
||||
cd $models_path
|
||||
|
||||
if [ -f "models/ggml-$model.bin" ]; then
|
||||
if [ -f "ggml-$model.bin" ]; then
|
||||
printf "Model $model already exists. Skipping download.\n"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
|
||||
wget --quiet --show-progress -O ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
printf "Failed to download ggml model $model \n"
|
@ -1,31 +0,0 @@
|
||||
#pragma once
|
||||
#include <Windows.h>
|
||||
|
||||
typedef volatile LONG atomic_int;
|
||||
typedef atomic_int atomic_bool;
|
||||
|
||||
static void atomic_store(atomic_int* ptr, LONG val) {
|
||||
InterlockedExchange(ptr, val);
|
||||
}
|
||||
static LONG atomic_load(atomic_int* ptr) {
|
||||
return InterlockedCompareExchange(ptr, 0, 0);
|
||||
}
|
||||
static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
|
||||
return InterlockedExchangeAdd(ptr, inc);
|
||||
}
|
||||
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
|
||||
return atomic_fetch_add(ptr, -(dec));
|
||||
}
|
||||
|
||||
typedef HANDLE pthread_t;
|
||||
|
||||
typedef DWORD thread_ret_t;
|
||||
static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
|
||||
out = CreateThread(NULL, 0, func, arg, 0, NULL);
|
||||
return out != NULL;
|
||||
}
|
||||
|
||||
static int pthread_join(pthread_t thread, void* unused) {
|
||||
return (int) WaitForSingleObject(thread, INFINITE);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user