Compare commits
51 Commits
Comparing base `ggml-backe...` with `gg/chess`
| SHA1 |
| --- |
| 15c4fdce45 |
| 70741ba794 |
| bb723282cc |
| dc5513a709 |
| ffc244845b |
| 8962a6bd67 |
| d313034b9c |
| 8b0b0acff3 |
| 02ade14f67 |
| 8dba8204eb |
| 4260d4fc70 |
| ee65df7982 |
| 03f254193b |
| 8f2d8eae10 |
| a44b21bce0 |
| f07ff2aa6a |
| 280e631bcf |
| 2f86da0d09 |
| a787f7f85c |
| c83a38e89d |
| 758c951729 |
| eff3570f78 |
| fa19bc4195 |
| a01b2e0971 |
| 8159a9ab99 |
| 7516d9c16d |
| 46cc26d1b9 |
| f784f9fa12 |
| ca23f8ee6d |
| e2f0eba2d4 |
| d4353e48f7 |
| bebf0da983 |
| 848e54f3ad |
| 7883d1cae4 |
| ccc85b4ff8 |
| c7606b47df |
| d38af151a1 |
| 94267df08e |
| 8713c67133 |
| 57a60639bb |
| bfbaa4dce5 |
| 1d79e78402 |
| b6c5f49b78 |
| d4231649e6 |
| 3e5c7feeff |
| c23598e4ca |
| 54a08bde29 |
| 9f8bbd3fee |
| 3172006a24 |
| 684bc8bd70 |
| b0502836b8 |
.github/workflows/build.yml (vendored, 33 changes)
@@ -320,6 +320,13 @@ jobs:
          cd ./build
          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}

      - name: Copy CUDA DLLs
        run: >
          Copy-Item -PassThru
          -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
          -Include cudart64_*,cublas64_*,cublasLt64_*
          -Destination build/bin/${{ matrix.build }}

      - name: Copy SDL2.dll
        if: matrix.sdl2 == 'ON'
        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}

@@ -396,6 +403,32 @@ jobs:
          cd examples/whisper.android
          ./gradlew assembleRelease --no-daemon

  android_java:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        uses: actions/checkout@v3

      - name: set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: '11'
          distribution: 'temurin'
          cache: gradle

      - name: Setup Android SDK
        uses: android-actions/setup-android@v2
        with:
          api-level: 30
          build-tools-version: 30.0.3

      - name: Build
        run: |
          cd examples/whisper.android.java
          chmod +x ./gradlew
          ./gradlew assembleRelease

  java:
    needs: [ 'windows' ]
    runs-on: windows-latest
.gitignore (vendored, 5 changes)
@@ -31,6 +31,7 @@ build-sanitize-thread/
/talk-llama
/bench
/quantize
/server
/lsp

arm_neon.h

@@ -54,3 +55,7 @@ bindings/java/.idea/
.idea/

benchmark_results.csv
cmake-build-debug/
.cxx/
.gradle/
local.properties
CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required (VERSION 3.5)

-project(whisper.cpp VERSION 1.4.3)
+project(whisper.cpp VERSION 1.5.0)

# Add path to modules
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
Makefile (15 changes)
@@ -1,4 +1,4 @@
-default: main bench quantize
+default: main bench quantize server

ifndef UNAME_S
    UNAME_S := $(shell uname -s)

@@ -338,7 +338,7 @@ libwhisper.so: $(WHISPER_OBJ)
    $(CXX) $(CXXFLAGS) -shared -o libwhisper.so $(WHISPER_OBJ) $(LDFLAGS)

clean:
-   rm -f *.o main stream command talk talk-llama bench quantize lsp libwhisper.a libwhisper.so
+   rm -f *.o main stream command talk talk-llama bench quantize server lsp libwhisper.a libwhisper.so

#
# Examples

@@ -359,11 +359,14 @@ bench: examples/bench/bench.cpp $(WHISPER_OBJ)
quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
    $(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)

server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
    $(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)

stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
    $(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)

-command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
-   $(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)
+command: examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
+   $(CXX) $(CXXFLAGS) examples/command/command.cpp examples/grammar-parser.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o command $(CC_SDL) $(LDFLAGS)

lsp: examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
    $(CXX) $(CXXFLAGS) examples/lsp/lsp.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o lsp $(CC_SDL) $(LDFLAGS)

@@ -418,9 +421,9 @@ samples:
.PHONY: medium
.PHONY: large-v1
.PHONY: large-v2
-.PHONY: large
+.PHONY: large-v3

-tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
    bash ./models/download-ggml-model.sh $@
    @echo ""
    @echo "==============================================="
README.md (34 changes)
@@ -6,7 +6,7 @@
[](https://opensource.org/licenses/MIT)
[](https://www.npmjs.com/package/whisper.cpp/)

-Beta: [v1.4.3](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.4.3) / Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.0](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.0) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)

High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

@@ -16,12 +16,10 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
- VSX intrinsics support for POWER architectures
- Mixed F16 / F32 precision
- [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
- Low memory usage (Flash Attention)
- Zero memory allocations at runtime
- Support for CPU-only inference
-- [Partial GPU support for NVIDIA via cuBLAS](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
+- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
- [Partial OpenCL GPU support via CLBlast](https://github.com/ggerganov/whisper.cpp#opencl-gpu-support-via-clblast)
- [BLAS CPU support via OpenBLAS](https://github.com/ggerganov/whisper.cpp#blas-cpu-support-via-openblas)
- [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)

@@ -36,10 +34,8 @@ Supported platforms:
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)

-The entire implementation of the model is contained in 2 source files:
-
-- Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
-- Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
+The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
+The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.

Having such a lightweight implementation of the model allows you to easily integrate it into different platforms and applications.
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)

@@ -235,18 +231,18 @@ make medium.en
make medium
make large-v1
make large-v2
-make large
+make large-v3
```

## Memory usage

-| Model | Disk | Mem | SHA |
-| --- | --- | --- | --- |
-| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
+| Model | Disk | Mem |
+| --- | --- | --- |
+| tiny | 75 MiB | ~273 MB |
+| base | 142 MiB | ~388 MB |
+| small | 466 MiB | ~852 MB |
+| medium | 1.5 GiB | ~2.1 GB |
+| large | 2.9 GiB | ~3.9 GB |

## Quantization

@@ -400,12 +396,12 @@ This can result in significant speedup in encoder performance. Here are the inst

The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
cached for the next run.

For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).

-## NVIDIA GPU support via cuBLAS
+## NVIDIA GPU support

-With NVIDIA cards the Encoder processing can to a large extent be offloaded to the GPU through cuBLAS.
+With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads

Now build `whisper.cpp` with cuBLAS support:
@@ -24,7 +24,7 @@ const (

var (
    // The models which will be downloaded, if no model is specified as an argument
-   modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
+   modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
)

var (
build.gradle
@@ -9,6 +9,7 @@ archivesBaseName = 'whispercpp'
group = 'io.github.ggerganov'
version = '1.4.0'


sourceCompatibility = 1.8
targetCompatibility = 1.8
WhisperCpp.java
@@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp;

import com.sun.jna.Native;
import com.sun.jna.Pointer;
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;

@@ -9,6 +10,8 @@ import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.

@@ -160,6 +163,28 @@ public class WhisperCpp implements AutoCloseable {

        return str.toString().trim();
    }

    public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
        if (ctx == null) {
            throw new IllegalStateException("Model not initialised");
        }

        if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
            throw new IOException("Failed to process audio");
        }

        int nSegments = lib.whisper_full_n_segments(ctx);
        List<WhisperSegment> segments = new ArrayList<>(nSegments);

        for (int i = 0; i < nSegments; i++) {
            long t0 = lib.whisper_full_get_segment_t0(ctx, i);
            String text = lib.whisper_full_get_segment_text(ctx, i);
            long t1 = lib.whisper_full_get_segment_t1(ctx, i);
            segments.add(new WhisperSegment(t0, t1, text));
        }

        return segments;
    }

    // public int getTextSegmentCount(Pointer ctx) {
    //     return lib.whisper_full_n_segments(ctx);
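For reference, the new `fullTranscribeWithTime` binding mirrors the segment loop of the underlying C API. The sketch below is not part of the bindings or of this diff; it is a minimal C++ illustration of the same flow, using only functions that appear elsewhere in this comparison (`whisper_full`, `whisper_full_n_segments`, `whisper_full_get_segment_t0/t1/text`) and assuming a `whisper_context` that has already been initialised elsewhere.

```
// Hedged sketch: collect timestamped segments after running whisper_full on
// 16 kHz mono float PCM. Assumes `ctx` was initialised from a ggml model.
#include "whisper.h"

#include <string>
#include <vector>

struct segment {
    int64_t t0;        // segment start, in 10 ms units
    int64_t t1;        // segment end, in 10 ms units
    std::string text;  // decoded text
};

static std::vector<segment> transcribe_with_time(whisper_context * ctx, const std::vector<float> & pcmf32) {
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    std::vector<segment> segments;
    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        return segments; // empty on failure, like the Java binding's IOException path
    }

    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        segments.push_back({
            whisper_full_get_segment_t0(ctx, i),
            whisper_full_get_segment_t1(ctx, i),
            whisper_full_get_segment_text(ctx, i),
        });
    }
    return segments;
}
```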
WhisperSegment.java (new file)
@@ -0,0 +1,47 @@
package io.github.ggerganov.whispercpp.bean;

/**
 * Created by litonglinux@qq.com on 10/21/2023_7:48 AM
 */
public class WhisperSegment {
    private long start, end;
    private String sentence;

    public WhisperSegment() {
    }

    public WhisperSegment(long start, long end, String sentence) {
        this.start = start;
        this.end = end;
        this.sentence = sentence;
    }

    public long getStart() {
        return start;
    }

    public long getEnd() {
        return end;
    }

    public String getSentence() {
        return sentence;
    }

    public void setStart(long start) {
        this.start = start;
    }

    public void setEnd(long end) {
        this.end = end;
    }

    public void setSentence(String sentence) {
        this.sentence = sentence;
    }

    @Override
    public String toString() {
        return "[" + start + " --> " + end + "]:" + sentence;
    }
}
WhisperFullParams.java
@@ -58,6 +58,9 @@ public class WhisperFullParams extends Structure {
        no_context = enable ? CBool.FALSE : CBool.TRUE;
    }

    /** Generate timestamps or not? */
    public CBool no_timestamps;

    /** Flag to force single segment output (useful for streaming). (default = false) */
    public CBool single_segment;

@@ -304,10 +307,16 @@ public class WhisperFullParams extends Structure {
        logits_filter_callback = CallbackReference.getFunctionPointer(callback);
    }

    /** Grammar stuff */
    public Pointer grammar_rules;
    public long n_grammar_rules;
    public long i_start_rule;
    public float grammar_penalty;

    @Override
    protected List<String> getFieldOrder() {
        return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
-               "no_context", "single_segment",
+               "no_context", "single_segment", "no_timestamps",
                "print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
                "thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "speed_up", "audio_ctx",
                "tdrz_enable", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",

@@ -316,6 +325,7 @@ public class WhisperFullParams extends Structure {
                "new_segment_callback", "new_segment_callback_user_data",
                "progress_callback", "progress_callback_user_data",
                "encoder_begin_callback", "encoder_begin_callback_user_data",
-               "logits_filter_callback", "logits_filter_callback_user_data");
+               "logits_filter_callback", "logits_filter_callback_user_data",
+               "grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
    }
}
WhisperCppTest.java
@@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp;

import static org.junit.jupiter.api.Assertions.*;

import io.github.ggerganov.whispercpp.bean.WhisperSegment;
import io.github.ggerganov.whispercpp.params.CBool;
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;

@@ -11,6 +12,7 @@ import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.List;

class WhisperCppTest {
    private static WhisperCpp whisper = new WhisperCpp();

@@ -20,11 +22,12 @@ class WhisperCppTest {
    static void init() throws FileNotFoundException {
        // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
        // or you can provide the absolute path to the model file.
        //String modelName = "../../models/ggml-tiny.bin";
        String modelName = "../../models/ggml-tiny.en.bin";
        try {
            whisper.initContext(modelName);
-           // whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
-           // whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
+           //whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
+           //whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
            modelInitialised = true;
        } catch (FileNotFoundException ex) {
            System.out.println("Model " + modelName + " not found");

@@ -42,7 +45,7 @@ class WhisperCppTest {
        assertEquals(16384, params.n_max_text_ctx);
        assertFalse(params.translate);
        assertEquals(0.01f, params.thold_pt);
-       assertEquals(2, params.beam_search.beam_size);
+       assertEquals(5, params.beam_search.beam_size);
        assertEquals(-1.0f, params.beam_search.patience);
    }

@@ -55,7 +58,7 @@ class WhisperCppTest {
        assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
        assertNotEquals(0, params.n_threads);
        assertEquals(16384, params.n_max_text_ctx);
-       assertEquals(2, params.greedy.best_of);
+       assertEquals(5, params.greedy.best_of);
    }

    @Test

@@ -72,11 +75,11 @@ class WhisperCppTest {
        byte[] b = new byte[audioInputStream.available()];
        float[] floats = new float[b.length / 2];

-       // WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
+       //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
        params.print_progress = CBool.FALSE;
-       // params.initial_prompt = "and so my fellow Americans um, like";
+       //params.initial_prompt = "and so my fellow Americans um, like";

        try {

@@ -99,4 +102,43 @@ class WhisperCppTest {
            audioInputStream.close();
        }
    }

    @Test
    void testFullTranscribeWithTime() throws Exception {
        if (!modelInitialised) {
            System.out.println("Model not initialised, skipping test");
            return;
        }

        // Given
        File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);

        byte[] b = new byte[audioInputStream.available()];
        float[] floats = new float[b.length / 2];

        //WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
        WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
        params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
        params.print_progress = CBool.FALSE;
        //params.initial_prompt = "and so my fellow Americans um, like";

        try {
            audioInputStream.read(b);

            for (int i = 0, j = 0; i < b.length; i += 2, j++) {
                int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
                floats[j] = intSample / 32767.0f;
            }

            List<WhisperSegment> segments = whisper.fullTranscribeWithTime(params, floats);
            assertTrue(segments.size() > 0, "The size of segments should be greater than 0");
            for (WhisperSegment segment : segments) {
                System.out.println(segment);
            }
        } finally {
            audioInputStream.close();
        }
    }

}
package.json
@@ -1,6 +1,6 @@
{
  "name": "whisper.cpp",
- "version": "1.4.3",
+ "version": "1.5.0",
  "description": "Whisper speech recognition",
  "main": "whisper.js",
  "scripts": {
examples/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(${TARGET} STATIC
    common.cpp
    common-ggml.h
    common-ggml.cpp
    grammar-parser.cpp
    )

include(DefaultTargetOptions)

@@ -64,6 +65,7 @@ elseif(CMAKE_JS_VERSION)
else()
    add_subdirectory(main)
    add_subdirectory(stream)
    add_subdirectory(server)
    add_subdirectory(command)
    add_subdirectory(bench)
    add_subdirectory(quantize)

@@ -71,3 +73,5 @@ else()
    add_subdirectory(talk-llama)
    add_subdirectory(lsp)
endif()

add_subdirectory(wchess)
examples/bench/bench.cpp
@@ -81,7 +81,7 @@ int whisper_bench_full(const whisper_params & params) {
    }
    // heat encoder
    if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
-       fprintf(stderr, "error: failed to encode model: %d\n", ret);
+       fprintf(stderr, "error: failed to encode: %d\n", ret);
        return 4;
    }

@@ -90,13 +90,13 @@ int whisper_bench_full(const whisper_params & params) {

    // prompt heat
    if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
-       fprintf(stderr, "error: failed to encode model: %d\n", ret);
+       fprintf(stderr, "error: failed to decode: %d\n", ret);
        return 4;
    }

    // text-generation heat
    if (int ret = whisper_decode(ctx, tokens, 1, 256, params.n_threads) != 0) {
-       fprintf(stderr, "error: failed to encode model: %d\n", ret);
+       fprintf(stderr, "error: failed to decode: %d\n", ret);
        return 4;
    }

@@ -104,20 +104,30 @@ int whisper_bench_full(const whisper_params & params) {

    // actual run
    if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
-       fprintf(stderr, "error: failed to encode model: %d\n", ret);
+       fprintf(stderr, "error: failed to encode: %d\n", ret);
        return 4;
    }

    for (int i = 0; i < 16; i++) {
        if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to encode model: %d\n", ret);
    // text-generation
    for (int i = 0; i < 256; i++) {
        if (int ret = whisper_decode(ctx, tokens, 1, i, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }

    for (int i = 0; i < 256; i++) {
        if (int ret = whisper_decode(ctx, tokens, 1, i, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to encode model: %d\n", ret);
    // batched decoding
    for (int i = 0; i < 64; i++) {
        if (int ret = whisper_decode(ctx, tokens, 5, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }

    // prompt processing
    for (int i = 0; i < 16; i++) {
        if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
            fprintf(stderr, "error: failed to decode: %d\n", ret);
            return 4;
        }
    }
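The reorganised benchmark exercises three decoding patterns separately: single-token text generation, batched decoding of 5 tokens, and 256-token prompt processing. The sketch below is not taken from bench.cpp; it is a hedged illustration of how one such loop could be timed with `std::chrono`, assuming a `whisper_context` and token buffer prepared the same way as in `whisper_bench_full`.

```
// Hedged sketch: time N single-token decode calls and report ms per token.
// Assumes `ctx` and `tokens` are set up as in whisper_bench_full().
#include "whisper.h"

#include <chrono>
#include <cstdio>

static void bench_text_generation(whisper_context * ctx, const whisper_token * tokens, int n_iters, int n_threads) {
    const auto t_start = std::chrono::high_resolution_clock::now();

    for (int i = 0; i < n_iters; ++i) {
        // decode one token at position i, as in the "text-generation" loop above
        if (whisper_decode(ctx, tokens, 1, i, n_threads) != 0) {
            fprintf(stderr, "error: failed to decode\n");
            return;
        }
    }

    const auto t_end = std::chrono::high_resolution_clock::now();
    const double ms  = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();

    fprintf(stdout, "text generation: %.2f ms/token\n", ms / n_iters);
}
```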
examples/command/command.cpp
@@ -9,6 +9,7 @@
#include "common-sdl.h"
#include "common.h"
#include "whisper.h"
#include "grammar-parser.h"

#include <sstream>
#include <cassert>

@@ -21,6 +22,11 @@
#include <vector>
#include <map>

bool file_exists(const std::string & fname) {
    std::ifstream f(fname.c_str());
    return f.good();
}

// command-line parameters
struct whisper_params {
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

@@ -30,8 +36,12 @@ struct whisper_params {
    int32_t max_tokens = 32;
    int32_t audio_ctx = 0;

    float vad_thold = 0.6f;
    float freq_thold = 100.0f;

    float grammar_penalty = 100.0f;

    grammar_parser::parse_state grammar_parsed;

    bool speed_up = false;
    bool translate = false;

@@ -45,6 +55,8 @@ struct whisper_params {
    std::string fname_out;
    std::string commands;
    std::string prompt;
    std::string context;
    std::string grammar;
};

void whisper_print_usage(int argc, char ** argv, const whisper_params & params);

@@ -75,6 +87,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
    else if (arg == "-cmd" || arg == "--commands") { params.commands = argv[++i]; }
    else if (arg == "-p" || arg == "--prompt") { params.prompt = argv[++i]; }
    else if (arg == "-ctx" || arg == "--context") { params.context = argv[++i]; }
    else if ( arg == "--grammar") { params.grammar = argv[++i]; }
    else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
    else {
        fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
        whisper_print_usage(argc, argv, params);

@@ -109,16 +124,30 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
    fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
    fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
    fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
    fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
    fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
    fprintf(stderr, "\n");
}

std::string transcribe(whisper_context * ctx, const whisper_params & params, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
std::string transcribe(
        whisper_context * ctx,
        const whisper_params & params,
        const std::vector<float> & pcmf32,
        const std::string & grammar_rule,
        float & logprob_min,
        float & logprob_sum,
        int & n_tokens,
        int64_t & t_ms) {
    const auto t_start = std::chrono::high_resolution_clock::now();

    prob = 0.0f;
    logprob_min = 0.0f;
    logprob_sum = 0.0f;
    n_tokens = 0;
    t_ms = 0;

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    //whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);

    wparams.print_progress = false;
    wparams.print_special = params.print_special;

@@ -126,19 +155,41 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
    wparams.print_timestamps = !params.no_timestamps;
    wparams.translate = params.translate;
    wparams.no_context = true;
    wparams.no_timestamps = params.no_timestamps;
    wparams.single_segment = true;
    wparams.max_tokens = params.max_tokens;
    wparams.language = params.language.c_str();
    wparams.n_threads = params.n_threads;

    wparams.audio_ctx = params.audio_ctx;
    wparams.speed_up = params.speed_up;

    wparams.temperature = 0.4f;
    wparams.temperature_inc = 1.0f;
    wparams.greedy.best_of = 5;

    wparams.beam_search.beam_size = 5;

    wparams.initial_prompt = params.context.data();

    const auto & grammar_parsed = params.grammar_parsed;
    auto grammar_rules = grammar_parsed.c_rules();

    if (!params.grammar_parsed.rules.empty() && !grammar_rule.empty()) {
        if (grammar_parsed.symbol_ids.find(grammar_rule) == grammar_parsed.symbol_ids.end()) {
            fprintf(stderr, "%s: warning: grammar rule '%s' not found - skipping grammar sampling\n", __func__, grammar_rule.c_str());
        } else {
            wparams.grammar_rules = grammar_rules.data();
            wparams.n_grammar_rules = grammar_rules.size();
            wparams.i_start_rule = grammar_parsed.symbol_ids.at(grammar_rule);
            wparams.grammar_penalty = params.grammar_penalty;
        }
    }

    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        return "";
    }

    int prob_n = 0;
    std::string result;

    const int n_segments = whisper_full_n_segments(ctx);

@@ -147,19 +198,17 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con

        result += text;

        const int n_tokens = whisper_full_n_tokens(ctx, i);
        for (int j = 0; j < n_tokens; ++j) {
        const int n = whisper_full_n_tokens(ctx, i);
        for (int j = 0; j < n; ++j) {
            const auto token = whisper_full_get_token_data(ctx, i, j);

            prob += token.p;
            ++prob_n;
            if(token.plog > 0.0f) exit(0);
            logprob_min = std::min(logprob_min, token.plog);
            logprob_sum += token.plog;
            ++n_tokens;
        }
    }

    if (prob_n > 0) {
        prob /= prob_n;
    }

    const auto t_end = std::chrono::high_resolution_clock::now();
    t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();

@@ -250,7 +299,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
    fprintf(stderr, " ]\n");
    }

    std::string k_prompt = "select one from the available words: ";
    for (int i = 0; i < (int) allowed_commands.size(); ++i) {
        if (i > 0) {
            k_prompt += ", ";

@@ -418,7 +467,9 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
    bool is_running = true;
    bool ask_prompt = true;

    float prob = 0.0f;
    float logprob_min = 0.0f;
    float logprob_sum = 0.0f;
    int n_tokens = 0;

    std::vector<float> pcmf32_cur;

@@ -456,7 +507,7 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi
            // detect the commands
            audio.get(params.command_ms, pcmf32_cur);

            const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
            const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "", logprob_min, logprob_sum, n_tokens, t_ms));

            const auto words = get_words(txt);

@@ -492,18 +543,27 @@ int always_prompt_transcription(struct whisper_context * ctx, audio_async & audi

// general-purpose mode
// freely transcribe the voice into text
int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
    bool is_running = true;
    bool have_prompt = false;
    bool ask_prompt = true;

    float prob0 = 0.0f;
    float prob = 0.0f;
    float logprob_min0 = 0.0f;
    float logprob_min = 0.0f;

    float logprob_sum0 = 0.0f;
    float logprob_sum = 0.0f;

    int n_tokens0 = 0;
    int n_tokens = 0;

    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;

    const std::string k_prompt = "Ok Whisper, start listening for commands.";
    std::string k_prompt = "Ok Whisper, start listening for commands.";
    if (!params.prompt.empty()) {
        k_prompt = params.prompt;
    }

    fprintf(stderr, "\n");
    fprintf(stderr, "%s: general-purpose mode\n", __func__);

@@ -536,9 +596,11 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                // wait for activation phrase
                audio.get(params.prompt_ms, pcmf32_cur);

                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob0, t_ms));
                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "prompt", logprob_min0, logprob_sum0, n_tokens0, t_ms));

                fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
                const float p = 100.0f * std::exp(logprob_min0);

                fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms, p = %.2f%%)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms, p);

                const float sim = similarity(txt, k_prompt);

@@ -559,19 +621,30 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                // we have heard the activation phrase, now detect the commands
                audio.get(params.command_ms, pcmf32_cur);

                //printf("len prompt:  %.4f\n", pcmf32_prompt.size() / (float) WHISPER_SAMPLE_RATE);
                //printf("len command: %.4f\n", pcmf32_cur.size() / (float) WHISPER_SAMPLE_RATE);

                // prepend 3 second of silence
                pcmf32_cur.insert(pcmf32_cur.begin(), 3.0f*WHISPER_SAMPLE_RATE, 0.0f);

                // prepend the prompt audio
                pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());

                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, "root", logprob_min, logprob_sum, n_tokens, t_ms));

                prob = 100.0f*(prob - prob0);
                //const float p = 100.0f * std::exp((logprob - logprob0) / (n_tokens - n_tokens0));
                const float p = 100.0f * std::exp(logprob_min);

                //fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());

                // find the prompt in the text
                float best_sim = 0.0f;
                size_t best_len = 0;
                for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                for (size_t n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
                    if (n >= txt.size()) {
                        break;
                    }

                    const auto prompt = txt.substr(0, n);

                    const float sim = similarity(prompt, k_prompt);

@@ -584,9 +657,16 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
                    }
                }

                const std::string command = ::trim(txt.substr(best_len));
                fprintf(stdout, "%s: DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
                if (best_len == 0) {
                    fprintf(stdout, "%s: WARNING: command not recognized, try again\n", __func__);
                } else {
                    // cut the prompt from the decoded text
                    const std::string command = ::trim(txt.substr(best_len));

                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                }

                fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                fprintf(stdout, "\n");
            }

@@ -654,12 +734,36 @@ int main(int argc, char ** argv) {

    int ret_val = 0;

    if (!params.commands.empty()) {
        ret_val = process_command_list(ctx, audio, params);
    } else if (!params.prompt.empty()) {
        ret_val = always_prompt_transcription(ctx, audio, params);
    } else {
        ret_val = process_general_transcription(ctx, audio, params);
    if (!params.grammar.empty()) {
        auto & grammar = params.grammar_parsed;
        if (file_exists(params.grammar.c_str())) {
            // read grammar from file
            std::ifstream ifs(params.grammar.c_str());
            const std::string txt = std::string((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
            grammar = grammar_parser::parse(txt.c_str());
        } else {
            // read grammar from string
            grammar = grammar_parser::parse(params.grammar.c_str());
        }

        // will be empty (default) if there are parse errors
        if (grammar.rules.empty()) {
            ret_val = 1;
        } else {
            fprintf(stderr, "%s: grammar:\n", __func__);
            grammar_parser::print_grammar(stderr, grammar);
            fprintf(stderr, "\n");
        }
    }

    if (ret_val == 0) {
        if (!params.commands.empty()) {
            ret_val = process_command_list(ctx, audio, params);
        } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
            ret_val = always_prompt_transcription(ctx, audio, params);
        } else {
            ret_val = process_general_transcription(ctx, audio, params);
        }
    }

    audio.pause();
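To make the new `--grammar` flags concrete, here is a hedged sketch of the path from a GBNF string to `whisper_full_params`, using only pieces shown above (`grammar_parser::parse`, `parse_state::c_rules`, `symbol_ids`, and the `grammar_rules` / `n_grammar_rules` / `i_start_rule` / `grammar_penalty` fields). The grammar text itself is a made-up example, not one shipped with the repository.

```
// Hedged sketch: parse a GBNF string and point whisper_full_params at the
// resulting rules, mirroring transcribe() in examples/command/command.cpp.
#include "grammar-parser.h"
#include "whisper.h"

#include <cstdio>
#include <vector>

int main() {
    // hypothetical grammar: the voice command must be one of four words
    grammar_parser::parse_state grammar =
        grammar_parser::parse("root ::= \"up\" | \"down\" | \"left\" | \"right\"");

    if (grammar.rules.empty()) {
        fprintf(stderr, "grammar parse error\n"); // parse() returns an empty state on error
        return 1;
    }

    // c_rules() returns pointers into `grammar`, so it must outlive the decode call
    std::vector<const whisper_grammar_element *> rules = grammar.c_rules();

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
    wparams.grammar_rules   = rules.data();
    wparams.n_grammar_rules = rules.size();
    wparams.i_start_rule    = grammar.symbol_ids.at("root");
    wparams.grammar_penalty = 100.0f;

    // ... then run whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) as usual ...
    return 0;
}
```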
examples/common-ggml.cpp
@@ -9,6 +9,11 @@ static const std::map<std::string, enum ggml_ftype> GGML_FTYPE_MAP = {
    {"q5_0", GGML_FTYPE_MOSTLY_Q5_0},
    {"q5_1", GGML_FTYPE_MOSTLY_Q5_1},
    {"q8_0", GGML_FTYPE_MOSTLY_Q8_0},
    {"q2_k", GGML_FTYPE_MOSTLY_Q2_K},
    {"q3_k", GGML_FTYPE_MOSTLY_Q3_K},
    {"q4_k", GGML_FTYPE_MOSTLY_Q4_K},
    {"q5_k", GGML_FTYPE_MOSTLY_Q5_K},
    {"q6_k", GGML_FTYPE_MOSTLY_Q6_K},
};

void ggml_print_ftypes(FILE * fp) {

@@ -48,15 +53,15 @@ bool ggml_common_quantize_0(
        case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; break;
        case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; break;
        case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; break;
        case GGML_FTYPE_MOSTLY_Q2_K: qtype = GGML_TYPE_Q2_K; break;
        case GGML_FTYPE_MOSTLY_Q3_K: qtype = GGML_TYPE_Q3_K; break;
        case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break;
        case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break;
        case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break;
        case GGML_FTYPE_UNKNOWN:
        case GGML_FTYPE_ALL_F32:
        case GGML_FTYPE_MOSTLY_F16:
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
        case GGML_FTYPE_MOSTLY_Q2_K:
        case GGML_FTYPE_MOSTLY_Q3_K:
        case GGML_FTYPE_MOSTLY_Q4_K:
        case GGML_FTYPE_MOSTLY_Q5_K:
        case GGML_FTYPE_MOSTLY_Q6_K:
            {
                fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                return false;

@@ -167,24 +172,17 @@ bool ggml_common_quantize_0(

            switch ((ggml_type) ttype) {
                case GGML_TYPE_Q4_0:
                    {
                        cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q4_1:
                    {
                        cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q5_0:
                    {
                        cur_size = ggml_quantize_q5_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q5_1:
                    {
                        cur_size = ggml_quantize_q5_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                    } break;
                case GGML_TYPE_Q8_0:
                case GGML_TYPE_Q2_K:
                case GGML_TYPE_Q3_K:
                case GGML_TYPE_Q4_K:
                case GGML_TYPE_Q5_K:
                case GGML_TYPE_Q6_K:
                    {
                        cur_size = ggml_quantize_q8_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data());
                        cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements, hist_cur.data());
                    } break;
                case GGML_TYPE_F32:
                case GGML_TYPE_F16:

@@ -192,11 +190,6 @@ bool ggml_common_quantize_0(
                case GGML_TYPE_I16:
                case GGML_TYPE_I32:
                case GGML_TYPE_Q8_1:
                case GGML_TYPE_Q2_K:
                case GGML_TYPE_Q3_K:
                case GGML_TYPE_Q4_K:
                case GGML_TYPE_Q5_K:
                case GGML_TYPE_Q6_K:
                case GGML_TYPE_Q8_K:
                case GGML_TYPE_COUNT:
                    {
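The quantization switch above now funnels Q8_0 and the k-quant types through a single `ggml_quantize_chunk` call instead of per-type helpers. Below is a hedged, self-contained sketch of that call with the same signature used above; the element count and buffer sizes are assumptions of mine, not values from the repository.

```
// Hedged sketch: quantize a float buffer with ggml_quantize_chunk,
// as common-ggml.cpp now does for Q8_0 and the k-quant types.
#include "ggml.h"

#include <cstdio>
#include <vector>

int main() {
    const int nelements = 4096;                            // assumed multiple of the block size
    std::vector<float>   data_f32(nelements, 0.5f);        // dummy source weights
    std::vector<char>    work(nelements * sizeof(float));  // destination; float size is a safe upper bound
    std::vector<int64_t> hist(16, 0);                      // per-bucket histogram, like hist_cur above

    const size_t cur_size = ggml_quantize_chunk(GGML_TYPE_Q5_K, data_f32.data(), work.data(), 0, nelements, hist.data());

    printf("quantized %d floats into %zu bytes\n", nelements, cur_size);
    return 0;
}
```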
examples/common-sdl.cpp
@@ -139,10 +139,13 @@ void audio_async::callback(uint8_t * stream, int len) {
        return;
    }

    const size_t n_samples = len / sizeof(float);
    size_t n_samples = len / sizeof(float);

    m_audio_new.resize(n_samples);
    memcpy(m_audio_new.data(), stream, n_samples * sizeof(float));
    if (n_samples > m_audio.size()) {
        n_samples = m_audio.size();

        stream += (len - (n_samples * sizeof(float)));
    }

    //fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);

@@ -153,7 +156,7 @@ void audio_async::callback(uint8_t * stream, int len) {
            const size_t n0 = m_audio.size() - m_audio_pos;

            memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
            memcpy(&m_audio[0], &stream[n0], (n_samples - n0) * sizeof(float));
            memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));

            m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
            m_audio_len = m_audio.size();

examples/common-sdl.h
@@ -41,7 +41,6 @@ private:
    std::mutex m_mutex;

    std::vector<float> m_audio;
    std::vector<float> m_audio_new;
    size_t m_audio_pos = 0;
    size_t m_audio_len = 0;
};
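The callback change above clamps the incoming chunk to the ring-buffer capacity and then wraps the write with a modulo. For clarity, here is a hedged, standalone sketch of that wrap-around logic in plain C++ (no SDL; the type and names are mine, not from the repository).

```
// Hedged sketch of the circular-buffer write used by audio_async::callback():
// clamp the chunk to the buffer capacity, split the copy at the end of the
// buffer, then advance the write position modulo the buffer size.
#include <cstring>
#include <vector>

struct ring_buffer {
    std::vector<float> audio;
    size_t pos = 0; // next write position

    void write(const float * samples, size_t n_samples) {
        if (n_samples > audio.size()) {
            // keep only the most recent samples that still fit
            samples  += n_samples - audio.size();
            n_samples = audio.size();
        }

        if (pos + n_samples > audio.size()) {
            const size_t n0 = audio.size() - pos;
            std::memcpy(&audio[pos], samples, n0 * sizeof(float));
            std::memcpy(&audio[0], samples + n0, (n_samples - n0) * sizeof(float));
        } else {
            std::memcpy(&audio[pos], samples, n_samples * sizeof(float));
        }

        pos = (pos + n_samples) % audio.size();
    }
};
```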
examples/grammar-parser.cpp (new file, 423 lines)
@@ -0,0 +1,423 @@
|
||||
#include "grammar-parser.h"
|
||||
#include <cstdint>
|
||||
#include <cwchar>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <stdexcept>
|
||||
#include <exception>
|
||||
|
||||
namespace grammar_parser {
|
||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||
// copied from whisper.cpp
|
||||
std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||
uint8_t highbits = first_byte >> 4;
|
||||
int len = lookup[highbits];
|
||||
uint8_t mask = (1 << (8 - len)) - 1;
|
||||
uint32_t value = first_byte & mask;
|
||||
const char * end = src + len; // may overrun!
|
||||
const char * pos = src + 1;
|
||||
for ( ; pos < end && *pos; pos++) {
|
||||
value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
|
||||
}
|
||||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
||||
return result.first->second;
|
||||
}
|
||||
|
||||
uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||
return next_id;
|
||||
}
|
||||
|
||||
void add_rule(
|
||||
parse_state & state,
|
||||
uint32_t rule_id,
|
||||
const std::vector<whisper_grammar_element> & rule) {
|
||||
if (state.rules.size() <= rule_id) {
|
||||
state.rules.resize(rule_id + 1);
|
||||
}
|
||||
state.rules[rule_id] = rule;
|
||||
}
|
||||
|
||||
bool is_word_char(char c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
||||
}
|
||||
|
||||
std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
||||
const char * pos = src;
|
||||
const char * end = src + size;
|
||||
uint32_t value = 0;
|
||||
for ( ; pos < end && *pos; pos++) {
|
||||
value <<= 4;
|
||||
char c = *pos;
|
||||
if ('a' <= c && c <= 'f') {
|
||||
value += c - 'a' + 10;
|
||||
} else if ('A' <= c && c <= 'F') {
|
||||
value += c - 'A' + 10;
|
||||
} else if ('0' <= c && c <= '9') {
|
||||
value += c - '0';
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pos != end) {
|
||||
throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
|
||||
}
|
||||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
const char * parse_space(const char * src, bool newline_ok) {
|
||||
const char * pos = src;
|
||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||
if (*pos == '#') {
|
||||
while (*pos && *pos != '\r' && *pos != '\n') {
|
||||
pos++;
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_name(const char * src) {
|
||||
const char * pos = src;
|
||||
while (is_word_char(*pos)) {
|
||||
pos++;
|
||||
}
|
||||
if (pos == src) {
|
||||
throw std::runtime_error(std::string("expecting name at ") + src);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
std::pair<uint32_t, const char *> parse_char(const char * src) {
|
||||
if (*src == '\\') {
|
||||
switch (src[1]) {
|
||||
case 'x': return parse_hex(src + 2, 2);
|
||||
case 'u': return parse_hex(src + 2, 4);
|
||||
case 'U': return parse_hex(src + 2, 8);
|
||||
case 't': return std::make_pair('\t', src + 2);
|
||||
case 'r': return std::make_pair('\r', src + 2);
|
||||
case 'n': return std::make_pair('\n', src + 2);
|
||||
case '\\':
|
||||
case '"':
|
||||
case '[':
|
||||
case ']':
|
||||
return std::make_pair(src[1], src + 2);
|
||||
default:
|
||||
throw std::runtime_error(std::string("unknown escape at ") + src);
|
||||
}
|
||||
} else if (*src) {
|
||||
return decode_utf8(src);
|
||||
}
|
||||
throw std::runtime_error("unexpected end of input");
|
||||
}
|
||||
|
||||
const char * parse_alternates(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
uint32_t rule_id,
|
||||
bool is_nested);
|
||||
|
||||
const char * parse_sequence(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
std::vector<whisper_grammar_element> & out_elements,
|
||||
bool is_nested) {
|
||||
size_t last_sym_start = out_elements.size();
|
||||
const char * pos = src;
|
||||
while (*pos) {
|
||||
if (*pos == '"') { // literal string
|
||||
pos++;
|
||||
last_sym_start = out_elements.size();
|
||||
while (*pos != '"') {
|
||||
auto char_pair = parse_char(pos);
|
||||
pos = char_pair.second;
|
||||
out_elements.push_back({WHISPER_GRETYPE_CHAR, char_pair.first});
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == '[') { // char range(s)
|
||||
pos++;
|
||||
enum whisper_gretype start_type = WHISPER_GRETYPE_CHAR;
|
||||
if (*pos == '^') {
|
||||
pos++;
|
||||
start_type = WHISPER_GRETYPE_CHAR_NOT;
|
||||
}
|
||||
last_sym_start = out_elements.size();
|
||||
while (*pos != ']') {
|
||||
auto char_pair = parse_char(pos);
|
||||
pos = char_pair.second;
|
||||
enum whisper_gretype type = last_sym_start < out_elements.size()
|
||||
? WHISPER_GRETYPE_CHAR_ALT
|
||||
: start_type;
|
||||
|
||||
out_elements.push_back({type, char_pair.first});
|
||||
if (pos[0] == '-' && pos[1] != ']') {
|
||||
auto endchar_pair = parse_char(pos + 1);
|
||||
pos = endchar_pair.second;
|
||||
out_elements.push_back({WHISPER_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
|
||||
}
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (is_word_char(*pos)) { // rule reference
|
||||
const char * name_end = parse_name(pos);
|
||||
uint32_t ref_rule_id = get_symbol_id(state, pos, name_end - pos);
|
||||
pos = parse_space(name_end, is_nested);
|
||||
last_sym_start = out_elements.size();
|
||||
out_elements.push_back({WHISPER_GRETYPE_RULE_REF, ref_rule_id});
|
||||
} else if (*pos == '(') { // grouping
|
||||
// parse nested alternates into synthesized rule
|
||||
pos = parse_space(pos + 1, true);
|
||||
uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
|
||||
pos = parse_alternates(state, pos, rule_name, sub_rule_id, true);
|
||||
last_sym_start = out_elements.size();
|
||||
// output reference to synthesized rule
|
||||
out_elements.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
|
||||
if (*pos != ')') {
|
||||
throw std::runtime_error(std::string("expecting ')' at ") + pos);
|
||||
}
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator
|
||||
if (last_sym_start == out_elements.size()) {
|
||||
throw std::runtime_error(std::string("expecting preceeding item to */+/? at ") + pos);
|
||||
}
|
||||
|
||||
// apply transformation to previous symbol (last_sym_start to end) according to
|
||||
// rewrite rules:
|
||||
// S* --> S' ::= S S' |
|
||||
// S+ --> S' ::= S S' | S
|
||||
// S? --> S' ::= S |
|
||||
uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
|
||||
std::vector<whisper_grammar_element> sub_rule;
|
||||
// add preceding symbol to generated rule
|
||||
sub_rule.insert(
|
||||
sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end());
|
||||
if (*pos == '*' || *pos == '+') {
|
||||
// cause generated rule to recurse
|
||||
sub_rule.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
|
||||
}
|
||||
// mark start of alternate def
|
||||
sub_rule.push_back({WHISPER_GRETYPE_ALT, 0});
|
||||
if (*pos == '+') {
|
||||
// add preceding symbol as alternate only for '+' (otherwise empty)
|
||||
sub_rule.insert(
|
||||
sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end());
|
||||
}
|
||||
sub_rule.push_back({WHISPER_GRETYPE_END, 0});
|
||||
add_rule(state, sub_rule_id, sub_rule);
|
||||
|
||||
// in original rule, replace previous symbol with reference to generated rule
|
||||
out_elements.resize(last_sym_start);
|
||||
out_elements.push_back({WHISPER_GRETYPE_RULE_REF, sub_rule_id});
|
||||
|
||||
pos = parse_space(pos + 1, is_nested);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_alternates(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
uint32_t rule_id,
|
||||
bool is_nested) {
|
||||
std::vector<whisper_grammar_element> rule;
|
||||
const char * pos = parse_sequence(state, src, rule_name, rule, is_nested);
|
||||
while (*pos == '|') {
|
||||
rule.push_back({WHISPER_GRETYPE_ALT, 0});
|
||||
pos = parse_space(pos + 1, true);
|
||||
pos = parse_sequence(state, pos, rule_name, rule, is_nested);
|
||||
}
|
||||
rule.push_back({WHISPER_GRETYPE_END, 0});
|
||||
add_rule(state, rule_id, rule);
|
||||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_rule(parse_state & state, const char * src) {
|
||||
const char * name_end = parse_name(src);
|
||||
const char * pos = parse_space(name_end, false);
|
||||
size_t name_len = name_end - src;
|
||||
uint32_t rule_id = get_symbol_id(state, src, name_len);
|
||||
const std::string name(src, name_len);
|
||||
|
||||
if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
|
||||
throw std::runtime_error(std::string("expecting ::= at ") + pos);
|
||||
}
|
||||
pos = parse_space(pos + 3, true);
|
||||
|
||||
pos = parse_alternates(state, pos, name, rule_id, false);
|
||||
|
||||
if (*pos == '\r') {
|
||||
pos += pos[1] == '\n' ? 2 : 1;
|
||||
} else if (*pos == '\n') {
|
||||
pos++;
|
||||
} else if (*pos) {
|
||||
throw std::runtime_error(std::string("expecting newline or end at ") + pos);
|
||||
}
|
||||
return parse_space(pos, true);
|
||||
}
|
||||
|
||||
parse_state parse(const char * src) {
|
||||
try {
|
||||
parse_state state;
|
||||
const char * pos = parse_space(src, true);
|
||||
while (*pos) {
|
||||
pos = parse_rule(state, pos);
|
||||
}
|
||||
return state;
|
||||
} catch (const std::exception & err) {
|
||||
fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
|
||||
return parse_state();
|
||||
}
|
||||
}
|
||||
|
||||
void print_grammar_char(FILE * file, uint32_t c) {
|
||||
if (0x20 <= c && c <= 0x7f) {
|
||||
fprintf(file, "%c", static_cast<char>(c));
|
||||
} else {
|
||||
// cop out of encoding UTF-8
|
||||
fprintf(file, "<U+%04X>", c);
|
||||
}
|
||||
}
|
||||
|
||||
bool is_char_element(whisper_grammar_element elem) {
|
||||
switch (elem.type) {
|
||||
case WHISPER_GRETYPE_CHAR: return true;
|
||||
case WHISPER_GRETYPE_CHAR_NOT: return true;
|
||||
case WHISPER_GRETYPE_CHAR_ALT: return true;
|
||||
case WHISPER_GRETYPE_CHAR_RNG_UPPER: return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
|
||||
void print_rule_binary(FILE * file, const std::vector<whisper_grammar_element> & rule) {
|
||||
for (auto elem : rule) {
|
||||
switch (elem.type) {
|
||||
case WHISPER_GRETYPE_END: fprintf(file, "END"); break;
|
||||
                case WHISPER_GRETYPE_ALT:            fprintf(file, "ALT");            break;
                case WHISPER_GRETYPE_RULE_REF:       fprintf(file, "RULE_REF");       break;
                case WHISPER_GRETYPE_CHAR:           fprintf(file, "CHAR");           break;
                case WHISPER_GRETYPE_CHAR_NOT:       fprintf(file, "CHAR_NOT");       break;
                case WHISPER_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
                case WHISPER_GRETYPE_CHAR_ALT:       fprintf(file, "CHAR_ALT");       break;
            }
            switch (elem.type) {
                case WHISPER_GRETYPE_END:
                case WHISPER_GRETYPE_ALT:
                case WHISPER_GRETYPE_RULE_REF:
                    fprintf(file, "(%u) ", elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR:
                case WHISPER_GRETYPE_CHAR_NOT:
                case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                case WHISPER_GRETYPE_CHAR_ALT:
                    fprintf(file, "(\"");
                    print_grammar_char(file, elem.value);
                    fprintf(file, "\") ");
                    break;
            }
        }
        fprintf(file, "\n");
    }

    void print_rule(
            FILE * file,
            uint32_t rule_id,
            const std::vector<whisper_grammar_element> & rule,
            const std::map<uint32_t, std::string> & symbol_id_names) {
        if (rule.empty() || rule.back().type != WHISPER_GRETYPE_END) {
            throw std::runtime_error(
                "malformed rule, does not end with WHISPER_GRETYPE_END: " + std::to_string(rule_id));
        }
        fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
        for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
            whisper_grammar_element elem = rule[i];
            switch (elem.type) {
                case WHISPER_GRETYPE_END:
                    throw std::runtime_error(
                        "unexpected end of rule: " + std::to_string(rule_id) + "," +
                        std::to_string(i));
                case WHISPER_GRETYPE_ALT:
                    fprintf(file, "| ");
                    break;
                case WHISPER_GRETYPE_RULE_REF:
                    fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
                    break;
                case WHISPER_GRETYPE_CHAR:
                    fprintf(file, "[");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_NOT:
                    fprintf(file, "[^");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                    if (i == 0 || !is_char_element(rule[i - 1])) {
                        throw std::runtime_error(
                            "WHISPER_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
                            std::to_string(rule_id) + "," + std::to_string(i));
                    }
                    fprintf(file, "-");
                    print_grammar_char(file, elem.value);
                    break;
                case WHISPER_GRETYPE_CHAR_ALT:
                    if (i == 0 || !is_char_element(rule[i - 1])) {
                        throw std::runtime_error(
                            "WHISPER_GRETYPE_CHAR_ALT without preceding char: " +
                            std::to_string(rule_id) + "," + std::to_string(i));
                    }
                    print_grammar_char(file, elem.value);
                    break;
            }
            if (is_char_element(elem)) {
                switch (rule[i + 1].type) {
                    case WHISPER_GRETYPE_CHAR_ALT:
                    case WHISPER_GRETYPE_CHAR_RNG_UPPER:
                        break;
                    default:
                        fprintf(file, "] ");
                }
            }
        }
        fprintf(file, "\n");
    }

    void print_grammar(FILE * file, const parse_state & state) {
        try {
            std::map<uint32_t, std::string> symbol_id_names;
            for (auto kv : state.symbol_ids) {
                symbol_id_names[kv.second] = kv.first;
            }
            for (size_t i = 0, end = state.rules.size(); i < end; i++) {
                // fprintf(file, "%zu: ", i);
                // print_rule_binary(file, state.rules[i]);
                print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
                // fprintf(file, "\n");
            }
        } catch (const std::exception & err) {
            fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
        }
    }

    std::vector<const whisper_grammar_element *> parse_state::c_rules() const {
        std::vector<const whisper_grammar_element *> ret;
        for (const auto & rule : rules) {
            ret.push_back(rule.data());
        }
        return ret;
    }
}
29
examples/grammar-parser.h
Normal file
@ -0,0 +1,29 @@
// Implements a parser for an extended Backus-Naur form (BNF), producing the
// binary context-free grammar format specified by whisper.h. Supports character
// ranges, grouping, and repetition operators. As an example, a grammar for
// arithmetic might look like:
//
// root  ::= expr
// expr  ::= term ([-+*/] term)*
// term  ::= num | "(" space expr ")" space
// num   ::= [0-9]+ space
// space ::= [ \t\n]*

#pragma once
#include "whisper.h"
#include <vector>
#include <map>
#include <cstdint>
#include <string>

namespace grammar_parser {
    struct parse_state {
        std::map<std::string, uint32_t> symbol_ids;
        std::vector<std::vector<whisper_grammar_element>> rules;

        std::vector<const whisper_grammar_element *> c_rules() const;
    };

    parse_state parse(const char * src);
    void print_grammar(FILE * file, const parse_state & state);
}
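For reference, a minimal sketch of how the parser API declared above can be exercised on its own; the grammar string is the arithmetic example from the header comment, and attaching the resulting rules to a decode call relies on grammar fields added to `whisper_full_params` elsewhere in this branch (not shown here):

```cpp
// Sketch: parse the arithmetic grammar from the header comment and dump it.
#include "grammar-parser.h"

#include <cstdio>
#include <vector>

int main() {
    const char * src =
        "root  ::= expr\n"
        "expr  ::= term ([-+*/] term)*\n"
        "term  ::= num | \"(\" space expr \")\" space\n"
        "num   ::= [0-9]+ space\n"
        "space ::= [ \\t\\n]*\n";

    grammar_parser::parse_state state = grammar_parser::parse(src);

    // human-readable dump of the parsed rules
    grammar_parser::print_grammar(stderr, state);

    // per-rule pointers, the flat form consumed by the C API
    std::vector<const whisper_grammar_element *> rules = state.c_rules();
    fprintf(stderr, "parsed %zu rules\n", rules.size());

    return 0;
}
```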
@ -48,7 +48,7 @@ if [ -n "$3" ]; then
fi

# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )

# list available models
function list_models {
@ -62,8 +62,8 @@ struct whisper_params {
    int32_t progress_step = 5;
    int32_t max_context = -1;
    int32_t max_len = 0;
    int32_t best_of = 2;
    int32_t beam_size = -1;
    int32_t best_of = whisper_full_default_params(WHISPER_SAMPLING_GREEDY).greedy.best_of;
    int32_t beam_size = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH).beam_search.beam_size;

    float word_thold = 0.01f;
    float entropy_thold = 2.40f;
@ -165,8 +165,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
    else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
    else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
    else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
    else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
    else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
    else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
    else {
        fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
        whisper_print_usage(argc, argv, params);
@ -925,9 +925,9 @@ int main(int argc, char ** argv) {
    if (params.detect_language) {
        params.language = "auto";
    }
    fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
    fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, %d beams + best of %d, lang = %s, task = %s, %stimestamps = %d ...\n",
            __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
            params.n_threads, params.n_processors,
            params.n_threads, params.n_processors, params.beam_size, params.best_of,
            params.language.c_str(),
            params.translate ? "translate" : "transcribe",
            params.tinydiarize ? "tdrz = 1, " : "",
6
examples/server/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
set(TARGET server)
add_executable(${TARGET} server.cpp httplib.h json.hpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
59
examples/server/README.md
Normal file
@ -0,0 +1,59 @@
# whisper.cpp HTTP server

Simple HTTP server. WAV files are passed to the inference model via HTTP requests.

```
./server -h

usage: ./bin/server [options]

options:
  -h,        --help              [default] show this help message and exit
  -t N,      --threads N         [4      ] number of threads to use during computation
  -p N,      --processors N      [1      ] number of processors to use during computation
  -ot N,     --offset-t N        [0      ] time offset in milliseconds
  -on N,     --offset-n N        [0      ] segment index offset
  -d  N,     --duration N        [0      ] duration of audio to process in milliseconds
  -mc N,     --max-context N     [-1     ] maximum number of text context tokens to store
  -ml N,     --max-len N         [0      ] maximum segment length in characters
  -sow,      --split-on-word     [false  ] split on word rather than on token
  -bo N,     --best-of N         [2      ] number of best candidates to keep
  -bs N,     --beam-size N       [-1     ] beam size for beam search
  -wt N,     --word-thold N      [0.01   ] word timestamp probability threshold
  -et N,     --entropy-thold N   [2.40   ] entropy threshold for decoder fail
  -lpt N,    --logprob-thold N   [-1.00  ] log probability threshold for decoder fail
  -debug,    --debug-mode        [false  ] enable debug mode (eg. dump log_mel)
  -tr,       --translate         [false  ] translate from source language to english
  -di,       --diarize           [false  ] stereo audio diarization
  -tdrz,     --tinydiarize       [false  ] enable tinydiarize (requires a tdrz model)
  -nf,       --no-fallback       [false  ] do not use temperature fallback while decoding
  -ps,       --print-special     [false  ] print special tokens
  -pc,       --print-colors      [false  ] print colors
  -pp,       --print-progress    [false  ] print progress
  -nt,       --no-timestamps     [false  ] do not print timestamps
  -l LANG,   --language LANG     [en     ] spoken language ('auto' for auto-detect)
  -dl,       --detect-language   [false  ] exit after automatically detecting language
  --prompt PROMPT                [       ] initial prompt
  -m FNAME,  --model FNAME       [models/ggml-base.en.bin] model path
  -oved D,   --ov-e-device DNAME [CPU    ] the OpenVINO device used for encode inference
  --host HOST,                   [127.0.0.1] Hostname/IP address for the server
  --port PORT,                   [8080   ] Port number for the server
```

## request examples

**/inference**
```
curl 127.0.0.1:8080/inference \
-H "Content-Type: multipart/form-data" \
-F file="@<file-path>" \
-F temperature="0.2" \
-F response-format="json"
```

**/load**
```
curl 127.0.0.1:8080/load \
-H "Content-Type: multipart/form-data" \
-F model="<path-to-model-file>"
```
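The same requests can be issued from C++ with the `httplib.h` header bundled next to `server.cpp`. The sketch below mirrors the `/inference` curl example above; the WAV path is a placeholder and error handling is kept minimal:

```cpp
// Sketch of a C++ client for the /inference endpoint, using the cpp-httplib
// header (httplib.h) that ships next to server.cpp. The multipart field names
// ("file", "temperature", "response-format") mirror the curl example above.
#include "httplib.h"

#include <fstream>
#include <iostream>
#include <sstream>

int main() {
    // read the WAV file into memory
    std::ifstream wav("samples/jfk.wav", std::ios::binary); // placeholder path
    std::stringstream buf;
    buf << wav.rdbuf();

    httplib::Client cli("127.0.0.1", 8080);

    httplib::MultipartFormDataItems items = {
        // { name, content, filename, content_type }
        { "file",            buf.str(), "jfk.wav", "audio/wav" },
        { "temperature",     "0.2",     "",        ""          },
        { "response-format", "json",    "",        ""          },
    };

    auto res = cli.Post("/inference", items);
    if (res && res->status == 200) {
        std::cout << res->body << std::endl; // e.g. {"text":" ..."}
    } else {
        std::cerr << "request failed" << std::endl;
        return 1;
    }

    return 0;
}
```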
9262
examples/server/httplib.h
Normal file
24596
examples/server/json.hpp
Normal file
699
examples/server/server.cpp
Normal file
@ -0,0 +1,699 @@
|
||||
#include "common.h"
|
||||
|
||||
#include "whisper.h"
|
||||
#include "httplib.h"
|
||||
#include "json.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
using namespace httplib;
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace {
|
||||
|
||||
// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
|
||||
// Lowest is red, middle is yellow, highest is green.
|
||||
const std::vector<std::string> k_colors = {
|
||||
"\033[38;5;196m", "\033[38;5;202m", "\033[38;5;208m", "\033[38;5;214m", "\033[38;5;220m",
|
||||
"\033[38;5;226m", "\033[38;5;190m", "\033[38;5;154m", "\033[38;5;118m", "\033[38;5;82m",
|
||||
};
|
||||
|
||||
// output formats
|
||||
const std::string json_format = "json";
|
||||
const std::string text_format = "text";
|
||||
const std::string srt_format = "srt";
|
||||
const std::string vjson_format = "verbose_json";
|
||||
const std::string vtt_format = "vtt";
|
||||
|
||||
struct server_params
|
||||
{
|
||||
std::string hostname = "127.0.0.1";
|
||||
std::string public_path = "examples/server/public";
|
||||
|
||||
int32_t port = 8080;
|
||||
int32_t read_timeout = 600;
|
||||
int32_t write_timeout = 600;
|
||||
};
|
||||
|
||||
struct whisper_params {
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
int32_t n_processors = 1;
|
||||
int32_t offset_t_ms = 0;
|
||||
int32_t offset_n = 0;
|
||||
int32_t duration_ms = 0;
|
||||
int32_t progress_step = 5;
|
||||
int32_t max_context = -1;
|
||||
int32_t max_len = 0;
|
||||
int32_t best_of = 2;
|
||||
int32_t beam_size = -1;
|
||||
|
||||
float word_thold = 0.01f;
|
||||
float entropy_thold = 2.40f;
|
||||
float logprob_thold = -1.00f;
|
||||
float userdef_temp = 0.20f;
|
||||
|
||||
bool speed_up = false;
|
||||
bool debug_mode = false;
|
||||
bool translate = false;
|
||||
bool detect_language = false;
|
||||
bool diarize = false;
|
||||
bool tinydiarize = false;
|
||||
bool split_on_word = false;
|
||||
bool no_fallback = false;
|
||||
bool print_special = false;
|
||||
bool print_colors = false;
|
||||
bool print_progress = false;
|
||||
bool no_timestamps = false;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string prompt = "";
|
||||
std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
|
||||
std::string response_format = json_format;
|
||||
|
||||
// [TDRZ] speaker turn string
|
||||
std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
|
||||
|
||||
std::string openvino_encode_device = "CPU";
|
||||
};
|
||||
|
||||
// 500  -> 00:00:05.000
// 6000 -> 00:01:00.000
std::string to_timestamp(int64_t t, bool comma = false) {
|
||||
int64_t msec = t * 10;
|
||||
int64_t hr = msec / (1000 * 60 * 60);
|
||||
msec = msec - hr * (1000 * 60 * 60);
|
||||
int64_t min = msec / (1000 * 60);
|
||||
msec = msec - min * (1000 * 60);
|
||||
int64_t sec = msec / 1000;
|
||||
msec = msec - sec * 1000;
|
||||
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
int timestamp_to_sample(int64_t t, int n_samples) {
|
||||
return std::max(0, std::min((int) n_samples - 1, (int) ((t*WHISPER_SAMPLE_RATE)/100)));
|
||||
}
|
||||
|
||||
bool is_file_exist(const char *fileName)
|
||||
{
|
||||
std::ifstream infile(fileName);
|
||||
return infile.good();
|
||||
}
|
||||
|
||||
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params,
|
||||
const server_params& sparams) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "usage: %s [options] \n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors);
|
||||
fprintf(stderr, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms);
|
||||
fprintf(stderr, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n);
|
||||
fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
|
||||
fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
|
||||
fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
|
||||
fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
|
||||
fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
|
||||
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
|
||||
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
|
||||
fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
|
||||
fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
|
||||
// fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
|
||||
fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
|
||||
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
|
||||
fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
||||
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
|
||||
fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
|
||||
fprintf(stderr, " -dl, --detect-language [%-7s] exit after automatically detecting language\n", params.detect_language ? "true" : "false");
|
||||
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
|
||||
// server params
|
||||
fprintf(stderr, " --host HOST, [%-7s] Hostname/IP address for the server\n", sparams.hostname.c_str());
|
||||
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
|
||||
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params, server_params & sparams) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
|
||||
else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
|
||||
else if (arg == "-bo" || arg == "--best-of") { params.best_of = std::stoi(argv[++i]); }
|
||||
else if (arg == "-bs" || arg == "--beam-size") { params.beam_size = std::stoi(argv[++i]); }
|
||||
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
|
||||
// else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
|
||||
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
|
||||
else if (arg == "-sow" || arg == "--split-on-word") { params.split_on_word = true; }
|
||||
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
|
||||
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
||||
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
|
||||
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-dl" || arg == "--detect-language") { params.detect_language = true; }
|
||||
else if ( arg == "--prompt") { params.prompt = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
// server params
|
||||
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
||||
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
|
||||
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct whisper_print_user_data {
|
||||
const whisper_params * params;
|
||||
|
||||
const std::vector<std::vector<float>> * pcmf32s;
|
||||
int progress_prev;
|
||||
};
|
||||
|
||||
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
|
||||
std::string speaker = "";
|
||||
const int64_t n_samples = pcmf32s[0].size();
|
||||
|
||||
const int64_t is0 = timestamp_to_sample(t0, n_samples);
|
||||
const int64_t is1 = timestamp_to_sample(t1, n_samples);
|
||||
|
||||
double energy0 = 0.0f;
|
||||
double energy1 = 0.0f;
|
||||
|
||||
for (int64_t j = is0; j < is1; j++) {
|
||||
energy0 += fabs(pcmf32s[0][j]);
|
||||
energy1 += fabs(pcmf32s[1][j]);
|
||||
}
|
||||
|
||||
if (energy0 > 1.1*energy1) {
|
||||
speaker = "0";
|
||||
} else if (energy1 > 1.1*energy0) {
|
||||
speaker = "1";
|
||||
} else {
|
||||
speaker = "?";
|
||||
}
|
||||
|
||||
//printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, speaker = %s\n", is0, is1, energy0, energy1, speaker.c_str());
|
||||
|
||||
if (!id_only) {
|
||||
speaker.insert(0, "(speaker ");
|
||||
speaker.append(")");
|
||||
}
|
||||
|
||||
return speaker;
|
||||
}
|
||||
|
||||
void whisper_print_progress_callback(struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, int progress, void * user_data) {
|
||||
int progress_step = ((whisper_print_user_data *) user_data)->params->progress_step;
|
||||
int * progress_prev = &(((whisper_print_user_data *) user_data)->progress_prev);
|
||||
if (progress >= *progress_prev + progress_step) {
|
||||
*progress_prev += progress_step;
|
||||
fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress);
|
||||
}
|
||||
}
|
||||
|
||||
void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
|
||||
const auto & params = *((whisper_print_user_data *) user_data)->params;
|
||||
const auto & pcmf32s = *((whisper_print_user_data *) user_data)->pcmf32s;
|
||||
|
||||
const int n_segments = whisper_full_n_segments(ctx);
|
||||
|
||||
std::string speaker = "";
|
||||
|
||||
int64_t t0 = 0;
|
||||
int64_t t1 = 0;
|
||||
|
||||
// print the last n_new segments
|
||||
const int s0 = n_segments - n_new;
|
||||
|
||||
if (s0 == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
for (int i = s0; i < n_segments; i++) {
|
||||
if (!params.no_timestamps || params.diarize) {
|
||||
t0 = whisper_full_get_segment_t0(ctx, i);
|
||||
t1 = whisper_full_get_segment_t1(ctx, i);
|
||||
}
|
||||
|
||||
if (!params.no_timestamps) {
|
||||
printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
|
||||
}
|
||||
|
||||
if (params.diarize && pcmf32s.size() == 2) {
|
||||
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
|
||||
}
|
||||
|
||||
if (params.print_colors) {
|
||||
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
|
||||
if (params.print_special == false) {
|
||||
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
|
||||
if (id >= whisper_token_eot(ctx)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const char * text = whisper_full_get_token_text(ctx, i, j);
|
||||
const float p = whisper_full_get_token_p (ctx, i, j);
|
||||
|
||||
const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
|
||||
|
||||
printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
|
||||
}
|
||||
} else {
|
||||
const char * text = whisper_full_get_segment_text(ctx, i);
|
||||
|
||||
printf("%s%s", speaker.c_str(), text);
|
||||
}
|
||||
|
||||
if (params.tinydiarize) {
|
||||
if (whisper_full_get_segment_speaker_turn_next(ctx, i)) {
|
||||
printf("%s", params.tdrz_speaker_turn.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// with timestamps or speakers: each segment on new line
|
||||
if (!params.no_timestamps || params.diarize) {
|
||||
printf("\n");
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
std::string output_str(struct whisper_context * ctx, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
||||
std::stringstream result;
|
||||
const int n_segments = whisper_full_n_segments(ctx);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(ctx, i);
|
||||
std::string speaker = "";
|
||||
|
||||
if (params.diarize && pcmf32s.size() == 2)
|
||||
{
|
||||
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
||||
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
|
||||
}
|
||||
|
||||
result << speaker << text << "\n";
|
||||
}
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void get_req_parameters(const Request & req, whisper_params & params)
|
||||
{
|
||||
// user model configuration
|
||||
if (req.has_file("offset-t"))
|
||||
{
|
||||
params.offset_t_ms = std::stoi(req.get_file_value("offset-t").content);
|
||||
}
|
||||
if (req.has_file("offset-n"))
|
||||
{
|
||||
params.offset_n = std::stoi(req.get_file_value("offset-n").content);
|
||||
}
|
||||
if (req.has_file("duration"))
|
||||
{
|
||||
params.duration_ms = std::stoi(req.get_file_value("duration").content);
|
||||
}
|
||||
if (req.has_file("max-context"))
|
||||
{
|
||||
params.max_context = std::stoi(req.get_file_value("max-context").content);
|
||||
}
|
||||
if (req.has_file("prompt"))
|
||||
{
|
||||
params.prompt = req.get_file_value("prompt").content;
|
||||
}
|
||||
if (req.has_file("response-format"))
|
||||
{
|
||||
params.response_format = req.get_file_value("response-format").content;
|
||||
}
|
||||
if (req.has_file("temperature"))
|
||||
{
|
||||
params.userdef_temp = std::stof(req.get_file_value("temperature").content);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
whisper_params params;
|
||||
server_params sparams;
|
||||
|
||||
std::mutex whisper_mutex;
|
||||
|
||||
if (whisper_params_parse(argc, argv, params, sparams) == false) {
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1) {
|
||||
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (params.diarize && params.tinydiarize) {
|
||||
fprintf(stderr, "error: cannot use both --diarize and --tinydiarize\n");
|
||||
whisper_print_usage(argc, argv, params, sparams);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// whisper init
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
// initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
|
||||
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
|
||||
|
||||
Server svr;
|
||||
|
||||
std::string const default_content = "<html>hello</html>";
|
||||
|
||||
// this is only called if no index.html is found in the public --path
|
||||
svr.Get("/", [&default_content](const Request &, Response &res){
|
||||
res.set_content(default_content, "text/html");
|
||||
return false;
|
||||
});
|
||||
|
||||
svr.Post("/inference", [&](const Request &req, Response &res){
|
||||
// acquire the whisper model mutex lock
|
||||
whisper_mutex.lock();
|
||||
|
||||
// first check user requested fields of the request
|
||||
if (!req.has_file("file"))
|
||||
{
|
||||
fprintf(stderr, "error: no 'file' field in the request\n");
|
||||
const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
auto audio_file = req.get_file_value("file");
|
||||
|
||||
// check non-required fields
|
||||
get_req_parameters(req, params);
|
||||
|
||||
std::string filename{audio_file.filename};
|
||||
printf("Received request: %s\n", filename.c_str());
|
||||
|
||||
// audio arrays
|
||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||
|
||||
// write file to temporary file
|
||||
std::ofstream temp_file{filename, std::ios::binary};
|
||||
temp_file << audio_file.content;
|
||||
|
||||
// read wav content into pcmf32
|
||||
if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
|
||||
fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
|
||||
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
// remove temp file
|
||||
std::remove(filename.c_str());
|
||||
|
||||
printf("Successfully loaded %s\n", filename.c_str());
|
||||
|
||||
// print system information
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
||||
params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
|
||||
}
|
||||
|
||||
// print some info about the processing
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
if (!whisper_is_multilingual(ctx)) {
|
||||
if (params.language != "en" || params.translate) {
|
||||
params.language = "en";
|
||||
params.translate = false;
|
||||
fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
|
||||
}
|
||||
}
|
||||
if (params.detect_language) {
|
||||
params.language = "auto";
|
||||
}
|
||||
fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, %d processors, lang = %s, task = %s, %stimestamps = %d ...\n",
|
||||
__func__, filename.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
||||
params.n_threads, params.n_processors,
|
||||
params.language.c_str(),
|
||||
params.translate ? "translate" : "transcribe",
|
||||
params.tinydiarize ? "tdrz = 1, " : "",
|
||||
params.no_timestamps ? 0 : 1);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
// run the inference
|
||||
{
|
||||
|
||||
printf("Running whisper.cpp inference on %s\n", filename.c_str());
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.strategy = params.beam_size > 1 ? WHISPER_SAMPLING_BEAM_SEARCH : WHISPER_SAMPLING_GREEDY;
|
||||
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = params.print_progress;
|
||||
wparams.print_timestamps = !params.no_timestamps;
|
||||
wparams.print_special = params.print_special;
|
||||
wparams.translate = params.translate;
|
||||
wparams.language = params.language.c_str();
|
||||
wparams.detect_language = params.detect_language;
|
||||
wparams.n_threads = params.n_threads;
|
||||
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx;
|
||||
wparams.offset_ms = params.offset_t_ms;
|
||||
wparams.duration_ms = params.duration_ms;
|
||||
|
||||
wparams.thold_pt = params.word_thold;
|
||||
wparams.split_on_word = params.split_on_word;
|
||||
|
||||
wparams.speed_up = params.speed_up;
|
||||
wparams.debug_mode = params.debug_mode;
|
||||
|
||||
wparams.tdrz_enable = params.tinydiarize; // [TDRZ]
|
||||
|
||||
wparams.initial_prompt = params.prompt.c_str();
|
||||
|
||||
wparams.greedy.best_of = params.best_of;
|
||||
wparams.beam_search.beam_size = params.beam_size;
|
||||
|
||||
wparams.temperature_inc = params.userdef_temp;
|
||||
wparams.entropy_thold = params.entropy_thold;
|
||||
wparams.logprob_thold = params.logprob_thold;
|
||||
|
||||
whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
|
||||
|
||||
// this callback is called on each new segment
|
||||
if (!wparams.print_realtime) {
|
||||
wparams.new_segment_callback = whisper_print_segment_callback;
|
||||
wparams.new_segment_callback_user_data = &user_data;
|
||||
}
|
||||
|
||||
if (wparams.print_progress) {
|
||||
wparams.progress_callback = whisper_print_progress_callback;
|
||||
wparams.progress_callback_user_data = &user_data;
|
||||
}
|
||||
|
||||
// examples for abort mechanism
|
||||
// in examples below, we do not abort the processing, but we could if the flag is set to true
|
||||
|
||||
// the callback is called before every encoder run - if it returns false, the processing is aborted
|
||||
{
|
||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return !is_aborted;
|
||||
};
|
||||
wparams.encoder_begin_callback_user_data = &is_aborted;
|
||||
}
|
||||
|
||||
// the callback is called before every computation - if it returns true, the computation is aborted
|
||||
{
|
||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||
|
||||
wparams.abort_callback = [](void * user_data) {
|
||||
bool is_aborted = *(bool*)user_data;
|
||||
return is_aborted;
|
||||
};
|
||||
wparams.abort_callback_user_data = &is_aborted;
|
||||
}
|
||||
|
||||
if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
|
||||
fprintf(stderr, "%s: failed to process audio\n", argv[0]);
|
||||
const std::string error_resp = "{\"error\":\"failed to process audio\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// return results to user
|
||||
if (params.response_format == text_format)
|
||||
{
|
||||
std::string results = output_str(ctx, params, pcmf32s);
|
||||
res.set_content(results.c_str(), "text/html");
|
||||
}
|
||||
// TODO add more output formats
|
||||
else
|
||||
{
|
||||
std::string results = output_str(ctx, params, pcmf32s);
|
||||
json jres = json{
|
||||
{"text", results}
|
||||
};
|
||||
res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace),
|
||||
"application/json");
|
||||
}
|
||||
|
||||
// return whisper model mutex lock
|
||||
whisper_mutex.unlock();
|
||||
});
|
||||
svr.Post("/load", [&](const Request &req, Response &res){
|
||||
whisper_mutex.lock();
|
||||
if (!req.has_file("model"))
|
||||
{
|
||||
fprintf(stderr, "error: no 'model' field in the request\n");
|
||||
const std::string error_resp = "{\"error\":\"no 'model' field in the request\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
std::string model = req.get_file_value("model").content;
|
||||
if (!is_file_exist(model.c_str()))
|
||||
{
|
||||
fprintf(stderr, "error: 'model': %s not found!\n", model.c_str());
|
||||
const std::string error_resp = "{\"error\":\"model not found!\"}";
|
||||
res.set_content(error_resp, "application/json");
|
||||
whisper_mutex.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
// clean up
|
||||
whisper_free(ctx);
|
||||
|
||||
// whisper init
|
||||
ctx = whisper_init_from_file_with_params(model.c_str(), cparams);
|
||||
|
||||
// TODO perhaps load prior model here instead of exit
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: model init failed, no model loaded must exit\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
|
||||
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
|
||||
|
||||
const std::string success = "Load was successful!";
|
||||
res.set_content(success, "application/text");
|
||||
|
||||
// check if the model is in the file system
|
||||
whisper_mutex.unlock();
|
||||
});
|
||||
|
||||
svr.set_exception_handler([](const Request &, Response &res, std::exception_ptr ep) {
|
||||
const char fmt[] = "500 Internal Server Error\n%s";
|
||||
char buf[BUFSIZ];
|
||||
try {
|
||||
std::rethrow_exception(std::move(ep));
|
||||
} catch (std::exception &e) {
|
||||
snprintf(buf, sizeof(buf), fmt, e.what());
|
||||
} catch (...) {
|
||||
snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
|
||||
}
|
||||
res.set_content(buf, "text/plain");
|
||||
res.status = 500;
|
||||
});
|
||||
|
||||
svr.set_error_handler([](const Request &, Response &res) {
|
||||
if (res.status == 400) {
|
||||
res.set_content("Invalid request", "text/plain");
|
||||
} else if (res.status != 500) {
|
||||
res.set_content("File Not Found", "text/plain");
|
||||
res.status = 404;
|
||||
}
|
||||
});
|
||||
|
||||
// set timeouts and change hostname and port
|
||||
svr.set_read_timeout(sparams.read_timeout);
|
||||
svr.set_write_timeout(sparams.write_timeout);
|
||||
|
||||
if (!svr.bind_to_port(sparams.hostname, sparams.port))
|
||||
{
|
||||
fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n",
|
||||
sparams.hostname.c_str(), sparams.port);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Set the base directory for serving static files
|
||||
svr.set_base_dir(sparams.public_path);
|
||||
|
||||
// to make it ctrl+clickable:
|
||||
printf("\nwhisper server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
|
||||
|
||||
if (!svr.listen_after_bind())
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
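The abort hooks set up in `server.cpp` use a `static bool` that the inline comments note should really be atomic to avoid a data race. Below is a minimal sketch of that atomic variant; hooking it to `SIGINT` is an illustrative assumption, not something the server currently does:

```cpp
// Sketch only: an std::atomic abort flag for the encoder_begin/abort callbacks
// used in server.cpp. Wiring it to SIGINT is illustrative; the server itself
// never sets the flag.
#include "whisper.h"

#include <atomic>
#include <csignal>

static std::atomic<bool> g_is_aborted{false};

static void sigint_handler(int /*signo*/) {
    g_is_aborted.store(true);
}

static void set_abort_callbacks(whisper_full_params & wparams) {
    std::signal(SIGINT, sigint_handler);

    // called before every encoder run - return false to abort the processing
    wparams.encoder_begin_callback = [](whisper_context * /*ctx*/, whisper_state * /*state*/, void * user_data) {
        return !static_cast<std::atomic<bool> *>(user_data)->load();
    };
    wparams.encoder_begin_callback_user_data = &g_is_aborted;

    // called periodically during the computation - return true to abort it
    wparams.abort_callback = [](void * user_data) {
        return static_cast<std::atomic<bool> *>(user_data)->load();
    };
    wparams.abort_callback_user_data = &g_is_aborted;
}
```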
@ -53,6 +53,7 @@ struct whisper_params {
    int32_t capture_id = -1;
    int32_t max_tokens = 32;
    int32_t audio_ctx = 0;
    int32_t n_gpu_layers = 999;

    float vad_thold = 0.6f;
    float freq_thold = 100.0f;
@ -90,6 +91,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
    else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
    else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
    else if (arg == "-ngl" || arg == "--n-gpu-layers") { params.n_gpu_layers = std::stoi(argv[++i]); }
    else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
    else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
    else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
@ -134,6 +136,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
    fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
    fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
    fprintf(stderr, " -ngl N, --n-gpu-layers N [%-7d] number of layers to store in VRAM\n", params.n_gpu_layers);
    fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
    fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
    fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
@ -268,6 +271,8 @@ int main(int argc, char ** argv) {
    auto lmparams = llama_model_default_params();
    if (!params.use_gpu) {
        lmparams.n_gpu_layers = 0;
    } else {
        lmparams.n_gpu_layers = params.n_gpu_layers;
    }

    struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
@ -681,8 +686,8 @@ int main(int argc, char ** argv) {
        }
    }

    text_to_speak = ::replace(text_to_speak, "\"", "");
    int ret = system((params.speak + " " + std::to_string(voice_id) + " \"" + text_to_speak + "\"").c_str());
    text_to_speak = ::replace(text_to_speak, "'", "'\"'\"'");
    int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + text_to_speak + "'").c_str());
    if (ret != 0) {
        fprintf(stderr, "%s: failed to speak\n", __func__);
    }
@ -21,7 +21,7 @@ help()
    echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
    echo "options:"
    echo "-s Step in seconds (default is $step)."
    echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
    echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
    echo "-t Number of threads to use."
    echo "-h Print this help page."
    echo
9
examples/wchess/CMakeLists.txt
Normal file
@ -0,0 +1,9 @@
set(CMAKE_CXX_STANDARD 11)

add_subdirectory(libwchess)

if (EMSCRIPTEN)
    add_subdirectory(wchess.wasm)
else()
    add_subdirectory(wchess.cmd)
endif()
19
examples/wchess/libwchess/CMakeLists.txt
Normal file
@ -0,0 +1,19 @@
add_library(libwchess
    WChess.cpp
    WChess.h
    Chessboard.cpp
    Chessboard.h
    )

target_link_libraries(libwchess
    PUBLIC
    whisper
    common
    )

target_include_directories(libwchess
    PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
    )

add_executable(test-chessboard test-chessboard.cpp Chessboard.cpp)
714
examples/wchess/libwchess/Chessboard.cpp
Normal file
@ -0,0 +1,714 @@
|
||||
#include "Chessboard.h"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <set>
|
||||
|
||||
namespace {
|
||||
// remove std::string_view, c++17 -> c++11
|
||||
constexpr std::array<const char*, 64> positions = {
|
||||
"a1", "b1", "c1", "d1", "e1", "f1", "g1", "h1",
|
||||
"a2", "b2", "c2", "d2", "e2", "f2", "g2", "h2",
|
||||
"a3", "b3", "c3", "d3", "e3", "f3", "g3", "h3",
|
||||
"a4", "b4", "c4", "d4", "e4", "f4", "g4", "h4",
|
||||
"a5", "b5", "c5", "d5", "e5", "f5", "g5", "h5",
|
||||
"a6", "b6", "c6", "d6", "e6", "f6", "g6", "h6",
|
||||
"a7", "b7", "c7", "d7", "e7", "f7", "g7", "h7",
|
||||
"a8", "b8", "c8", "d8", "e8", "f8", "g8", "h8",
|
||||
};
|
||||
constexpr int INVALID_POS = positions.size();
|
||||
constexpr int R = 0; // index of the file letter ('a'..'h') in a square string such as "e4"
constexpr int F = 1; // index of the rank digit ('1'..'8')
#define POS ((c[F] - '1') * 8 + (c[R] - 'a'))
|
||||
constexpr int operator ""_P(const char * c, size_t size) {
|
||||
return size < 2 || POS < 0 || POS > INVALID_POS ? INVALID_POS : POS;
|
||||
}
|
||||
#undef POS
|
||||
|
||||
struct sview {
|
||||
const char * ptr = nullptr;
|
||||
size_t size = 0;
|
||||
|
||||
sview() = default;
|
||||
sview(const char * p, size_t s) : ptr(p), size(s) {}
|
||||
sview(const std::string& s) : ptr(s.data()), size(s.size()) {}
|
||||
|
||||
size_t find(char del, size_t pos) {
|
||||
while (pos < size && ptr[pos] != del) ++pos;
|
||||
return pos < size ? pos : std::string::npos;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<sview> split(sview str, char del) {
|
||||
std::vector<sview> res;
|
||||
size_t cur = 0;
|
||||
size_t last = 0;
|
||||
while (cur != std::string::npos) {
|
||||
if (str.ptr[last] == ' ') {
|
||||
++last;
|
||||
continue;
|
||||
}
|
||||
cur = str.find(del, last);
|
||||
size_t len = cur == std::string::npos ? str.size - last : cur - last;
|
||||
res.emplace_back(str.ptr + last, len);
|
||||
last = cur + 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t strToPos(sview str) {
|
||||
return operator ""_P(str.ptr, str.size);
|
||||
}
|
||||
|
||||
constexpr std::array<const char*, 6> pieceNames = {
|
||||
"pawn", "knight", "bishop", "rook", "queen", "king",
|
||||
};
|
||||
|
||||
int strToType(sview str) {
|
||||
auto it = std::find_if(pieceNames.begin(), pieceNames.end(), [str] (const char* name) { return strncmp(name, str.ptr, str.size) == 0; });
|
||||
return it != pieceNames.end() ? int(it - pieceNames.begin()) : pieceNames.size();
|
||||
}
|
||||
}
|
||||
|
||||
Chessboard::Chessboard()
|
||||
: blackPieces {{
|
||||
{Piece::Pawn, Piece::Black, "a7"_P },
|
||||
{Piece::Pawn, Piece::Black, "b7"_P },
|
||||
{Piece::Pawn, Piece::Black, "c7"_P },
|
||||
{Piece::Pawn, Piece::Black, "d7"_P },
|
||||
{Piece::Pawn, Piece::Black, "e7"_P },
|
||||
{Piece::Pawn, Piece::Black, "f7"_P },
|
||||
{Piece::Pawn, Piece::Black, "g7"_P },
|
||||
{Piece::Pawn, Piece::Black, "h7"_P },
|
||||
{Piece::Rook, Piece::Black, "a8"_P },
|
||||
{Piece::Knight, Piece::Black, "b8"_P },
|
||||
{Piece::Bishop, Piece::Black, "c8"_P },
|
||||
{Piece::Queen, Piece::Black, "d8"_P },
|
||||
{Piece::King, Piece::Black, "e8"_P },
|
||||
{Piece::Bishop, Piece::Black, "f8"_P },
|
||||
{Piece::Knight, Piece::Black, "g8"_P },
|
||||
{Piece::Rook, Piece::Black, "h8"_P },
|
||||
}}
|
||||
, whitePieces {{
|
||||
{Piece::Pawn, Piece::White, "a2"_P },
|
||||
{Piece::Pawn, Piece::White, "b2"_P },
|
||||
{Piece::Pawn, Piece::White, "c2"_P },
|
||||
{Piece::Pawn, Piece::White, "d2"_P },
|
||||
{Piece::Pawn, Piece::White, "e2"_P },
|
||||
{Piece::Pawn, Piece::White, "f2"_P },
|
||||
{Piece::Pawn, Piece::White, "g2"_P },
|
||||
{Piece::Pawn, Piece::White, "h2"_P },
|
||||
{Piece::Rook, Piece::White, "a1"_P },
|
||||
{Piece::Knight, Piece::White, "b1"_P },
|
||||
{Piece::Bishop, Piece::White, "c1"_P },
|
||||
{Piece::Queen, Piece::White, "d1"_P },
|
||||
{Piece::King, Piece::White, "e1"_P },
|
||||
{Piece::Bishop, Piece::White, "f1"_P },
|
||||
{Piece::Knight, Piece::White, "g1"_P },
|
||||
{Piece::Rook, Piece::White, "h1"_P },
|
||||
}}
|
||||
, board {{
|
||||
&whitePieces[ 8], &whitePieces[ 9], &whitePieces[10], &whitePieces[11], &whitePieces[12], &whitePieces[13], &whitePieces[14], &whitePieces[15],
|
||||
&whitePieces[ 0], &whitePieces[ 1], &whitePieces[ 2], &whitePieces[ 3], &whitePieces[ 4], &whitePieces[ 5], &whitePieces[ 6], &whitePieces[ 7],
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
&blackPieces[ 0], &blackPieces[ 1], &blackPieces[ 2], &blackPieces[ 3], &blackPieces[ 4], &blackPieces[ 5], &blackPieces[ 6], &blackPieces[ 7],
|
||||
&blackPieces[ 8], &blackPieces[ 9], &blackPieces[10], &blackPieces[11], &blackPieces[12], &blackPieces[13], &blackPieces[14], &blackPieces[15],
|
||||
}}
|
||||
, whiteMoves {
|
||||
{"b1"_P, "a3"_P}, {"b1"_P, "c3"_P},
|
||||
{"g1"_P, "f3"_P}, {"g1"_P, "h3"_P},
|
||||
{"a2"_P, "a3"_P}, {"a2"_P, "a4"_P},
|
||||
{"b2"_P, "b3"_P}, {"b2"_P, "b4"_P},
|
||||
{"c2"_P, "c3"_P}, {"c2"_P, "c4"_P},
|
||||
{"d2"_P, "d3"_P}, {"d2"_P, "d4"_P},
|
||||
{"e2"_P, "e3"_P}, {"e2"_P, "e4"_P},
|
||||
{"f2"_P, "f3"_P}, {"f2"_P, "f4"_P},
|
||||
{"g2"_P, "g3"_P}, {"g2"_P, "g4"_P},
|
||||
{"h2"_P, "h3"_P}, {"h2"_P, "h4"_P},
|
||||
}
|
||||
, blackMoves {
|
||||
{"a7"_P, "a5"_P}, {"a7"_P, "a6"_P},
|
||||
{"b7"_P, "b5"_P}, {"b7"_P, "b6"_P},
|
||||
{"c7"_P, "c5"_P}, {"c7"_P, "c6"_P},
|
||||
{"d7"_P, "d5"_P}, {"d7"_P, "d6"_P},
|
||||
{"e7"_P, "e5"_P}, {"e7"_P, "e6"_P},
|
||||
{"f7"_P, "f5"_P}, {"f7"_P, "f6"_P},
|
||||
{"g7"_P, "g5"_P}, {"g7"_P, "g6"_P},
|
||||
{"h7"_P, "h5"_P}, {"h7"_P, "h6"_P},
|
||||
{"b8"_P, "a6"_P}, {"b8"_P, "c6"_P},
|
||||
{"g8"_P, "f6"_P}, {"g8"_P, "h6"_P},
|
||||
}
|
||||
|
||||
{
|
||||
static_assert(pieceNames.size() == Chessboard::Piece::Taken, "Mismatch between piece names and types");
|
||||
std::sort(whiteMoves.begin(), whiteMoves.end());
|
||||
std::sort(blackMoves.begin(), blackMoves.end());
|
||||
}
|
||||
|
||||
std::string Chessboard::getRules(const std::string& prompt) const {
|
||||
// leading space is very important!
|
||||
std::string result =
|
||||
"\n"
|
||||
"# leading space is very important!\n"
|
||||
"\n";
|
||||
if (prompt.empty()) {
|
||||
result += "move ::= \" \" ((piece | frompos) \" \" \"to \"?)? topos\n";
|
||||
//result += "move ::= \" \" frompos \" \" \"to \"? topos\n";
|
||||
}
|
||||
else {
|
||||
// result += "move ::= prompt \" \" ((piece | frompos) \" \" \"to \"?)? topos\n"
|
||||
result += "move ::= prompt \" \" frompos \" \" \"to \"? topos\n"
|
||||
"\n"
|
||||
"prompt ::= \" " + prompt + "\"\n";
|
||||
}
|
||||
|
||||
std::set<std::string> pieces;
|
||||
std::set<std::string> from_pos;
|
||||
std::set<std::string> to_pos;
|
||||
auto& allowed_moves = m_moveCounter % 2 ? blackMoves : whiteMoves;
|
||||
for (auto& m : allowed_moves) {
|
||||
if (board[m.first]->type != Piece::Taken) pieces.insert(pieceNames[board[m.first]->type]);
|
||||
from_pos.insert(positions[m.first]);
|
||||
to_pos.insert(positions[m.second]);
|
||||
}
|
||||
if (!pieces.empty()) {
|
||||
result += "piece ::= (";
|
||||
for (auto& p : pieces) result += " \"" + p + "\" |";
|
||||
result.pop_back();
|
||||
result += ")\n\n";
|
||||
}
|
||||
if (!from_pos.empty()) {
|
||||
result += "frompos ::= (";
|
||||
for (auto& p : from_pos) result += " \"" + p + "\" |";
|
||||
result.pop_back();
|
||||
result += ")\n";
|
||||
}
|
||||
if (!to_pos.empty()) {
|
||||
result += "topos ::= (";
|
||||
for (auto& p : to_pos) result += " \"" + p + "\" |";
|
||||
result.pop_back();
|
||||
result += ")\n";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Chessboard::stringifyBoard() {
|
||||
static constexpr std::array<char, 6> blackShort = {
|
||||
'p', 'n', 'b', 'r', 'q', 'k',
|
||||
};
|
||||
static constexpr std::array<char, 6> whiteShort = {
|
||||
'P', 'N', 'B', 'R', 'Q', 'K',
|
||||
};
|
||||
|
||||
std::string result;
|
||||
result.reserve(16 + 2 * 64 + 16);
|
||||
for (char rank = 'a'; rank <= 'h'; ++rank) {
|
||||
result.push_back(rank);
|
||||
result.push_back(' ');
|
||||
}
|
||||
result.back() = '\n';
|
||||
for (int i = 7; i >= 0; --i) {
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
auto p = board[i * 8 + j];
|
||||
if (p) result.push_back(p->color == Piece::White ? whiteShort[p->type] : blackShort[p->type]);
|
||||
else result.push_back((i + j) % 2 ? '.' : '*');
|
||||
result.push_back(' ');
|
||||
}
|
||||
result.push_back('0' + i + 1);
|
||||
result.push_back('\n');
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string Chessboard::process(const std::string& command) {
|
||||
auto color = Piece::Colors(m_moveCounter % 2);
|
||||
fprintf(stdout, "%s: Command to %s: '%s%.*s%s'\n", __func__, (color ? "Black" : "White"), "\033[1m", int(command.size()), command.data(), "\033[0m");
|
||||
if (command.empty()) return "";
|
||||
auto tokens = split(command, ' ');
|
||||
for (auto& t : tokens) fprintf(stdout, "%s: Token %.*s\n", __func__, int(t.size), t.ptr);
|
||||
auto pos_from = INVALID_POS;
|
||||
auto type = Piece::Types::Taken;
|
||||
auto pos_to = INVALID_POS;
|
||||
if (tokens.size() == 1) {
|
||||
type = Piece::Types::Pawn;
|
||||
pos_to = strToPos(tokens.front());
|
||||
}
|
||||
else {
|
||||
pos_from = strToPos(tokens.front());
|
||||
if (pos_from == INVALID_POS) type = Piece::Types(strToType(tokens.front()));
|
||||
pos_to = strToPos(tokens.back());
|
||||
}
|
||||
if (pos_to == INVALID_POS) return "";
|
||||
if (pos_from == INVALID_POS) {
|
||||
if (type == Piece::Types::Taken) return "";
|
||||
auto& pieces = color ? blackPieces : whitePieces;
|
||||
auto pieceIndex = 0u;
|
||||
for (; pieceIndex < pieces.size(); ++pieceIndex) {
|
||||
if (pieces[pieceIndex].type == type && validateMove(pieces[pieceIndex], pos_to)) break;
|
||||
}
|
||||
if (pieceIndex == pieces.size()) return "";
|
||||
pos_from = pieces[pieceIndex].pos;
|
||||
}
|
||||
if (board[pos_from] == nullptr) return "";
|
||||
if (board[pos_from]->color != color) return "";
|
||||
|
||||
Move m = {pos_from, pos_to};
|
||||
auto& allowed_moves = color ? blackMoves : whiteMoves;
|
||||
fprintf(stdout, "%s:allowed size %d :\n", __func__, int(allowed_moves.size()));
|
||||
for (auto& m : allowed_moves) fprintf(stdout, " %s %s; ", positions[m.first], positions[m.second]);
|
||||
fprintf(stdout, "\n");
|
||||
if (!std::binary_search(allowed_moves.begin(), allowed_moves.end(), m)) return "";
|
||||
|
||||
move(m);
|
||||
|
||||
{
|
||||
auto it = std::remove_if(allowed_moves.begin(), allowed_moves.end(), [m] (const Move& move) { return move.first == m.first; });
|
||||
allowed_moves.erase(it, allowed_moves.end());
|
||||
}
|
||||
|
||||
std::vector<Piece*> affected = { board[m.second] };
|
||||
for (auto& p : whitePieces) {
|
||||
if (&p == board[m.second]
|
||||
|| validateMove(p, m.first)
|
||||
|| validateMove(p, m.second)
|
||||
|| std::binary_search(whiteMoves.begin(), whiteMoves.end(), Move(p.pos, m.second))
|
||||
) {
|
||||
auto it = std::remove_if(whiteMoves.begin(), whiteMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
|
||||
whiteMoves.erase(it, whiteMoves.end());
|
||||
affected.push_back(&p);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& p : blackPieces) {
|
||||
if (&p == board[m.second]
|
||||
|| validateMove(p, m.first)
|
||||
|| validateMove(p, m.second)
|
||||
|| std::binary_search(blackMoves.begin(), blackMoves.end(), Move(p.pos, m.second))
|
||||
) {
|
||||
auto it = std::remove_if(blackMoves.begin(), blackMoves.end(), [&p] (const Move& m) { return m.first == p.pos; });
|
||||
blackMoves.erase(it, blackMoves.end());
|
||||
affected.push_back(&p);
|
||||
}
|
||||
}
|
||||
for (auto& p : affected) getValidMoves(*p, p->color ? blackMoves : whiteMoves);
|
||||
|
||||
std::sort(blackMoves.begin(), blackMoves.end());
|
||||
std::sort(whiteMoves.begin(), whiteMoves.end());
|
||||
|
||||
std::string result = positions[m.first];
|
||||
result += "-";
|
||||
result += positions[m.second];
|
||||
++m_moveCounter;
|
||||
fprintf(stdout, "%s: Move '%s%s%s'\n", __func__, "\033[1m", result.data(), "\033[0m");
|
||||
return result;
|
||||
}
|
||||
|
||||
void Chessboard::getValidMoves(const Piece& piece, std::vector<Move>& result) {
|
||||
std::string cur = positions[piece.pos];
|
||||
switch (piece.type) {
|
||||
case Piece::Pawn: {
|
||||
std::string next = cur;
|
||||
piece.color ? --next[F] : ++next[F]; // one down / up
|
||||
std::string left = { char(next[R] - 1), next[F]};
|
||||
auto pos = strToPos(left);
|
||||
if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
std::string right = { char(next[R] + 1), next[F]};
|
||||
pos = strToPos(right);
|
||||
if (pos != INVALID_POS && board[pos] && board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
|
||||
else break;
|
||||
if (piece.color ? cur[F] != '7' : cur[F] != '2') break;
|
||||
piece.color ? --next[F] : ++next[F]; // one down / up
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !board[pos]) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
case Piece::Knight: {
|
||||
std::string next = cur;
|
||||
--next[F]; --next[F]; --next[R];
|
||||
auto pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[F]; --next[F]; ++next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[F]; ++next[F]; --next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[F]; ++next[F]; ++next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[F]; --next[R]; --next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[F]; --next[R]; --next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[F]; ++next[R]; ++next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[F]; ++next[R]; ++next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
case Piece::Bishop: {
|
||||
std::string next = cur;
|
||||
while (true) {
|
||||
--next[R]; --next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
--next[R]; ++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R]; --next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R]; ++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Piece::Rook: {
|
||||
std::string next = cur;
|
||||
while (true) {
|
||||
--next[R];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
--next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Piece::Queen: {
|
||||
std::string next = cur;
|
||||
while (true) {
|
||||
--next[R]; --next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
--next[R]; ++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R]; --next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R]; ++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
--next[R];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[R];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
--next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
next = cur;
|
||||
while (true) {
|
||||
++next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos == INVALID_POS) break;
|
||||
else if (board[pos]) {
|
||||
if (board[pos]->color != piece.color) result.emplace_back(piece.pos, pos);
|
||||
break;
|
||||
}
|
||||
result.emplace_back(piece.pos, pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Piece::King: {
|
||||
std::string next = cur;
|
||||
--next[R]; --next[F];
|
||||
auto pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[R]; ++next[F];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[R]; --next[F];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[R]; ++next[F];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[R];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
--next[F];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
next = cur;
|
||||
++next[F];
|
||||
pos = strToPos(next);
|
||||
if (pos != INVALID_POS && !(board[pos] && board[pos]->color == piece.color)) result.emplace_back(piece.pos, pos);
|
||||
|
||||
break;
|
||||
}
|
||||
case Piece::Taken: break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
bool Chessboard::validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
int direction = color == Piece::White ? 1 : -1;
|
||||
bool two_ranks = color == Piece::White ? from_rank == 1 : from_rank == 6;
|
||||
if (from_file == to_file) {
|
||||
if (from_rank == to_rank - direction) return board[to_rank * 8 + to_file] == nullptr;
|
||||
if (two_ranks && from_rank == to_rank - direction * 2) return board[(to_rank - direction) * 8 + to_file] == nullptr && board[to_rank * 8 + to_file] == nullptr;
|
||||
}
|
||||
else if (from_file + 1 == to_file || from_file - 1 == to_file) {
|
||||
if (from_rank == to_rank - direction) return board[to_rank * 8 + to_file] != nullptr && board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
int dr = std::abs(from_rank - to_rank);
|
||||
int df = std::abs(from_file - to_file);
|
||||
if ((dr == 2 && df == 1) || (dr == 1 && df == 2)) return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
if (from_rank - from_file == to_rank - to_file) {
|
||||
int direction = from_rank < to_rank ? 1 : -1;
|
||||
from_rank += direction;
|
||||
from_file += direction;
|
||||
while (from_rank != to_rank) {
|
||||
if (board[from_rank * 8 + from_file]) return false;
|
||||
from_rank += direction;
|
||||
from_file += direction;
|
||||
}
|
||||
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
if (from_rank + from_file == to_rank + to_file) {
|
||||
int direction = from_rank < to_rank ? 1 : -1;
|
||||
from_rank += direction;
|
||||
from_file -= direction;
|
||||
while (from_rank != to_rank) {
|
||||
if (board[from_rank * 8 + from_file]) return false;
|
||||
from_rank += direction;
|
||||
from_file -= direction;
|
||||
}
|
||||
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
if (from_rank == to_rank) {
|
||||
int direction = from_file < to_file ? 1 : -1;
|
||||
from_file += direction;
|
||||
while (from_file != to_file) {
|
||||
if (board[from_rank * 8 + from_file]) return false;
|
||||
from_file += direction;
|
||||
}
|
||||
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
if (from_file == to_file) {
|
||||
int direction = from_rank < to_rank ? 1 : -1;
|
||||
from_rank += direction;
|
||||
while (from_rank != to_rank) {
|
||||
if (board[from_rank * 8 + from_file]) return false;
|
||||
from_rank += direction;
|
||||
}
|
||||
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
if (validateBishopMove(color, from_rank, from_file, to_rank, to_file)) return true;
|
||||
return validateRookMove(color, from_rank, from_file, to_rank, to_file);
|
||||
}
|
||||
|
||||
bool Chessboard::validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file) {
|
||||
if (std::abs(from_rank - to_rank) < 2 && std::abs(from_file - to_file) < 2) {
|
||||
return board[to_rank * 8 + to_file] == nullptr || board[to_rank * 8 + to_file]->color != color;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::validateMove(const Piece& piece, int pos) {
|
||||
if (piece.type == Piece::Taken) return false;
|
||||
if (piece.pos == pos) return false;
|
||||
int i = piece.pos / 8;
|
||||
int j = piece.pos - i * 8;
|
||||
|
||||
int ii = pos / 8;
|
||||
int jj = pos - ii * 8;
|
||||
|
||||
switch (piece.type) {
|
||||
case Piece::Pawn: return validatePawnMove(piece.color, i, j, ii, jj);
|
||||
case Piece::Knight: return validateKnightMove(piece.color, i, j, ii, jj);
|
||||
case Piece::Bishop: return validateBishopMove(piece.color, i, j, ii, jj);
|
||||
case Piece::Rook: return validateRookMove(piece.color, i, j, ii, jj);
|
||||
case Piece::Queen: return validateQueenMove(piece.color, i, j, ii, jj);
|
||||
case Piece::King: return validateKingMove(piece.color, i, j, ii, jj);
|
||||
default: break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Chessboard::move(const Move& m) {
|
||||
if (!board[m.first] || (board[m.second] && board[m.first]->color == board[m.second]->color)) return false;
|
||||
if (board[m.second]) board[m.second]->type = Piece::Taken;
|
||||
board[m.second] = board[m.first];
|
||||
board[m.first] = nullptr;
|
||||
board[m.second]->pos = m.second;
|
||||
return true;
|
||||
}
|
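The validators above all work on a single integer square index: the rank is `pos / 8` and the file is the remainder, and a `Move` is just a pair of such indices, rendered elsewhere as `"d2-d4"`-style strings. The snippet below is a standalone illustration of that arithmetic, not part of the diff; `strToPos`/`posToStr` here are hypothetical stand-ins for the conversion helpers used by Chessboard.cpp, assuming ranks '1'–'8' map to rows 0–7 and files 'a'–'h' to columns 0–7.

```cpp
// Standalone sketch of the pos <-> (rank, file) arithmetic used by the move
// validators. strToPos/posToStr are hypothetical helpers for this example only.
#include <cassert>
#include <string>
#include <utility>

using Move = std::pair<int, int>; // (from square index, to square index)

static int strToPos(const std::string & s) {
    // assumption: 'a'..'h' -> file 0..7, '1'..'8' -> rank 0..7, index = rank*8 + file
    if (s.size() != 2 || s[0] < 'a' || s[0] > 'h' || s[1] < '1' || s[1] > '8') return -1;
    return (s[1] - '1') * 8 + (s[0] - 'a');
}

static std::string posToStr(int pos) {
    return { char('a' + pos % 8), char('1' + pos / 8) };
}

int main() {
    const int from = strToPos("d2");
    const int to   = strToPos("d4");

    // same decomposition as Chessboard::validateMove
    assert(from / 8 == 1 && from % 8 == 3); // rank index 1, file index 3
    assert(to   / 8 == 3 && to   % 8 == 3); // rank index 3, file index 3

    const Move m{from, to};
    assert(posToStr(m.first) + "-" + posToStr(m.second) == "d2-d4"); // matches the test expectations
    return 0;
}
```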
59
examples/wchess/libwchess/Chessboard.h
Normal file
@ -0,0 +1,59 @@
#pragma once
#include <string>
#include <array>
#include <vector>

class Chessboard {
public:
    Chessboard();
    std::string process(const std::string& t);
    std::string stringifyBoard();
    std::string getRules(const std::string & prompt) const;
    using Move = std::pair<int, int>;
private:
    bool move(const Move& move);

    struct Piece {
        enum Types {
            Pawn,
            Knight,
            Bishop,
            Rook,
            Queen,
            King,
            Taken,
        };

        enum Colors {
            White,
            Black,
        };

        Types type;
        Colors color;
        int pos;
    };

    using PieceSet = std::array<Piece, 16>;

    PieceSet blackPieces;
    PieceSet whitePieces;
    int m_moveCounter = 0;

    using Board = std::array<Piece*, 64>;
    Board board;

    std::vector<Move> whiteMoves;
    std::vector<Move> blackMoves;

    bool validateMove(const Piece& piece, int pos);
    void getValidMoves(const Piece& piece, std::vector<Move>& moves);
    // just basic validation
    // fixme: missing en passant, castling, promotion, etc.
    bool validatePawnMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateKnightMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateBishopMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateRookMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateQueenMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
    bool validateKingMove(Piece::Colors color, int from_rank, int from_file, int to_rank, int to_file);
};
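The header keeps the public surface small: `process()` consumes a spoken-style command string and returns the resulting move(s) in `"e2-e4"` coordinate notation, or an empty string when the command cannot be applied, while `stringifyBoard()` renders the current position. A minimal driver along the lines of the unit test further below might look like the following sketch (illustrative only; the expected output strings are an assumption based on the test cases):

```cpp
// Minimal sketch of driving Chessboard directly, without whisper.cpp involved.
// Mirrors the usage in test-chessboard.cpp; output format is "<from>-<to> ...".
#include "Chessboard.h"

#include <cstdio>

int main() {
    Chessboard chess;

    // commands alternate between White and Black; several can be chained with commas
    const auto moves = chess.process("pawn to e4, e5, knight to f3");
    std::printf("moves: %s\n", moves.c_str()); // e.g. "e2-e4 e7-e5 g1-f3"

    // an illegal or unparseable command yields an empty string
    if (chess.process("rook to e4").empty()) {
        std::printf("rejected illegal move\n");
    }

    std::printf("%s\n", chess.stringifyBoard().c_str());
    return 0;
}
```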
220
examples/wchess/libwchess/WChess.cpp
Normal file
@ -0,0 +1,220 @@
|
||||
#include "WChess.h"
|
||||
#include "Chessboard.h"
|
||||
#include "grammar-parser.h"
|
||||
#include "common.h"
|
||||
#include <thread>
|
||||
|
||||
WChess::WChess(whisper_context * ctx,
|
||||
const whisper_full_params & wparams,
|
||||
callbacks cb,
|
||||
settings s)
|
||||
: m_ctx(ctx)
|
||||
, m_wparams(wparams)
|
||||
, m_cb(cb)
|
||||
, m_settings(s)
|
||||
, m_board(new Chessboard())
|
||||
{}
|
||||
|
||||
WChess::~WChess() = default;
|
||||
|
||||
void WChess::set_status(const std::string& msg) const {
|
||||
if (m_cb.set_status) (*m_cb.set_status)(msg);
|
||||
}
|
||||
|
||||
void WChess::set_moves(const std::string& moves) const {
|
||||
if (m_cb.set_moves) (*m_cb.set_moves)(moves);
|
||||
}
|
||||
|
||||
bool WChess::check_running() const {
|
||||
if (m_cb.check_running) return (*m_cb.check_running)();
|
||||
return false;
|
||||
}
|
||||
|
||||
void WChess::clear_audio() const {
|
||||
if (m_cb.clear_audio) (*m_cb.clear_audio)();
|
||||
}
|
||||
|
||||
void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
|
||||
if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
|
||||
}
|
||||
|
||||
std::string WChess::stringify_board() const {
|
||||
return m_board->stringifyBoard();
|
||||
}
|
||||
|
||||
void WChess::run() {
|
||||
set_status("loading data ...");
|
||||
|
||||
bool have_prompt = true;
|
||||
bool ask_prompt = !have_prompt;
|
||||
|
||||
float logprob_min0 = 0.0f;
|
||||
float logprob_min = 0.0f;
|
||||
|
||||
float logprob_sum0 = 0.0f;
|
||||
float logprob_sum = 0.0f;
|
||||
|
||||
int n_tokens0 = 0;
|
||||
int n_tokens = 0;
|
||||
|
||||
std::vector<float> pcmf32_cur;
|
||||
std::vector<float> pcmf32_prompt;
|
||||
|
||||
const std::string k_prompt = have_prompt ? "" : "checkmate";
|
||||
|
||||
while (check_running()) {
|
||||
// delay
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
|
||||
if (ask_prompt) {
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
|
||||
fprintf(stdout, "\n");
|
||||
|
||||
{
|
||||
char txt[1024];
|
||||
snprintf(txt, sizeof(txt), "Say the following phrase: '%s'", k_prompt.c_str());
|
||||
set_status(txt);
|
||||
}
|
||||
|
||||
ask_prompt = false;
|
||||
}
|
||||
|
||||
int64_t t_ms = 0;
|
||||
|
||||
{
|
||||
get_audio(m_settings.vad_ms, pcmf32_cur);
|
||||
|
||||
if (!pcmf32_cur.empty()) {
|
||||
fprintf(stdout, "%s: Processing ...\n", __func__);
|
||||
set_status("Processing ...");
|
||||
|
||||
if (!have_prompt) {
|
||||
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
|
||||
|
||||
fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
|
||||
|
||||
const float sim = similarity(txt, k_prompt);
|
||||
|
||||
if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
|
||||
fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
|
||||
ask_prompt = true;
|
||||
} else {
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
|
||||
fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
|
||||
fprintf(stdout, "\n");
|
||||
|
||||
{
|
||||
char txt[1024];
|
||||
snprintf(txt, sizeof(txt), "Success! Waiting for voice commands ...");
|
||||
set_status(txt);
|
||||
}
|
||||
|
||||
// save the audio for the prompt
|
||||
pcmf32_prompt = pcmf32_cur;
|
||||
have_prompt = true;
|
||||
}
|
||||
} else {
|
||||
if (!pcmf32_prompt.empty()) pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
|
||||
static const size_t MIN_SIZE = 1.2 * WHISPER_SAMPLE_RATE;
|
||||
if (MIN_SIZE > pcmf32_cur.size()) pcmf32_cur.insert(pcmf32_cur.begin(), MIN_SIZE - pcmf32_cur.size(), 0.0f);
|
||||
|
||||
std::string rules = m_board->getRules(k_prompt);
|
||||
fprintf(stdout, "%s: grammar rules:\n'%s'\n", __func__, rules.c_str());
|
||||
|
||||
auto grammar_parsed = grammar_parser::parse(rules.c_str());
|
||||
auto grammar_rules = grammar_parsed.c_rules();
|
||||
|
||||
m_wparams.grammar_rules = grammar_rules.data();
|
||||
m_wparams.n_grammar_rules = grammar_rules.size();
|
||||
|
||||
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("move");
|
||||
auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
|
||||
|
||||
const float p = 100.0f * std::exp(logprob_min);
|
||||
|
||||
fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
|
||||
|
||||
// find the prompt in the text
|
||||
float best_sim = 0.0f;
|
||||
size_t best_len = 0;
|
||||
for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
|
||||
const auto prompt = txt.substr(0, n);
|
||||
|
||||
const float sim = similarity(prompt, k_prompt);
|
||||
|
||||
//fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
|
||||
|
||||
if (sim > best_sim) {
|
||||
best_sim = sim;
|
||||
best_len = n;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stdout, "%s: DEBUG: txt = '%s', prob = %.2f%%\n", __func__, txt.c_str(), p);
|
||||
std::string command = ::trim(txt.substr(best_len));
|
||||
|
||||
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
|
||||
fprintf(stdout, "\n");
|
||||
|
||||
{
|
||||
char txt[1024];
|
||||
snprintf(txt, sizeof(txt), "Command '%s', (t = %d ms)", command.c_str(), (int) t_ms);
|
||||
set_status(txt);
|
||||
}
|
||||
if (!command.empty()) {
|
||||
auto move = m_board->process(command);
|
||||
if (!move.empty()) {
|
||||
set_moves(std::move(move));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear_audio();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string WChess::transcribe(
|
||||
const std::vector<float> & pcmf32,
|
||||
float & logprob_min,
|
||||
float & logprob_sum,
|
||||
int & n_tokens,
|
||||
int64_t & t_ms) {
|
||||
const auto t_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
logprob_min = 0.0f;
|
||||
logprob_sum = 0.0f;
|
||||
n_tokens = 0;
|
||||
t_ms = 0;
|
||||
|
||||
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
const int n_segments = whisper_full_n_segments(m_ctx);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
const char * text = whisper_full_get_segment_text(m_ctx, i);
|
||||
|
||||
result += text;
|
||||
|
||||
const int n = whisper_full_n_tokens(m_ctx, i);
|
||||
for (int j = 0; j < n; ++j) {
|
||||
const auto token = whisper_full_get_token_data(m_ctx, i, j);
|
||||
|
||||
if(token.plog > 0.0f) return {};
|
||||
logprob_min = std::min(logprob_min, token.plog);
|
||||
logprob_sum += token.plog;
|
||||
++n_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
const auto t_end = std::chrono::high_resolution_clock::now();
|
||||
t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count();
|
||||
|
||||
return result;
|
||||
}
|
62
examples/wchess/libwchess/WChess.h
Normal file
@ -0,0 +1,62 @@
#pragma once
#include "whisper.h"
#include <string>
#include <vector>
#include <memory>

class Chessboard;

class WChess {
public:
    using SetStatusCb = void (*)(const std::string &);
    using CheckRunningCb = bool (*)();
    using GetAudioCb = void (*)(int, std::vector<float> &);
    using SetMovesCb = void (*)(const std::string &);
    using ClearAudioCb = void (*)();

    struct callbacks {
        SetStatusCb set_status = nullptr;
        CheckRunningCb check_running = nullptr;
        GetAudioCb get_audio = nullptr;
        SetMovesCb set_moves = nullptr;
        ClearAudioCb clear_audio = nullptr;
    };

    struct settings {
        int32_t vad_ms = 2000;
        int32_t prompt_ms = 5000;
        int32_t command_ms = 4000;
        float vad_thold = 0.2f;
        float freq_thold = 100.0f;
        bool print_energy = false;
    };

    WChess(
        whisper_context * ctx,
        const whisper_full_params & wparams,
        callbacks cb,
        settings s
    );
    ~WChess();

    void run();
    std::string stringify_board() const;
private:
    void get_audio(int ms, std::vector<float>& pcmf32) const;
    void set_status(const std::string& msg) const;
    void set_moves(const std::string& moves) const;
    bool check_running() const;
    void clear_audio() const;
    std::string transcribe(
        const std::vector<float> & pcmf32,
        float & logprob_min,
        float & logprob_sum,
        int & n_tokens,
        int64_t & t_ms);

    whisper_context * m_ctx;
    whisper_full_params m_wparams;
    const callbacks m_cb;
    const settings m_settings;
    std::unique_ptr<Chessboard> m_board;
};
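Everything platform-specific is injected through `callbacks`: `run()` pulls audio, reports status and moves, and checks whether it should keep going exclusively through these function pointers, which is what lets the same library back both the SDL command-line tool and the WebAssembly build further down. The sketch below wires the constructor with stub callbacks and a placeholder model path; it is an illustration, not part of the diff, and returns immediately because `check_running()` reports false.

```cpp
// Sketch of constructing WChess with stub callbacks; audio capture is omitted
// and the model path ("models/ggml-base.en.bin") is a placeholder.
#include "WChess.h"

#include <cstdio>
#include <vector>

static bool keep_running() { return false; } // stop immediately in this sketch
static void on_status(const std::string & s) { std::printf("status: %s\n", s.c_str()); }
static void on_moves (const std::string & m) { std::printf("moves: %s\n",  m.c_str()); }
static void feed_audio(int /*ms*/, std::vector<float> & pcm) { pcm.clear(); } // no microphone here
static void drop_audio() {}

int main() {
    whisper_context_params cparams = whisper_context_default_params();
    whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-base.en.bin", cparams);
    if (!ctx) return 1;

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    WChess::callbacks cb;
    cb.check_running = keep_running;
    cb.get_audio     = feed_audio;
    cb.set_status    = on_status;
    cb.set_moves     = on_moves;
    cb.clear_audio   = drop_audio;

    WChess::settings s; // defaults from the header

    WChess wchess(ctx, wparams, cb, s);
    wchess.run(); // returns right away because check_running() is false

    whisper_free(ctx);
    return 0;
}
```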
88
examples/wchess/libwchess/test-chessboard.cpp
Normal file
@ -0,0 +1,88 @@
#include "Chessboard.h"

#define ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
            fflush(stderr); \
            exit(1); \
        } \
    } while (0)


int main() {

    {
        // pawns
        Chessboard chess;

        ASSERT(chess.process("pawn to d4, e5, e3, pawn to d5") == "d2-d4 e7-e5 e2-e3 d7-d5");
        ASSERT(chess.process("pawn to d4") == ""); // wrong
        ASSERT(chess.process("pawn to c5") == ""); // wrong
        ASSERT(chess.process("pawn to d5") == ""); // wrong
        ASSERT(chess.process("pawn to d3") == ""); // wrong
        ASSERT(chess.process("pawn to f5") == ""); // wrong, white's turn
        ASSERT(chess.process("h4") == "h2-h4");
        ASSERT(chess.process("d4") == "e5-d4");
        ASSERT(chess.process("e4") == "e3-e4");
        ASSERT(chess.process("d4") == ""); // wrong
        ASSERT(chess.process("e4") == "d5-e4");
    }

    {
        // rook
        Chessboard chess;

        ASSERT(chess.process("rook to a3") == ""); // wrong
        ASSERT(chess.process("a4, h5, rook to a3, rook to h6") == "a2-a4 h7-h5 a1-a3 h8-h6");
        ASSERT(chess.process("rook to d3, rook to e6") == "a3-d3 h6-e6");
        ASSERT(chess.process("rook to d4, rook to e5") == "d3-d4 e6-e5");
        ASSERT(chess.process("rook to a4") == ""); // wrong
        ASSERT(chess.process("rook to d8") == ""); // wrong
        ASSERT(chess.process("rook to d3") == "d4-d3");
        ASSERT(chess.process("rook to e2") == "e5-e2");
    }

    {
        // knight
        Chessboard chess;

        ASSERT(chess.process("knight to c3, knight to c6") == "b1-c3 b8-c6");
        ASSERT(chess.process("knight to c3") == ""); // wrong
        ASSERT(chess.process("knight to a2") == ""); // wrong
        ASSERT(chess.process("knight to b4") == ""); // wrong, white's turn
        ASSERT(chess.process("knight to b5") == "c3-b5");
        ASSERT(chess.process("knight to a5") == "c6-a5");
        ASSERT(chess.process("knight to c7") == "b5-c7");
    }

    {
        // bishop
        Chessboard chess;

        ASSERT(chess.process("b3, b6, bishop to b2, bishop to b7") == "b2-b3 b7-b6 c1-b2 c8-b7");
        ASSERT(chess.process("bishop to a1") == ""); // wrong
        ASSERT(chess.process("bishop to h8") == ""); // wrong
        ASSERT(chess.process("bishop to a6") == ""); // wrong, white's turn
        ASSERT(chess.process("bishop to g7") == "b2-g7");
    }

    {
        // queen
        Chessboard chess;
        ASSERT(chess.process("queen to d8") == ""); // wrong
        ASSERT(chess.process("queen to f1") == ""); // wrong
        ASSERT(chess.process("queen to h5") == ""); // wrong
        ASSERT(chess.process("e3, d5, queen to h5, queen to d6") == "e2-e3 d7-d5 d1-h5 d8-d6");
        ASSERT(chess.process("queen to c5") == ""); // wrong, white's turn
        ASSERT(chess.process("queen to f7") == "h5-f7");
    }

    {
        // king
        Chessboard chess;
        ASSERT(chess.process("d3, d6, king to d2, king to d7, king to c3, king to c6, king to c4") == "d2-d3 d7-d6 e1-d2 e8-d7 d2-c3 d7-c6 c3-c4");
        ASSERT(chess.process("bishop to e6") == "c8-e6");
        ASSERT(chess.process("king to b3") == "c4-b3"); // !! check check not implemented
    }
}
8
examples/wchess/wchess.cmd/CMakeLists.txt
Normal file
@ -0,0 +1,8 @@
if (WHISPER_SDL2)
    set(TARGET wchess)
    add_executable(${TARGET} wchess.cmd.cpp)

    include(DefaultTargetOptions)

    target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT})
endif ()
207
examples/wchess/wchess.cmd/wchess.cmd.cpp
Normal file
@ -0,0 +1,207 @@
|
||||
// Command line voice assisted chess
|
||||
//
|
||||
// Speak chess move commands to the microphone.
|
||||
// The moves will be translated to chessboard positions.
|
||||
//
|
||||
//
|
||||
|
||||
#include "WChess.h"
|
||||
#include "common-sdl.h"
|
||||
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
|
||||
// command-line parameters
|
||||
struct whisper_params {
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
int32_t prompt_ms = 5000;
|
||||
int32_t command_ms = 8000;
|
||||
int32_t capture_id = -1;
|
||||
int32_t max_tokens = 32;
|
||||
int32_t audio_ctx = 0;
|
||||
|
||||
float vad_thold = 0.6f;
|
||||
float freq_thold = 100.0f;
|
||||
|
||||
float grammar_penalty = 100.0f;
|
||||
|
||||
bool speed_up = false;
|
||||
bool translate = false;
|
||||
bool print_special = false;
|
||||
bool print_energy = false;
|
||||
bool no_timestamps = true;
|
||||
bool use_gpu = true;
|
||||
|
||||
std::string language = "en";
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
std::string fname_out;
|
||||
std::string commands;
|
||||
std::string prompt;
|
||||
std::string context;
|
||||
std::string grammar;
|
||||
};
|
||||
|
||||
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -pms N, --prompt-ms N [%-7d] prompt duration in milliseconds\n", params.prompt_ms);
|
||||
fprintf(stderr, " -cms N, --command-ms N [%-7d] command duration in milliseconds\n", params.command_ms);
|
||||
fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", params.capture_id);
|
||||
fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
|
||||
fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", params.audio_ctx);
|
||||
fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", params.vad_thold);
|
||||
fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", params.freq_thold);
|
||||
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
|
||||
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
|
||||
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
||||
fprintf(stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n", params.print_energy ? "true" : "false");
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
|
||||
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
|
||||
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
|
||||
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-pms" || arg == "--prompt-ms") { params.prompt_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-cms" || arg == "--command-ms") { params.command_ms = std::stoi(argv[++i]); }
|
||||
else if (arg == "-c" || arg == "--capture") { params.capture_id = std::stoi(argv[++i]); }
|
||||
else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
|
||||
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
|
||||
else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
|
||||
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
|
||||
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
|
||||
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
||||
else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
|
||||
else if (arg == "-cmd" || arg == "--commands") { params.commands = argv[++i]; }
|
||||
else if (arg == "-p" || arg == "--prompt") { params.prompt = argv[++i]; }
|
||||
else if (arg == "-ctx" || arg == "--context") { params.context = argv[++i]; }
|
||||
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<WChess> g_wchess;
|
||||
void set_moves(const std::string & moves) {
|
||||
if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str());
|
||||
}
|
||||
|
||||
audio_async g_audio(30*1000);
|
||||
void get_audio(int ms, std::vector<float> & pcmf32_cur) {
|
||||
g_audio.get(ms, pcmf32_cur);
|
||||
}
|
||||
|
||||
void clear_audio() {
|
||||
g_audio.clear();
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
whisper_params params;
|
||||
|
||||
if (whisper_params_parse(argc, argv, params) == false) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (whisper_lang_id(params.language.c_str()) == -1) {
|
||||
fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
|
||||
// init audio
|
||||
|
||||
if (!g_audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
|
||||
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
|
||||
|
||||
wparams.print_progress = false;
|
||||
wparams.print_special = params.print_special;
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_timestamps = !params.no_timestamps;
|
||||
wparams.translate = params.translate;
|
||||
wparams.no_context = true;
|
||||
wparams.no_timestamps = params.no_timestamps;
|
||||
wparams.single_segment = true;
|
||||
wparams.max_tokens = params.max_tokens;
|
||||
wparams.language = params.language.c_str();
|
||||
wparams.n_threads = params.n_threads;
|
||||
|
||||
wparams.audio_ctx = params.audio_ctx;
|
||||
wparams.speed_up = params.speed_up;
|
||||
|
||||
wparams.temperature = 0.4f;
|
||||
wparams.temperature_inc = 1.0f;
|
||||
wparams.greedy.best_of = 5;
|
||||
|
||||
wparams.beam_search.beam_size = 5;
|
||||
|
||||
wparams.initial_prompt = params.context.data();
|
||||
|
||||
g_audio.resume();
|
||||
|
||||
// wait for 1 second to avoid any buffered noise
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
g_audio.clear();
|
||||
|
||||
WChess::callbacks cb;
|
||||
cb.check_running = sdl_poll_events;
|
||||
cb.get_audio = get_audio;
|
||||
cb.set_moves = set_moves;
|
||||
cb.clear_audio = clear_audio;
|
||||
|
||||
WChess::settings s;
|
||||
s.vad_ms = 2000;
|
||||
s.prompt_ms = params.prompt_ms;
|
||||
s.command_ms = params.command_ms;
|
||||
s.vad_thold = params.vad_thold;
|
||||
s.freq_thold = params.freq_thold;
|
||||
s.print_energy = params.print_energy;
|
||||
|
||||
g_wchess.reset(new WChess(ctx, wparams, cb, s));
|
||||
set_moves("start");
|
||||
g_wchess->run();
|
||||
|
||||
g_audio.pause();
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
51
examples/wchess/wchess.wasm/CMakeLists.txt
Normal file
@ -0,0 +1,51 @@
set(TARGET wchess.wasm)

add_executable(${TARGET}
    wchess.wasm.cpp
    )

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE
    common
    libwchess
    )

unset(EXTRA_FLAGS)

if (WHISPER_WASM_SINGLE_FILE)
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside chess.js")

    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/${TARGET}.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js
        )
endif()

set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")


add_custom_command(
    TARGET ${TARGET} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
    ${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0
    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/
    COMMAND ${CMAKE_COMMAND} -E copy
    ${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js
    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/
    )

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY)
32
examples/wchess/wchess.wasm/chessboardjs-1.0.0/CHANGELOG.md
Normal file
@ -0,0 +1,32 @@
|
||||
# chessboard.js Change Log
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.0.0] - 2019-06-11
|
||||
- Orientation methods now return current orientation. [Issue #64]
|
||||
- Drop support for IE8
|
||||
- Do not check for `window.JSON` (Error #1004)
|
||||
- Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however)
|
||||
- id query selectors are now supported as the first argument to `Chessboard()`
|
||||
- Remove Error #1002
|
||||
- Format code according to [StandardJS]
|
||||
- Bump minimum jQuery version to 1.8.3
|
||||
- Throttle piece drag functions
|
||||
|
||||
## [0.3.0] - 2013-08-10
|
||||
- Added `appearSpeed` animation config property
|
||||
- Added `onSnapbackEnd` event
|
||||
- Added `onMoveEnd` event
|
||||
|
||||
## [0.2.0] - 2013-08-05
|
||||
- Added `onMouseoverSquare` and `onMouseoutSquare` events
|
||||
- Added `onSnapEnd` event
|
||||
- Added square code as CSS class on the squares
|
||||
- Added [chess.js] integration examples
|
||||
|
||||
## [0.1.0] - 2013-05-21
|
||||
- Initial release
|
||||
|
||||
[chess.js]:https://github.com/jhlywa/chess.js
|
||||
[Issue #64]:https://github.com/oakmac/chessboardjs/issues/64
|
||||
[StandardJS]:https://standardjs.com/
|
20
examples/wchess/wchess.wasm/chessboardjs-1.0.0/LICENSE.md
Normal file
@ -0,0 +1,20 @@
|
||||
Copyright 2019 Chris Oakman
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
82
examples/wchess/wchess.wasm/chessboardjs-1.0.0/README.md
Normal file
@ -0,0 +1,82 @@
# chessboard.js

chessboard.js is a JavaScript chessboard component. It depends on [jQuery].

Please see [chessboardjs.com] for documentation and examples.

## What is chessboard.js?

chessboard.js is a standalone JavaScript chessboard component. It is designed to be
"just a board" with a flexible, powerful API so that it can be used in different ways.
Here's a non-exhaustive list of things you can do with chessboard.js:

- Use chessboard.js to show game positions alongside your expert commentary.
- Use chessboard.js to have a tactics website where users have to guess the best move.
- Integrate chessboard.js and [chess.js] with a PGN database and allow people to
  search and play back games (see [Example 5000]).
- Build a chess server and have users play their games out using the
  chessboard.js board.

chessboard.js is flexible enough to handle any of these situations with relative ease.

## What can chessboard.js **not** do?

The scope of chessboard.js is limited to "just a board." This is intentional and
makes chessboard.js flexible for handling a multitude of chess-related problems.
It is a common source of confusion for new users.

Specifically, chessboard.js does not understand anything about how the game of
chess is played: how a knight moves, whose turn it is, whether White is in check, etc.

Fortunately, the powerful [chess.js] library deals with exactly this sort of
problem domain and plays nicely with chessboard.js's flexible API. Some examples
of chessboard.js combined with chess.js: 5000, 5001, 5002.

Here is a list of things that chessboard.js is **not**:

- A chess engine
- A legal move validator
- A PGN parser

chessboard.js is designed to work well with any of those things, but the idea
behind chessboard.js is that the logic that controls the board should be
independent of those other problems.

## Docs and Examples

- Docs - <http://chessboardjs.com/docs>
- Examples - <http://chessboardjs.com/examples>

## Developer Tools

```sh
# create a build in the build/ directory
npm run build

# re-build the website
npm run website
```

## License

[MIT License](LICENSE.md)

[jQuery]:https://jquery.com/
[chessboardjs.com]:http://chessboardjs.com
[chess.js]:https://github.com/jhlywa/chess.js
[Example 5000]:http://chessboardjs.com/examples#5000
@ -0,0 +1,54 @@
|
||||
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
|
||||
|
||||
.clearfix-7da63 {
|
||||
clear: both;
|
||||
}
|
||||
|
||||
.board-b72b1 {
|
||||
border: 2px solid #404040;
|
||||
box-sizing: content-box;
|
||||
}
|
||||
|
||||
.square-55d63 {
|
||||
float: left;
|
||||
position: relative;
|
||||
|
||||
/* disable any native browser highlighting */
|
||||
-webkit-touch-callout: none;
|
||||
-webkit-user-select: none;
|
||||
-khtml-user-select: none;
|
||||
-moz-user-select: none;
|
||||
-ms-user-select: none;
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.white-1e1d7 {
|
||||
background-color: #f0d9b5;
|
||||
color: #b58863;
|
||||
}
|
||||
|
||||
.black-3c85d {
|
||||
background-color: #b58863;
|
||||
color: #f0d9b5;
|
||||
}
|
||||
|
||||
.highlight1-32417, .highlight2-9c5d2 {
|
||||
box-shadow: inset 0 0 3px 3px yellow;
|
||||
}
|
||||
|
||||
.notation-322f9 {
|
||||
cursor: default;
|
||||
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
font-size: 14px;
|
||||
position: absolute;
|
||||
}
|
||||
|
||||
.alpha-d2270 {
|
||||
bottom: 1px;
|
||||
right: 3px;
|
||||
}
|
||||
|
||||
.numeric-fc462 {
|
||||
top: 2px;
|
||||
left: 2px;
|
||||
}
|
2
examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.min.css
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/*! chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */
|
||||
.clearfix-7da63{clear:both}.board-b72b1{border:2px solid #404040;box-sizing:content-box}.square-55d63{float:left;position:relative;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.white-1e1d7{background-color:#f0d9b5;color:#b58863}.black-3c85d{background-color:#b58863;color:#f0d9b5}.highlight1-32417,.highlight2-9c5d2{box-shadow:inset 0 0 3px 3px #ff0}.notation-322f9{cursor:default;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;position:absolute}.alpha-d2270{bottom:1px;right:3px}.numeric-fc462{top:2px;left:2px}
|
[12 binary image files added under examples/wchess/wchess.wasm/chessboardjs-1.0.0 (748 B – 3.7 KiB each)]
2
examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0.min.js
vendored
Normal file
29
examples/wchess/wchess.wasm/chessboardjs-1.0.0/package.json
Normal file
@ -0,0 +1,29 @@
|
||||
{
|
||||
"author": "Chris Oakman <chris@oakmac.com> (http://chrisoakman.com/)",
|
||||
"name": "@chrisoakman/chessboardjs",
|
||||
"description": "JavaScript chessboard widget",
|
||||
"homepage": "https://chessboardjs.com",
|
||||
"license": "MIT",
|
||||
"version": "1.0.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/oakmac/chessboardjs.git"
|
||||
},
|
||||
"files": ["dist/"],
|
||||
"dependencies": {
|
||||
"jquery": ">=3.4.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"csso": "3.5.1",
|
||||
"fs-plus": "3.1.1",
|
||||
"kidif": "1.1.0",
|
||||
"mustache": "2.3.0",
|
||||
"standard": "10.0.2",
|
||||
"uglify-js": "3.6.0"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "standard lib/chessboard.js && node scripts/build.js",
|
||||
"standard": "standard --fix lib/*.js website/js/*.js",
|
||||
"website": "node scripts/website.js"
|
||||
}
|
||||
}
|
376
examples/wchess/wchess.wasm/index-tmpl.html
Normal file
@ -0,0 +1,376 @@
|
||||
<!doctype html>
|
||||
<html lang="en-us">
|
||||
<head>
|
||||
<title>wchess : Voice assistant example using Whisper + WebAssembly</title>
|
||||
|
||||
<style>
|
||||
#output {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
margin: 0 auto;
|
||||
margin-top: 10px;
|
||||
border-left: 0px;
|
||||
border-right: 0px;
|
||||
padding-left: 0px;
|
||||
padding-right: 0px;
|
||||
display: block;
|
||||
background-color: black;
|
||||
color: white;
|
||||
font-size: 10px;
|
||||
font-family: 'Lucida Console', Monaco, monospace;
|
||||
outline: none;
|
||||
white-space: pre;
|
||||
overflow-wrap: normal;
|
||||
overflow-x: scroll;
|
||||
}
|
||||
</style>
|
||||
<link rel="stylesheet" href="css/chessboard-1.0.0.min.css" integrity="sha384-q94+BZtLrkL1/ohfjR8c6L+A6qzNH9R2hBLwyoAfu3i/WCvQjzL2RQJ3uNHDISdU" crossorigin="anonymous">
|
||||
</head>
|
||||
<body onload="loadWhisper()">
|
||||
<div id="main-container">
|
||||
<b>wchess : Voice assistant example using Whisper + WebAssembly</b>
|
||||
|
||||
<br><br>
|
||||
|
||||
You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">GitHub</a>.
|
||||
|
||||
<br><br>
|
||||
|
||||
<b>More examples:</b>
|
||||
<a href="https://whisper.ggerganov.com/">main</a> |
|
||||
<a href="https://whisper.ggerganov.com/bench">bench</a> |
|
||||
<a href="https://whisper.ggerganov.com/stream">stream</a> |
|
||||
<a href="https://whisper.ggerganov.com/command">command</a> |
|
||||
<a href="https://whisper.ggerganov.com/talk">talk</a> |
|
||||
|
||||
<br><br>
|
||||
|
||||
<hr>
|
||||
|
||||
<div id="model-whisper">
|
||||
Whisper model: <span id="model-whisper-status"></span>
|
||||
<span id="fetch-whisper-progress"></span>
|
||||
<button id="clear" onclick="clearCache()">Clear Cache</button>
|
||||
<!--
|
||||
<input type="file" id="file" name="file" onchange="loadFile(event, 'whisper.bin')" />
|
||||
-->
|
||||
</div>
|
||||
|
||||
<br>
|
||||
<div id="myBoard" style="width: 400px"></div>
|
||||
<script src="js/jquery-3.7.1.min.js"></script>
|
||||
<script src="js/chessboard-1.0.0.min.js"></script>
|
||||
<script>
|
||||
var board = Chessboard('myBoard', 'start')
|
||||
</script>
|
||||
|
||||
<br>
|
||||
|
||||
<div id="input">
|
||||
<button id="toggler" disabled>Hold</button>
|
||||
</div>
|
||||
|
||||
<br>
|
||||
|
||||
<div id="state">
|
||||
Status: <b><span id="state-status">not started</span></b>
|
||||
|
||||
<pre id="state-moves">[The moves will be displayed here]</pre>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
Debug output:
|
||||
<textarea id="output" rows="20"></textarea>
|
||||
|
||||
<br>
|
||||
|
||||
<b>Troubleshooting</b>
|
||||
|
||||
<br><br>
|
||||
|
||||
The page does some heavy computations, so make sure:
|
||||
|
||||
<ul>
|
||||
<li>To use a modern web browser (e.g. Chrome, Firefox)</li>
|
||||
<li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
|
||||
<li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
|
||||
</ul>
|
||||
|
||||
<div class="cell-version">
|
||||
<span>
|
||||
|
|
||||
Build time: <span class="nav-link">@GIT_DATE@</span> |
|
||||
Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
|
||||
Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
|
||||
<a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/command.wasm">Source Code</a> |
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="text/javascript" src="js/helpers.js"></script>
|
||||
<script type='text/javascript'>
|
||||
// web audio context
|
||||
var context = null;
|
||||
|
||||
// the command instance
|
||||
var instance = null;
|
||||
|
||||
// model name
|
||||
var model_whisper = null;
|
||||
|
||||
var Module = {
|
||||
print: printTextarea,
|
||||
printErr: printTextarea,
|
||||
setStatus: function(text) {
|
||||
printTextarea('js: ' + text);
|
||||
},
|
||||
monitorRunDependencies: function(left) {
|
||||
},
|
||||
preRun: function() {
|
||||
printTextarea('js: Preparing ...');
|
||||
},
|
||||
postRun: function() {
|
||||
printTextarea('js: Module initialized successfully!');
|
||||
instance = Module.init('whisper.bin');
|
||||
|
||||
if (instance) {
|
||||
printTextarea("js: whisper initialized, instance: " + instance);
|
||||
}
|
||||
else {
|
||||
printTextarea("js: failed to initialize whisper");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// fetch models
|
||||
//
|
||||
|
||||
let dbVersion = 1
|
||||
let dbName = 'whisper.ggerganov.com';
|
||||
let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
|
||||
|
||||
function storeFS(fname, buf) {
|
||||
// write to WASM file using FS_createDataFile
|
||||
// if the file exists, delete it
|
||||
try {
|
||||
Module.FS_unlink(fname);
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
|
||||
Module.FS_createDataFile("/", fname, buf, true, true);
|
||||
|
||||
printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
|
||||
|
||||
document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
|
||||
|
||||
if (model_whisper != null) {
|
||||
document.getElementById('toggler').disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
function loadWhisper() {
|
||||
// let urls = {
|
||||
// 'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
|
||||
// 'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
|
||||
|
||||
// 'tiny-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin',
|
||||
// 'base-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin',
|
||||
// };
|
||||
|
||||
// let sizes = {
|
||||
// 'tiny.en': 75,
|
||||
// 'base.en': 142,
|
||||
|
||||
// 'tiny-en-q5_1': 31,
|
||||
// 'base-en-q5_1': 57,
|
||||
// };
|
||||
|
||||
let url = 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin';
|
||||
let dst = 'whisper.bin';
|
||||
let size_mb = 75;
|
||||
|
||||
model_whisper = 'tiny.en';
|
||||
|
||||
document.getElementById('model-whisper-status').innerHTML = 'loading "' + model_whisper + '" ... ';
|
||||
|
||||
cbProgress = function(p) {
|
||||
let el = document.getElementById('fetch-whisper-progress');
|
||||
el.innerHTML = Math.round(100*p) + '%';
|
||||
};
|
||||
|
||||
cbCancel = function() {
|
||||
var el;
|
||||
el = document.getElementById('model-whisper-status'); if (el) el.innerHTML = '';
|
||||
};
|
||||
|
||||
loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
|
||||
}
|
||||
|
||||
//
|
||||
// microphone
|
||||
//
|
||||
|
||||
const kSampleRate = 16000;
|
||||
const kRestartRecording_s = 120;
|
||||
const kIntervalAudio_ms = 250; // pass the recorded audio to the C++ instance at this rate
|
||||
|
||||
var mediaRecorder = null;
|
||||
var doRecording = false;
|
||||
var startTime = 0;
|
||||
|
||||
window.AudioContext = window.AudioContext || window.webkitAudioContext;
|
||||
window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
|
||||
|
||||
function stopRecording() {
|
||||
Module.set_status("paused");
|
||||
mediaRecorder.stop();
|
||||
}
|
||||
|
||||
function startRecording() {
|
||||
if (!context) {
|
||||
context = new AudioContext({
|
||||
sampleRate: kSampleRate,
|
||||
channelCount: 1,
|
||||
echoCancellation: false,
|
||||
autoGainControl: true,
|
||||
noiseSuppression: true,
|
||||
});
|
||||
}
|
||||
|
||||
startTime = Date.now();
|
||||
|
||||
var chunks = [];
|
||||
var stream = null;
|
||||
|
||||
navigator.mediaDevices.getUserMedia({audio: true, video: false})
|
||||
.then(function(s) {
|
||||
stream = s;
|
||||
mediaRecorder = new MediaRecorder(stream);
|
||||
mediaRecorder.ondataavailable = function(e) {
|
||||
chunks.push(e.data);
|
||||
|
||||
var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
|
||||
var reader = new FileReader();
|
||||
|
||||
reader.onload = function(event) {
|
||||
var buf = new Uint8Array(reader.result);
|
||||
|
||||
if (!context) {
|
||||
return;
|
||||
}
|
||||
context.decodeAudioData(buf.buffer, function(audioBuffer) {
|
||||
var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
|
||||
var source = offlineContext.createBufferSource();
|
||||
source.buffer = audioBuffer;
|
||||
source.connect(offlineContext.destination);
|
||||
source.start(0);
|
||||
|
||||
offlineContext.startRendering().then(function(renderedBuffer) {
|
||||
let audio = renderedBuffer.getChannelData(0);
|
||||
|
||||
if (instance) {
|
||||
printTextarea('js: number of samples: ' + audio.length);
|
||||
Module.set_audio(instance, audio);
|
||||
}
|
||||
});
|
||||
|
||||
mediaRecorder = null;
|
||||
context = null;
|
||||
});
|
||||
}
|
||||
|
||||
reader.readAsArrayBuffer(blob);
|
||||
};
|
||||
|
||||
mediaRecorder.onstop = function(e) {
|
||||
stream.getTracks().forEach(function(track) {
|
||||
track.stop();
|
||||
});
|
||||
};
|
||||
|
||||
mediaRecorder.start();
|
||||
})
|
||||
.catch(function(err) {
|
||||
printTextarea('js: error getting audio stream: ' + err);
|
||||
});
|
||||
}
|
||||
|
||||
//
|
||||
// main
|
||||
//
|
||||
|
||||
var nLines = 0;
|
||||
var intervalUpdate = null;
|
||||
var movesAll = '';
|
||||
|
||||
document.body.addEventListener('keydown', function(event) {
|
||||
if (event.keyCode === 32) {
|
||||
document.getElementById('toggler').innerText = "Release";
|
||||
onStart();
|
||||
}
|
||||
}, true);
|
||||
|
||||
document.body.addEventListener('keyup', function(event) {
|
||||
if (event.keyCode === 32) {
|
||||
document.getElementById('toggler').innerText = "Hold";
|
||||
onStop();
|
||||
}
|
||||
}, true);
|
||||
|
||||
document.getElementById('toggler').addEventListener('mousedown', function(event) {
|
||||
this.innerText = "Release";
|
||||
onStart();
|
||||
}, true);
|
||||
|
||||
document.getElementById('toggler').addEventListener('mouseup', function(event) {
|
||||
this.innerText = "Hold";
|
||||
onStop();
|
||||
}, true);
|
||||
|
||||
function onStart() {
|
||||
if (!instance) {
|
||||
return;
|
||||
}
|
||||
|
||||
startRecording();
|
||||
}
|
||||
|
||||
function onStop() {
|
||||
printTextarea('js: stopping recording ...');
|
||||
stopRecording();
|
||||
|
||||
var interval = setInterval(function() {
|
||||
var moves = Module.get_moves();
|
||||
|
||||
if (moves != null && moves.length > 1) {
|
||||
clearInterval(interval);
|
||||
|
||||
for (const move of moves.split(' ')) {
|
||||
board.move(move);
|
||||
}
|
||||
|
||||
movesAll += moves + '<br>';
|
||||
nLines++;
|
||||
|
||||
// if more than 10 lines, remove the first line
|
||||
if (nLines > 10) {
|
||||
var i = movesAll.indexOf('<br>');
|
||||
if (i > 0) {
|
||||
movesAll = movesAll.substring(i + 4);
|
||||
nLines--;
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('state-status').innerHTML = Module.get_status();
|
||||
document.getElementById('state-moves').innerHTML = movesAll;
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
|
||||
</script>
|
||||
<script type="text/javascript" src="js/chess.js"></script>
|
||||
</body>
|
||||
</html>
|
2
examples/wchess/wchess.wasm/jquery-3.7.1.min.js
vendored
Normal file
173
examples/wchess/wchess.wasm/wchess.wasm.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
#include <WChess.h>
|
||||
#include <emscripten/bind.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
constexpr int N_THREAD = 8;
|
||||
|
||||
std::vector<struct whisper_context *> g_contexts(4, nullptr);
|
||||
|
||||
std::mutex g_mutex;
|
||||
std::thread g_worker;
|
||||
|
||||
std::atomic<bool> g_running(false);
|
||||
|
||||
std::string g_status = "";
|
||||
std::string g_status_forced = "";
|
||||
std::string g_moves = "";
|
||||
|
||||
std::vector<float> g_pcmf32;
|
||||
|
||||
void set_status(const std::string & status) {
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
g_status = status;
|
||||
}
|
||||
|
||||
void set_moves(const std::string & moves) {
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
g_moves = moves;
|
||||
}
|
||||
|
||||
void get_audio(int /* ms */, std::vector<float> & audio) {
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
audio = g_pcmf32;
|
||||
}
|
||||
|
||||
bool check_running() {
|
||||
return g_running;
|
||||
}
|
||||
|
||||
void clear_audio() {
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
g_pcmf32.clear();
|
||||
}
|
||||
|
||||
void wchess_main(size_t i) {
|
||||
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
||||
|
||||
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
|
||||
wparams.offset_ms = 0;
|
||||
wparams.translate = false;
|
||||
wparams.no_context = true;
|
||||
wparams.single_segment = true;
|
||||
wparams.print_realtime = false;
|
||||
wparams.print_progress = false;
|
||||
wparams.print_timestamps = true;
|
||||
wparams.print_special = false;
|
||||
wparams.no_timestamps = true;
|
||||
|
||||
wparams.max_tokens = 32;
|
||||
wparams.audio_ctx = 768; // partial encoder context for better performance
|
||||
|
||||
wparams.temperature = 0.0f;
|
||||
wparams.temperature_inc = 2.0f;
|
||||
wparams.greedy.best_of = 1;
|
||||
|
||||
wparams.beam_search.beam_size = 1;
|
||||
|
||||
wparams.language = "en";
|
||||
|
||||
wparams.grammar_penalty = 100.0;
|
||||
wparams.initial_prompt = "bishop to c3, rook to d4, knight to e5, d4 d5, knight to c3, c3, queen to d4, king b1, pawn to a1, bishop to b2, knight to c3,";
|
||||
|
||||
printf("command: using %d threads\n", wparams.n_threads);
|
||||
|
||||
WChess::callbacks cb;
|
||||
cb.set_status = set_status;
|
||||
cb.check_running = check_running;
|
||||
cb.get_audio = get_audio;
|
||||
cb.set_moves = set_moves;
|
||||
cb.clear_audio = clear_audio;
|
||||
|
||||
WChess(g_contexts[i], wparams, cb, {}).run();
|
||||
if (i < g_contexts.size()) {
|
||||
whisper_free(g_contexts[i]);
|
||||
g_contexts[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
EMSCRIPTEN_BINDINGS(command) {
|
||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
||||
if (g_contexts[i] == nullptr) {
|
||||
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||
if (g_contexts[i] != nullptr) {
|
||||
g_running = true;
|
||||
if (g_worker.joinable()) {
|
||||
g_worker.join();
|
||||
}
|
||||
g_worker = std::thread([i]() {
|
||||
wchess_main(i);
|
||||
});
|
||||
|
||||
return i + 1;
|
||||
} else {
|
||||
return (size_t) 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (size_t) 0;
|
||||
}));
|
||||
|
||||
emscripten::function("free", emscripten::optional_override([](size_t /* index */) {
|
||||
if (g_running) {
|
||||
g_running = false;
|
||||
}
|
||||
}));
|
||||
|
||||
emscripten::function("set_audio", emscripten::optional_override([](size_t index, const emscripten::val & audio) {
|
||||
--index;
|
||||
|
||||
if (index >= g_contexts.size()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (g_contexts[index] == nullptr) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
const int n = audio["length"].as<int>();
|
||||
|
||||
emscripten::val heap = emscripten::val::module_property("HEAPU8");
|
||||
emscripten::val memory = heap["buffer"];
|
||||
|
||||
g_pcmf32.resize(n);
|
||||
|
||||
emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(g_pcmf32.data()), n);
|
||||
memoryView.call<void>("set", audio);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}));
|
||||
|
||||
emscripten::function("get_moves", emscripten::optional_override([]() {
|
||||
std::string moves;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
moves = std::move(g_moves);
|
||||
}
|
||||
|
||||
return moves;
|
||||
}));
|
||||
|
||||
emscripten::function("get_status", emscripten::optional_override([]() {
|
||||
std::string status;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
status = g_status_forced.empty() ? g_status : g_status_forced;
|
||||
}
|
||||
|
||||
return status;
|
||||
}));
|
||||
|
||||
emscripten::function("set_status", emscripten::optional_override([](const std::string & status) {
|
||||
std::lock_guard<std::mutex> lock(g_mutex);
|
||||
g_status_forced = status;
|
||||
}));
|
||||
}
|
15
examples/whisper.android.java/.gitignore
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/caches
|
||||
/.idea/libraries
|
||||
/.idea/modules.xml
|
||||
/.idea/workspace.xml
|
||||
/.idea/navEditor.xml
|
||||
/.idea/assetWizardSettings.xml
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
||||
.cxx
|
||||
local.properties
|
20
examples/whisper.android.java/README.md
Normal file
@ -0,0 +1,20 @@
|
||||
A sample Android app that uses Java code and [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to perform voice-to-text transcription.
|
||||
|
||||
To use:
|
||||
|
||||
1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
|
||||
2. Copy the model to the "app/src/main/assets/models" folder.
|
||||
3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)).
|
||||
4. Copy the sample to the "app/src/main/assets/samples" folder.
|
||||
5. Update the modelFilePath in WhisperService.java to match the model you copied.
6. Update the sampleFilePath in WhisperService.java to match the sample you copied (a usage sketch follows this list).
|
||||
7. Select the "release" active build variant, and use Android Studio to run and deploy to your device.
|
||||
[^1]: I recommend the tiny or base models for running on an Android device.
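If it helps to see how the pieces added in this change fit together, here is a minimal sketch. The class name `TranscribeOnceSketch` is invented for illustration; everything it calls (`AssetUtils`, `WaveEncoder`, `LocalWhisper`, `WhisperSegment`) comes from the sources added below. It assumes the tiny model and `jfk.wav` were bundled under the assets folders in steps 2 and 4, and that it runs off the main thread on Android O or newer.

```java
// Sketch only: the class name is made up; the helpers are the ones added in this change set.
// Requires Android O+ and must be called from a background thread (e.g. via ThreadUtils).
package com.litongjava.whisper.android.java.example;

import android.content.Context;
import android.util.Log;

import com.litongjava.whisper.android.java.bean.WhisperSegment;
import com.litongjava.whisper.android.java.single.LocalWhisper;
import com.litongjava.whisper.android.java.utils.AssetUtils;
import com.litongjava.whisper.android.java.utils.WaveEncoder;

import java.io.File;
import java.util.List;

public final class TranscribeOnceSketch {
  public static void run(Context context) throws Exception {
    File filesDir = context.getFilesDir();
    // Step 4: the sample was bundled under app/src/main/assets/samples/
    File sample = AssetUtils.copyFileIfNotExists(context, filesDir, "samples/jfk.wav");

    float[] pcm = WaveEncoder.decodeWaveFile(sample);   // 16 kHz mono PCM as floats
    LocalWhisper.INSTANCE.init();                       // first use loads models/ggml-tiny.bin from assets

    List<WhisperSegment> segments = LocalWhisper.INSTANCE.transcribeDataWithTime(pcm);
    if (segments != null) {                             // null while the model is still loading
      for (WhisperSegment s : segments) {
        Log.d("WhisperExample", s.toString());          // "[t0 --> t1]:text"
      }
    }
  }
}
```

This mirrors what `TranscriptionTask` and `WhisperService.transcribeSample` do in the sources below, minus the UI plumbing.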
|
||||
|
||||
PS:
|
||||
1. Do not move this Android project folder out of the repository on its own; it depends on source files from the rest of the whisper.cpp project.
2. The C++ code is compiled as part of the Gradle build.
3. If you would rather import a project with precompiled C++ libraries into your own Android app, see https://github.com/litongjava/whisper.cpp.android.java.demo
|
||||
|
||||

|
||||
|
BIN
examples/whisper.android.java/README_files/1.jpg
Normal file
After Width: | Height: | Size: 67 KiB |
1
examples/whisper.android.java/app/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/build
|
58
examples/whisper.android.java/app/build.gradle
Normal file
@ -0,0 +1,58 @@
|
||||
plugins {
|
||||
id 'com.android.application'
|
||||
}
|
||||
|
||||
android {
|
||||
compileSdkVersion 30
|
||||
buildToolsVersion '30.0.3'
|
||||
|
||||
defaultConfig {
|
||||
applicationId "com.litongjava.whisper.android.java"
|
||||
minSdkVersion 21
|
||||
targetSdkVersion 30
|
||||
versionCode 1
|
||||
versionName "1.0"
|
||||
|
||||
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cppFlags ""
|
||||
}
|
||||
}
|
||||
ndk {
|
||||
abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64'
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
release {
|
||||
signingConfig signingConfigs.debug
|
||||
minifyEnabled true
|
||||
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
|
||||
}
|
||||
}
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path "src/main/jni/whisper/CMakeLists.txt"
|
||||
}
|
||||
}
|
||||
ndkVersion "25.2.9519653"
|
||||
compileOptions {
|
||||
sourceCompatibility JavaVersion.VERSION_1_8
|
||||
targetCompatibility JavaVersion.VERSION_1_8
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation 'androidx.appcompat:appcompat:1.1.0'
|
||||
implementation 'com.google.android.material:material:1.1.0'
|
||||
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
|
||||
testImplementation 'junit:junit:4.+'
|
||||
androidTestImplementation 'androidx.test.ext:junit:1.1.5'
|
||||
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
|
||||
|
||||
//litongjava
|
||||
implementation 'com.litongjava:android-view-inject:1.0'
|
||||
implementation 'com.litongjava:jfinal-aop:1.0.1'
|
||||
implementation 'com.litongjava:litongjava-android-utils:1.0.0'
|
||||
}
|
21
examples/whisper.android.java/app/proguard-rules.pro
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
# Add project specific ProGuard rules here.
|
||||
# You can control the set of applied configuration files using the
|
||||
# proguardFiles setting in build.gradle.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
||||
|
||||
# Uncomment this to preserve the line number information for
|
||||
# debugging stack traces.
|
||||
#-keepattributes SourceFile,LineNumberTable
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
@ -0,0 +1,26 @@
|
||||
package com.litongjava.whisper.android.java;
|
||||
|
||||
import android.content.Context;
|
||||
|
||||
import androidx.test.platform.app.InstrumentationRegistry;
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* Instrumented test, which will execute on an Android device.
|
||||
*
|
||||
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
|
||||
*/
|
||||
@RunWith(AndroidJUnit4.class)
|
||||
public class ExampleInstrumentedTest {
|
||||
@Test
|
||||
public void useAppContext() {
|
||||
// Context of the app under test.
|
||||
Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
|
||||
assertEquals("com.litongjava.whisper.android.java", appContext.getPackageName());
|
||||
}
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.litongjava.whisper.android.java">
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:name=".app.App"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/Theme.Whisperandroidjava">
|
||||
<activity android:name=".MainActivity">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<configuration debug="false" xmlns="http://ch.qos.logback/xml/ns/logback"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://ch.qos.logback/xml/ns/logback https://raw.githubusercontent.com/enricopulatzo/logback-XSD/master/src/main/xsd/logback.xsd
|
||||
http://ch.qos.logback/xml/ns/logback ">
|
||||
<!--Define the storage address of the log file Do not use relative paths in the LogBack configuration. -->
|
||||
<property name="LOG_HOME" value="logs" />
|
||||
<!--Formatted output: %d means the date, %-6level: log level from the left display 6 characters wide, %m: log message, %n is a newline character -->
|
||||
<property name="CONSOLE_LOG_PATTERN"
|
||||
value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-6level%logger{0}.%M:%L - %m%n" />
|
||||
|
||||
<!-- console output -->
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
|
||||
<pattern>${CONSOLE_LOG_PATTERN}</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<!-- Generate log files on a daily basis -->
|
||||
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
|
||||
<pattern>${CONSOLE_LOG_PATTERN}</pattern>
|
||||
</encoder>
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
|
||||
<!--File name for log file output -->
|
||||
<fileNamePattern>${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log</fileNamePattern>
|
||||
<!--Maximum number of days to keep rotated log files -->
|
||||
<maxHistory>180</maxHistory>
|
||||
</rollingPolicy>
|
||||
<!--Maximum size of a single log file -->
|
||||
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
|
||||
<maxFileSize>10MB</maxFileSize>
|
||||
</triggeringPolicy>
|
||||
</appender>
|
||||
<!-- Log output level and source-->
|
||||
<root level="info">
|
||||
<appender-ref ref="STDOUT" />
|
||||
<appender-ref ref="FILE" />
|
||||
</root>
|
||||
</configuration>
|
@ -0,0 +1,107 @@
|
||||
package com.litongjava.whisper.android.java;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
|
||||
import android.content.Context;
|
||||
import android.os.Build;
|
||||
import android.os.Bundle;
|
||||
import android.os.Handler;
|
||||
import android.os.Looper;
|
||||
import android.view.View;
|
||||
import android.widget.TextView;
|
||||
|
||||
import com.blankj.utilcode.util.ThreadUtils;
|
||||
import com.litongjava.android.view.inject.annotation.FindViewById;
|
||||
import com.litongjava.android.view.inject.annotation.FindViewByIdLayout;
|
||||
import com.litongjava.android.view.inject.annotation.OnClick;
|
||||
import com.litongjava.android.view.inject.utils.ViewInjectUtils;
|
||||
import com.litongjava.jfinal.aop.Aop;
|
||||
import com.litongjava.jfinal.aop.AopManager;
|
||||
import com.litongjava.whisper.android.java.services.WhisperService;
|
||||
import com.litongjava.whisper.android.java.task.LoadModelTask;
|
||||
import com.litongjava.whisper.android.java.task.TranscriptionTask;
|
||||
import com.litongjava.whisper.android.java.utils.AssetUtils;
|
||||
import com.whispercpp.java.whisper.WhisperLib;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
||||
@FindViewByIdLayout(R.layout.activity_main)
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
|
||||
@FindViewById(R.id.sample_text)
|
||||
private TextView tv;
|
||||
|
||||
Logger log = LoggerFactory.getLogger(this.getClass());
|
||||
private WhisperService whisperService = Aop.get(WhisperService.class);
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
//setContentView(R.layout.activity_main);
|
||||
ViewInjectUtils.injectActivity(this, this);
|
||||
initAopBean();
|
||||
showSystemInfo();
|
||||
}
|
||||
|
||||
private void initAopBean() {
|
||||
Handler mainHandler = new Handler(Looper.getMainLooper());
|
||||
AopManager.me().addSingletonObject(mainHandler);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@OnClick(R.id.loadModelBtn)
|
||||
public void loadModelBtn_OnClick(View v) {
|
||||
Context context = getBaseContext();
|
||||
ThreadUtils.executeByIo(new LoadModelTask(tv));
|
||||
}
|
||||
|
||||
@OnClick(R.id.transcriptSampleBtn)
|
||||
public void transcriptSampleBtn_OnClick(View v) {
|
||||
Context context = getBaseContext();
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
String sampleFilePath = "samples/jfk.wav";
|
||||
File filesDir = context.getFilesDir();
|
||||
File sampleFile = AssetUtils.copyFileIfNotExists(context, filesDir, sampleFilePath);
|
||||
long end = System.currentTimeMillis();
|
||||
String msg = "copy file:" + (end - start) + "ms";
|
||||
outputMsg(tv, msg);
|
||||
ThreadUtils.executeByIo(new TranscriptionTask(tv, sampleFile));
|
||||
}
|
||||
|
||||
private void outputMsg(TextView tv, String msg) {
|
||||
tv.append(msg + "\n");
|
||||
log.info(msg);
|
||||
}
|
||||
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@OnClick(R.id.systemInfoBtn)
|
||||
public void systemInfoBtn_OnClick(View v) {
|
||||
showSystemInfo();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public void showSystemInfo() {
|
||||
String systemInfo = WhisperLib.getSystemInfo();
|
||||
tv.append(systemInfo + "\n");
|
||||
}
|
||||
|
||||
@OnClick(R.id.clearBtn)
|
||||
public void clearBtn_OnClick(View v) {
|
||||
tv.setText("");
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@Override
|
||||
protected void onDestroy() {
|
||||
super.onDestroy();
|
||||
whisperService.release();
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
package com.litongjava.whisper.android.java.app;
|
||||
|
||||
import android.app.Application;
|
||||
|
||||
import com.blankj.utilcode.util.Utils;
|
||||
|
||||
public class App extends Application {
|
||||
@Override
|
||||
public void onCreate() {
|
||||
super.onCreate();
|
||||
Utils.init(this);
|
||||
}
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
package com.litongjava.whisper.android.java.bean;
|
||||
|
||||
/**
|
||||
* Created by litonglinux@qq.com on 10/21/2023_7:48 AM
|
||||
*/
|
||||
public class WhisperSegment {
|
||||
private long start, end;
|
||||
private String sentence;
|
||||
|
||||
public WhisperSegment() {
|
||||
}
|
||||
|
||||
public WhisperSegment(long start, long end, String sentence) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
public long getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public long getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
public String getSentence() {
|
||||
return sentence;
|
||||
}
|
||||
|
||||
public void setStart(long start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
public void setEnd(long end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
public void setSentence(String sentence) {
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "["+start+" --> "+end+"]:"+sentence;
|
||||
}
|
||||
}
|
@ -0,0 +1,101 @@
|
||||
package com.litongjava.whisper.android.java.services;
|
||||
|
||||
import android.content.Context;
|
||||
import android.os.Build;
|
||||
import android.os.Handler;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import com.blankj.utilcode.util.ToastUtils;
|
||||
import com.blankj.utilcode.util.Utils;
|
||||
import com.litongjava.android.utils.dialog.AlertDialogUtils;
|
||||
import com.litongjava.jfinal.aop.Aop;
|
||||
import com.litongjava.whisper.android.java.bean.WhisperSegment;
|
||||
import com.litongjava.whisper.android.java.single.LocalWhisper;
|
||||
import com.litongjava.whisper.android.java.utils.WaveEncoder;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
public class WhisperService {
|
||||
private Logger log = LoggerFactory.getLogger(this.getClass());
|
||||
|
||||
private final Object lock = new Object();
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public void loadModel(TextView tv) {
|
||||
String modelFilePath = LocalWhisper.modelFilePath;
|
||||
String msg = "load model from :" + modelFilePath + "\n";
|
||||
outputMsg(tv, msg);
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
LocalWhisper.INSTANCE.init();
|
||||
long end = System.currentTimeMillis();
|
||||
msg = "model load successful:" + (end - start) + "ms";
|
||||
outputMsg(tv, msg);
|
||||
ToastUtils.showLong(msg);
|
||||
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public void transcribeSample(TextView tv, File sampleFile) {
|
||||
String msg = "";
|
||||
msg = "transcribe file from :" + sampleFile.getAbsolutePath();
|
||||
outputMsg(tv, msg);
|
||||
|
||||
Long start = System.currentTimeMillis();
|
||||
float[] audioData = new float[0]; // audio samples decoded from the WAV file
|
||||
try {
|
||||
audioData = WaveEncoder.decodeWaveFile(sampleFile);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return;
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
msg = "decode wave file:" + (end - start) + "ms";
|
||||
outputMsg(tv, msg);
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
List<WhisperSegment> transcription = null;
|
||||
try {
|
||||
//transcription = LocalWhisper.INSTANCE.transcribeData(audioData);
|
||||
transcription = LocalWhisper.INSTANCE.transcribeDataWithTime(audioData);
|
||||
} catch (ExecutionException e) {
|
||||
e.printStackTrace();
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
if(transcription!=null){
|
||||
ToastUtils.showLong(transcription.toString());
|
||||
msg = "Transcript successful:" + (end - start) + "ms";
|
||||
outputMsg(tv, msg);
|
||||
|
||||
outputMsg(tv, transcription.toString());
|
||||
|
||||
}else{
|
||||
msg = "Transcript failed:" + (end - start) + "ms";
|
||||
outputMsg(tv, msg);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void outputMsg(TextView tv, String msg) {
|
||||
log.info(msg);
|
||||
if(tv!=null){
|
||||
Aop.get(Handler.class).post(()->{ tv.append(msg + "\n");});
|
||||
}
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public void release() {
|
||||
// nothing to do
|
||||
}
|
||||
}
|
@ -0,0 +1,66 @@
|
||||
package com.litongjava.whisper.android.java.single;
|
||||
|
||||
import android.app.Application;
|
||||
import android.os.Build;
|
||||
import android.os.Handler;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import com.blankj.utilcode.util.ToastUtils;
|
||||
import com.blankj.utilcode.util.Utils;
|
||||
import com.litongjava.jfinal.aop.Aop;
|
||||
import com.litongjava.whisper.android.java.bean.WhisperSegment;
|
||||
import com.litongjava.whisper.android.java.utils.AssetUtils;
|
||||
import com.whispercpp.java.whisper.WhisperContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public enum LocalWhisper {
|
||||
INSTANCE;
|
||||
|
||||
public static final String modelFilePath = "models/ggml-tiny.bin";
|
||||
private WhisperContext whisperContext;
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
LocalWhisper() {
|
||||
Application context = Utils.getApp();
|
||||
File filesDir = context.getFilesDir();
|
||||
File modelFile = AssetUtils.copyFileIfNotExists(context, filesDir, modelFilePath);
|
||||
String realModelFilePath = modelFile.getAbsolutePath();
|
||||
whisperContext = WhisperContext.createContextFromFile(realModelFilePath);
|
||||
}
|
||||
|
||||
public synchronized String transcribeData(float[] data) throws ExecutionException, InterruptedException {
|
||||
if(whisperContext==null){
|
||||
toastModelLoading();
|
||||
return null;
|
||||
}else{
|
||||
return whisperContext.transcribeData(data);
|
||||
}
|
||||
}
|
||||
|
||||
private static void toastModelLoading() {
|
||||
Aop.get(Handler.class).post(()->{
|
||||
ToastUtils.showShort("please wait for model loading");
|
||||
});
|
||||
}
|
||||
|
||||
public List<WhisperSegment> transcribeDataWithTime(float[] audioData) throws ExecutionException, InterruptedException {
|
||||
if(whisperContext==null){
|
||||
toastModelLoading();
|
||||
return null;
|
||||
}else{
|
||||
return whisperContext.transcribeDataWithTime(audioData);
|
||||
}
|
||||
}
|
||||
|
||||
public void init() {
|
||||
// nothing to do here; referencing INSTANCE triggers the enum constructor, which loads the model
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
package com.litongjava.whisper.android.java.task;
|
||||
|
||||
import android.content.Context;
|
||||
import android.os.Build;
|
||||
import android.os.Handler;
|
||||
import android.widget.TextView;
|
||||
|
||||
import com.blankj.utilcode.util.ThreadUtils;
|
||||
import com.litongjava.jfinal.aop.Aop;
|
||||
import com.litongjava.whisper.android.java.services.WhisperService;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class LoadModelTask extends ThreadUtils.Task<Object> {
|
||||
private final TextView tv;
|
||||
public LoadModelTask(TextView tv) {
|
||||
this.tv = tv;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object doInBackground() {
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
||||
Aop.get(WhisperService.class).loadModel(tv);
|
||||
}else{
|
||||
Aop.get(Handler.class).post(()->{
|
||||
tv.append("not supported android devices");
|
||||
});
|
||||
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSuccess(Object result) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCancel() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFail(Throwable t) {
|
||||
}
|
||||
}
|
@ -0,0 +1,44 @@
|
||||
package com.litongjava.whisper.android.java.task;
|
||||
|
||||
import android.content.Context;
|
||||
import android.os.Build;
|
||||
import android.widget.TextView;
|
||||
|
||||
import com.blankj.utilcode.util.ThreadUtils;
|
||||
import com.litongjava.jfinal.aop.Aop;
|
||||
import com.litongjava.whisper.android.java.services.WhisperService;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class TranscriptionTask extends ThreadUtils.Task<Object> {
|
||||
private final TextView tv;
|
||||
private final File sampleFile;
|
||||
|
||||
public TranscriptionTask(TextView tv, File sampleFile) {
|
||||
this.tv = tv;
|
||||
this.sampleFile = sampleFile;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object doInBackground() {
|
||||
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
|
||||
Aop.get(WhisperService.class).transcribeSample(tv, sampleFile);
|
||||
}else{
|
||||
tv.append("not supported android devices");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSuccess(Object result) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCancel() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFail(Throwable t) {
|
||||
}
|
||||
}
|
@ -0,0 +1,91 @@
|
||||
package com.litongjava.whisper.android.java.utils;
|
||||
|
||||
import android.content.Context;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class AssetUtils {
|
||||
private static Logger log = LoggerFactory.getLogger(AssetUtils.class);
|
||||
|
||||
public static File copyFileIfNotExists(Context context, File distDir, String filename) {
|
||||
File dstFile = new File(distDir, filename);
|
||||
if (dstFile.exists()) {
|
||||
return dstFile;
|
||||
} else {
|
||||
File parentFile = dstFile.getParentFile();
|
||||
log.info("parentFile:{}", parentFile);
|
||||
if (!parentFile.exists()) {
|
||||
parentFile.mkdirs();
|
||||
}
|
||||
AssetUtils.copyFileFromAssets(context, filename, dstFile);
|
||||
}
|
||||
return dstFile;
|
||||
}
|
||||
|
||||
public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) {
|
||||
if (srcDir.isEmpty() || dstDir.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if (!new File(dstDir).exists()) {
|
||||
new File(dstDir).mkdirs();
|
||||
}
|
||||
for (String fileName : appCtx.getAssets().list(srcDir)) {
|
||||
String srcSubPath = srcDir + File.separator + fileName;
|
||||
String dstSubPath = dstDir + File.separator + fileName;
|
||||
if (new File(srcSubPath).isDirectory()) {
|
||||
copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
} else {
|
||||
copyFileFromAssets(appCtx, srcSubPath, dstSubPath);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) {
|
||||
File dstFile = new File(dstPath);
|
||||
copyFileFromAssets(appCtx, srcPath, dstFile);
|
||||
}
|
||||
|
||||
public static void copyFileFromAssets(Context appCtx, String srcPath, File dstFile) {
|
||||
if (srcPath.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
InputStream is = null;
|
||||
OutputStream os = null;
|
||||
try {
|
||||
is = new BufferedInputStream(appCtx.getAssets().open(srcPath));
|
||||
|
||||
os = new BufferedOutputStream(new FileOutputStream(dstFile));
|
||||
byte[] buffer = new byte[1024];
|
||||
int length = 0;
|
||||
while ((length = is.read(buffer)) != -1) {
|
||||
os.write(buffer, 0, length);
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
try {
if (os != null) {
os.close();
}
if (is != null) {
is.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,105 @@
|
||||
package com.litongjava.whisper.android.java.utils;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ShortBuffer;
|
||||
|
||||
public class WaveEncoder {
|
||||
|
||||
public static float[] decodeWaveFile(File file) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try (FileInputStream fis = new FileInputStream(file)) {
|
||||
byte[] buffer = new byte[1024];
|
||||
int bytesRead;
|
||||
while ((bytesRead = fis.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
}
|
||||
ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
|
||||
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
int channel = byteBuffer.getShort(22);
|
||||
byteBuffer.position(44);
|
||||
|
||||
ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
|
||||
short[] shortArray = new short[shortBuffer.limit()];
|
||||
shortBuffer.get(shortArray);
|
||||
|
||||
float[] output = new float[shortArray.length / channel];
|
||||
|
||||
for (int index = 0; index < output.length; index++) {
|
||||
if (channel == 1) {
|
||||
output[index] = Math.max(-1f, Math.min(1f, shortArray[index] / 32767.0f));
|
||||
} else {
|
||||
output[index] = Math.max(-1f, Math.min(1f, (shortArray[2 * index] + shortArray[2 * index + 1]) / 32767.0f / 2.0f));
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
public static void encodeWaveFile(File file, short[] data) throws IOException {
|
||||
try (FileOutputStream fos = new FileOutputStream(file)) {
|
||||
fos.write(headerBytes(data.length * 2));
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(data.length * 2);
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
buffer.asShortBuffer().put(data);
|
||||
|
||||
byte[] bytes = new byte[buffer.limit()];
|
||||
buffer.get(bytes);
|
||||
|
||||
fos.write(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
private static byte[] headerBytes(int totalLength) {
|
||||
if (totalLength < 44)
|
||||
throw new IllegalArgumentException("Total length must be at least 44 bytes");
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(44);
|
||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
buffer.put((byte) 'R');
|
||||
buffer.put((byte) 'I');
|
||||
buffer.put((byte) 'F');
|
||||
buffer.put((byte) 'F');
|
||||
|
||||
buffer.putInt(totalLength - 8);
|
||||
|
||||
buffer.put((byte) 'W');
|
||||
buffer.put((byte) 'A');
|
||||
buffer.put((byte) 'V');
|
||||
buffer.put((byte) 'E');
|
||||
|
||||
buffer.put((byte) 'f');
|
||||
buffer.put((byte) 'm');
|
||||
buffer.put((byte) 't');
|
||||
buffer.put((byte) ' ');
|
||||
|
||||
buffer.putInt(16);
|
||||
buffer.putShort((short) 1);
|
||||
buffer.putShort((short) 1);
|
||||
buffer.putInt(16000);
|
||||
buffer.putInt(32000);
|
||||
buffer.putShort((short) 2);
|
||||
buffer.putShort((short) 16);
|
||||
|
||||
buffer.put((byte) 'd');
|
||||
buffer.put((byte) 'a');
|
||||
buffer.put((byte) 't');
|
||||
buffer.put((byte) 'a');
|
||||
|
||||
buffer.putInt(totalLength - 44);
|
||||
buffer.position(0);
|
||||
|
||||
byte[] bytes = new byte[buffer.limit()];
|
||||
buffer.get(bytes);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
}
|
@ -0,0 +1,121 @@
|
||||
package com.whispercpp.java.whisper;
|
||||
|
||||
import android.os.Build;
|
||||
import android.util.Log;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class CpuInfo {
|
||||
private static final String LOG_TAG = "WhisperCpuConfig";
|
||||
|
||||
private List<String> lines;
|
||||
|
||||
public CpuInfo(List<String> lines) {
|
||||
this.lines = lines;
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
public int getHighPerfCpuCount0() {
|
||||
try {
|
||||
return getHighPerfCpuCountByFrequencies();
|
||||
} catch (Exception e) {
|
||||
Log.d(LOG_TAG, "Couldn't read CPU frequencies", e);
|
||||
return getHighPerfCpuCountByVariant();
|
||||
}
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private int getHighPerfCpuCountByFrequencies() {
|
||||
List<Integer> frequencies = getCpuValues("processor", line -> {
|
||||
try {
|
||||
return getMaxCpuFrequency(Integer.parseInt(line.trim()));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
);
|
||||
Log.d(LOG_TAG, "Binned cpu frequencies (frequency, count): " + binnedValues(frequencies));
|
||||
return countDroppingMin(frequencies);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private int getHighPerfCpuCountByVariant() {
|
||||
List<Integer> variants = getCpuValues("CPU variant", line -> Integer.parseInt(line.trim().substring(line.indexOf("0x") + 2), 16));
|
||||
Log.d(LOG_TAG, "Binned cpu variants (variant, count): " + binnedValues(variants));
|
||||
return countKeepingMin(variants);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private Map<Integer, Integer> binnedValues(List<Integer> values) {
|
||||
Map<Integer, Integer> countMap = new HashMap<>();
|
||||
for (int value : values) {
|
||||
countMap.put(value, countMap.getOrDefault(value, 0) + 1);
|
||||
}
|
||||
return countMap;
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private List<Integer> getCpuValues(String property, Mapper mapper) {
|
||||
List<Integer> values = new ArrayList<>();
|
||||
for (String line : lines) {
|
||||
if (line.startsWith(property)) {
|
||||
values.add(mapper.map(line.substring(line.indexOf(':') + 1)));
|
||||
}
|
||||
}
|
||||
values.sort(Integer::compareTo);
|
||||
return values;
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private int countDroppingMin(List<Integer> values) {
|
||||
int min = values.stream().mapToInt(i -> i).min().orElse(Integer.MAX_VALUE);
|
||||
return (int) values.stream().filter(value -> value > min).count();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
private int countKeepingMin(List<Integer> values) {
|
||||
int min = values.stream().mapToInt(i -> i).min().orElse(Integer.MAX_VALUE);
|
||||
return (int) values.stream().filter(value -> value.equals(min)).count();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
public static int getHighPerfCpuCount() {
|
||||
try {
|
||||
return readCpuInfo().getHighPerfCpuCount0();
|
||||
} catch (Exception e) {
|
||||
Log.d(LOG_TAG, "Couldn't read CPU info", e);
|
||||
return Math.max(Runtime.getRuntime().availableProcessors() - 4, 0);
|
||||
}
|
||||
}
|
||||
|
||||
private static CpuInfo readCpuInfo() throws IOException {
|
||||
try (BufferedReader reader = new BufferedReader(new FileReader("/proc/cpuinfo"))) {
|
||||
List<String> lines = new ArrayList<>();
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
lines.add(line);
|
||||
}
|
||||
return new CpuInfo(lines);
|
||||
}
|
||||
}
|
||||
|
||||
private static int getMaxCpuFrequency(int cpuIndex) throws IOException {
|
||||
String path = "/sys/devices/system/cpu/cpu" + cpuIndex + "/cpufreq/cpuinfo_max_freq";
|
||||
try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
|
||||
return Integer.parseInt(reader.readLine());
|
||||
}
|
||||
}
|
||||
|
||||
private interface Mapper {
|
||||
int map(String line);
|
||||
}
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
package com.whispercpp.java.whisper;
|
||||
|
||||
import android.content.res.AssetManager;
|
||||
import android.os.Build;
|
||||
import android.util.Log;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import com.litongjava.whisper.android.java.bean.WhisperSegment;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
public class WhisperContext {
|
||||
|
||||
private static final String LOG_TAG = "LibWhisper";
|
||||
private long ptr;
|
||||
private final ExecutorService executorService;
|
||||
|
||||
private WhisperContext(long ptr) {
|
||||
this.ptr = ptr;
|
||||
this.executorService = Executors.newSingleThreadExecutor();
|
||||
}
|
||||
|
||||
public String transcribeData(float[] data) throws ExecutionException, InterruptedException {
|
||||
return executorService.submit(new Callable<String>() {
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
if (ptr == 0L) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
int numThreads = WhisperCpuConfig.getPreferredThreadCount();
|
||||
Log.d(LOG_TAG, "Selecting " + numThreads + " threads");
|
||||
|
||||
StringBuilder result = new StringBuilder();
|
||||
synchronized (this) {
|
||||
|
||||
WhisperLib.fullTranscribe(ptr, numThreads, data);
|
||||
int textCount = WhisperLib.getTextSegmentCount(ptr);
|
||||
for (int i = 0; i < textCount; i++) {
|
||||
String sentence = WhisperLib.getTextSegment(ptr, i);
|
||||
result.append(sentence);
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
public List<WhisperSegment> transcribeDataWithTime(float[] data) throws ExecutionException, InterruptedException {
|
||||
return executorService.submit(new Callable<List<WhisperSegment>>() {
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
@Override
|
||||
public List<WhisperSegment> call() throws Exception {
|
||||
if (ptr == 0L) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
int numThreads = WhisperCpuConfig.getPreferredThreadCount();
|
||||
Log.d(LOG_TAG, "Selecting " + numThreads + " threads");
|
||||
|
||||
List<WhisperSegment> segments = new ArrayList<>();
|
||||
synchronized (this) {
|
||||
// StringBuilder result = new StringBuilder();
|
||||
WhisperLib.fullTranscribe(ptr, numThreads, data);
|
||||
int textCount = WhisperLib.getTextSegmentCount(ptr);
|
||||
for (int i = 0; i < textCount; i++) {
|
||||
long start = WhisperLib.getTextSegmentT0(ptr, i);
|
||||
String sentence = WhisperLib.getTextSegment(ptr, i);
|
||||
long end = WhisperLib.getTextSegmentT1(ptr, i);
|
||||
// result.append();
|
||||
segments.add(new WhisperSegment(start, end, sentence));
|
||||
|
||||
}
|
||||
// return result.toString();
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public String benchMemory(int nthreads) throws ExecutionException, InterruptedException {
|
||||
return executorService.submit(() -> WhisperLib.benchMemcpy(nthreads)).get();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public String benchGgmlMulMat(int nthreads) throws ExecutionException, InterruptedException {
|
||||
return executorService.submit(() -> WhisperLib.benchGgmlMulMat(nthreads)).get();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public void release() throws ExecutionException, InterruptedException {
|
||||
executorService.submit(() -> {
|
||||
if (ptr != 0L) {
|
||||
WhisperLib.freeContext(ptr);
|
||||
ptr = 0;
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public static WhisperContext createContextFromFile(String filePath) {
|
||||
long ptr = WhisperLib.initContext(filePath);
|
||||
if (ptr == 0L) {
|
||||
throw new RuntimeException("Couldn't create context with path " + filePath);
|
||||
}
|
||||
return new WhisperContext(ptr);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public static WhisperContext createContextFromInputStream(InputStream stream) {
|
||||
long ptr = WhisperLib.initContextFromInputStream(stream);
|
||||
if (ptr == 0L) {
|
||||
throw new RuntimeException("Couldn't create context from input stream");
|
||||
}
|
||||
return new WhisperContext(ptr);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public static WhisperContext createContextFromAsset(AssetManager assetManager, String assetPath) {
|
||||
long ptr = WhisperLib.initContextFromAsset(assetManager, assetPath);
|
||||
if (ptr == 0L) {
|
||||
throw new RuntimeException("Couldn't create context from asset " + assetPath);
|
||||
}
|
||||
return new WhisperContext(ptr);
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public static String getSystemInfo() {
|
||||
return WhisperLib.getSystemInfo();
|
||||
}
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
package com.whispercpp.java.whisper;
|
||||
|
||||
import android.os.Build;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
public class WhisperCpuConfig {
|
||||
@RequiresApi(api = Build.VERSION_CODES.N)
|
||||
public static int getPreferredThreadCount() {
|
||||
return Math.max(CpuInfo.getHighPerfCpuCount(), 2);
|
||||
}
|
||||
}
|
@ -0,0 +1,75 @@
|
||||
package com.whispercpp.java.whisper;
|
||||
|
||||
import android.content.res.AssetManager;
|
||||
import android.os.Build;
|
||||
import android.util.Log;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public class WhisperLib {
|
||||
private static final String LOG_TAG = "LibWhisper";
|
||||
|
||||
static {
|
||||
|
||||
Log.d(LOG_TAG, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
|
||||
boolean loadVfpv4 = false;
|
||||
boolean loadV8fp16 = false;
|
||||
if (WhisperUtils.isArmEabiV7a()) {
|
||||
String cpuInfo = WhisperUtils.cpuInfo();
|
||||
if (cpuInfo != null) {
|
||||
Log.d(LOG_TAG, "CPU info: " + cpuInfo);
|
||||
if (cpuInfo.contains("vfpv4")) {
|
||||
Log.d(LOG_TAG, "CPU supports vfpv4");
|
||||
loadVfpv4 = true;
|
||||
}
|
||||
}
|
||||
} else if (WhisperUtils.isArmEabiV8a()) {
|
||||
String cpuInfo = WhisperUtils.cpuInfo();
|
||||
if (cpuInfo != null) {
|
||||
Log.d(LOG_TAG, "CPU info: " + cpuInfo);
|
||||
if (cpuInfo.contains("fphp")) {
|
||||
Log.d(LOG_TAG, "CPU supports fp16 arithmetic");
|
||||
loadV8fp16 = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (loadVfpv4) {
|
||||
Log.d(LOG_TAG, "Loading libwhisper_vfpv4.so");
|
||||
System.loadLibrary("whisper_vfpv4");
|
||||
} else if (loadV8fp16) {
|
||||
Log.d(LOG_TAG, "Loading libwhisper_v8fp16_va.so");
|
||||
System.loadLibrary("whisper_v8fp16_va");
|
||||
} else {
|
||||
Log.d(LOG_TAG, "Loading libwhisper.so");
|
||||
System.loadLibrary("whisper");
|
||||
}
|
||||
}
|
||||
|
||||
public static native long initContextFromInputStream(InputStream inputStream);
|
||||
|
||||
public static native long initContextFromAsset(AssetManager assetManager, String assetPath);
|
||||
|
||||
public static native long initContext(String modelPath);
|
||||
|
||||
public static native void freeContext(long contextPtr);
|
||||
|
||||
public static native void fullTranscribe(long contextPtr, int numThreads, float[] audioData);
|
||||
|
||||
public static native int getTextSegmentCount(long contextPtr);
|
||||
|
||||
public static native String getTextSegment(long contextPtr, int index);
|
||||
|
||||
public static native long getTextSegmentT0(long contextPtr, int index);
|
||||
|
||||
public static native long getTextSegmentT1(long contextPtr, int index);
|
||||
|
||||
public static native String getSystemInfo();
|
||||
|
||||
public static native String benchMemcpy(int nthread);
|
||||
|
||||
public static native String benchGgmlMulMat(int nthread);
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package com.whispercpp.java.whisper;
|
||||
|
||||
import android.os.Build;
|
||||
import android.util.Log;
|
||||
|
||||
import androidx.annotation.RequiresApi;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class WhisperUtils {
|
||||
private static final String LOG_TAG = "LibWhisper";
|
||||
|
||||
|
||||
public static boolean isArmEabiV7a() {
|
||||
return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a");
|
||||
}
|
||||
|
||||
public static boolean isArmEabiV8a() {
|
||||
return Build.SUPPORTED_ABIS[0].equals("arm64-v8a");
|
||||
}
|
||||
|
||||
@RequiresApi(api = Build.VERSION_CODES.O)
|
||||
public static String cpuInfo() {
|
||||
try {
|
||||
Path path = new File("/proc/cpuinfo").toPath();
|
||||
return new String(java.nio.file.Files.readAllBytes(path));
|
||||
} catch (Exception e) {
|
||||
Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,56 @@
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
project(whisper.cpp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)
|
||||
|
||||
set(
|
||||
SOURCE_FILES
|
||||
${WHISPER_LIB_DIR}/ggml.c
|
||||
${WHISPER_LIB_DIR}/ggml-alloc.c
|
||||
${WHISPER_LIB_DIR}/ggml-backend.c
|
||||
${WHISPER_LIB_DIR}/ggml-quants.c
|
||||
${WHISPER_LIB_DIR}/whisper.cpp
|
||||
${CMAKE_SOURCE_DIR}/jni.c
|
||||
)
|
||||
|
||||
find_library(LOG_LIB log)
|
||||
|
||||
function(build_library target_name)
|
||||
add_library(
|
||||
${target_name}
|
||||
SHARED
|
||||
${SOURCE_FILES}
|
||||
)
|
||||
|
||||
target_link_libraries(${target_name} ${LOG_LIB} android)
|
||||
|
||||
if (${target_name} STREQUAL "whisper_v8fp16_va")
|
||||
target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16)
|
||||
elseif (${target_name} STREQUAL "whisper_vfpv4")
|
||||
target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
||||
|
||||
target_compile_options(${target_name} PRIVATE -O3)
|
||||
target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
|
||||
|
||||
#target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
|
||||
#target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
|
||||
#target_link_options(${target_name} PRIVATE -flto)
|
||||
|
||||
endif ()
|
||||
endfunction()
|
||||
|
||||
build_library("whisper") # Default target
|
||||
|
||||
if (${ANDROID_ABI} STREQUAL "arm64-v8a")
|
||||
build_library("whisper_v8fp16_va")
|
||||
elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
|
||||
build_library("whisper_vfpv4")
|
||||
endif ()
|
||||
|
||||
include_directories(${WHISPER_LIB_DIR})
|
257
examples/whisper.android.java/app/src/main/jni/whisper/jni.c
Normal file
@ -0,0 +1,257 @@
|
||||
#include <jni.h>
|
||||
#include <android/asset_manager.h>
|
||||
#include <android/asset_manager_jni.h>
|
||||
#include <android/log.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <string.h>
|
||||
#include "whisper.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#define UNUSED(x) (void)(x)
|
||||
#define TAG "JNI"
|
||||
|
||||
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
|
||||
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)
|
||||
|
||||
static inline int min(int a, int b) {
|
||||
return (a < b) ? a : b;
|
||||
}
|
||||
|
||||
static inline int max(int a, int b) {
|
||||
return (a > b) ? a : b;
|
||||
}
|
||||
|
||||
struct input_stream_context {
|
||||
size_t offset;
|
||||
JNIEnv * env;
|
||||
jobject thiz;
|
||||
jobject input_stream;
|
||||
|
||||
jmethodID mid_available;
|
||||
jmethodID mid_read;
|
||||
};
|
||||
|
||||
size_t inputStreamRead(void * ctx, void * output, size_t read_size) {
|
||||
struct input_stream_context* is = (struct input_stream_context*)ctx;
|
||||
|
||||
jint avail_size = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available);
|
||||
jint size_to_copy = read_size < avail_size ? (jint)read_size : avail_size;
|
||||
|
||||
jbyteArray byte_array = (*is->env)->NewByteArray(is->env, size_to_copy);
|
||||
|
||||
jint n_read = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_read, byte_array, 0, size_to_copy);
|
||||
|
||||
if (size_to_copy != read_size || size_to_copy != n_read) {
|
||||
LOGI("Insufficient Read: Req=%zu, ToCopy=%d, Available=%d", read_size, size_to_copy, n_read);
|
||||
}
|
||||
|
||||
jbyte* byte_array_elements = (*is->env)->GetByteArrayElements(is->env, byte_array, NULL);
|
||||
memcpy(output, byte_array_elements, size_to_copy);
|
||||
(*is->env)->ReleaseByteArrayElements(is->env, byte_array, byte_array_elements, JNI_ABORT);
|
||||
|
||||
(*is->env)->DeleteLocalRef(is->env, byte_array);
|
||||
|
||||
is->offset += size_to_copy;
|
||||
|
||||
return size_to_copy;
|
||||
}
|
||||
bool inputStreamEof(void * ctx) {
|
||||
struct input_stream_context* is = (struct input_stream_context*)ctx;
|
||||
|
||||
jint result = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available);
|
||||
return result <= 0;
|
||||
}
|
||||
void inputStreamClose(void * ctx) {
|
||||
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_whispercpp_java_whisper_WhisperLib_initContextFromInputStream(
|
||||
JNIEnv *env, jobject thiz, jobject input_stream) {
|
||||
UNUSED(thiz);
|
||||
|
||||
struct whisper_context *context = NULL;
|
||||
struct whisper_model_loader loader = {};
|
||||
struct input_stream_context inp_ctx = {};
|
||||
|
||||
inp_ctx.offset = 0;
|
||||
inp_ctx.env = env;
|
||||
inp_ctx.thiz = thiz;
|
||||
inp_ctx.input_stream = input_stream;
|
||||
|
||||
jclass cls = (*env)->GetObjectClass(env, input_stream);
|
||||
inp_ctx.mid_available = (*env)->GetMethodID(env, cls, "available", "()I");
|
||||
inp_ctx.mid_read = (*env)->GetMethodID(env, cls, "read", "([BII)I");
|
||||
|
||||
loader.context = &inp_ctx;
|
||||
loader.read = inputStreamRead;
|
||||
loader.eof = inputStreamEof;
|
||||
loader.close = inputStreamClose;
|
||||
|
||||
loader.eof(loader.context);
|
||||
|
||||
context = whisper_init(&loader);
|
||||
return (jlong) context;
|
||||
}
|
||||
|
||||
static size_t asset_read(void *ctx, void *output, size_t read_size) {
|
||||
return AAsset_read((AAsset *) ctx, output, read_size);
|
||||
}
|
||||
|
||||
static bool asset_is_eof(void *ctx) {
|
||||
return AAsset_getRemainingLength64((AAsset *) ctx) <= 0;
|
||||
}
|
||||
|
||||
static void asset_close(void *ctx) {
|
||||
AAsset_close((AAsset *) ctx);
|
||||
}
|
||||
|
||||
static struct whisper_context *whisper_init_from_asset(
|
||||
JNIEnv *env,
|
||||
jobject assetManager,
|
||||
const char *asset_path
|
||||
) {
|
||||
LOGI("Loading model from asset '%s'\n", asset_path);
|
||||
AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager);
|
||||
AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING);
|
||||
if (!asset) {
|
||||
LOGW("Failed to open '%s'\n", asset_path);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
whisper_model_loader loader = {
|
||||
.context = asset,
|
||||
.read = &asset_read,
|
||||
.eof = &asset_is_eof,
|
||||
.close = &asset_close
|
||||
};
|
||||
|
||||
return whisper_init(&loader);
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_whispercpp_java_whisper_WhisperLib_initContextFromAsset(
|
||||
JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = NULL;
|
||||
const char *asset_path_chars = (*env)->GetStringUTFChars(env, asset_path_str, NULL);
|
||||
context = whisper_init_from_asset(env, assetManager, asset_path_chars);
|
||||
(*env)->ReleaseStringUTFChars(env, asset_path_str, asset_path_chars);
|
||||
return (jlong) context;
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_whispercpp_java_whisper_WhisperLib_initContext(
|
||||
JNIEnv *env, jobject thiz, jstring model_path_str) {
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = NULL;
|
||||
const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL);
|
||||
context = whisper_init_from_file(model_path_chars);
|
||||
(*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars);
|
||||
return (jlong) context;
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_whispercpp_java_whisper_WhisperLib_freeContext(
|
||||
JNIEnv *env, jobject thiz, jlong context_ptr) {
|
||||
UNUSED(env);
|
||||
UNUSED(thiz);
|
||||
struct whisper_context *context = (struct whisper_context *) context_ptr;
|
||||
whisper_free(context);
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_fullTranscribe(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
    jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL);
    const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data);

    // The code below is adapted from the Objective-C iOS sample
    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    params.print_realtime = true;
    params.print_progress = false;
    params.print_timestamps = true;
    params.print_special = false;
    params.translate = false;
    params.language = "en";
    params.n_threads = num_threads;
    params.offset_ms = 0;
    params.no_context = true;
    params.single_segment = false;

    whisper_reset_timings(context);

    LOGI("About to run whisper_full");
    if (whisper_full(context, params, audio_data_arr, audio_data_length) != 0) {
        LOGI("Failed to run the model");
    } else {
        whisper_print_timings(context);
    }
    (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT);
}

JNIEXPORT jint JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentCount(
        JNIEnv *env, jobject thiz, jlong context_ptr) {
    UNUSED(env);
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
    return whisper_full_n_segments(context);
}


JNIEXPORT jstring JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_getTextSegment(
        JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
    const char *text = whisper_full_get_segment_text(context, index);
    jstring string = (*env)->NewStringUTF(env, text);
    return string;
}

JNIEXPORT jlong JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentT0(JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
    const int64_t t0 = whisper_full_get_segment_t0(context, index);
    return (jlong) t0;
}

JNIEXPORT jlong JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentT1(JNIEnv *env, jobject thiz, jlong context_ptr, jint index) {
    UNUSED(thiz);
    struct whisper_context *context = (struct whisper_context *) context_ptr;
    const int64_t t1 = whisper_full_get_segment_t1(context, index);
    return (jlong) t1;
}

JNIEXPORT jstring JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_getSystemInfo(
        JNIEnv *env, jobject thiz
) {
    UNUSED(thiz);
    const char *sysinfo = whisper_print_system_info();
    jstring string = (*env)->NewStringUTF(env, sysinfo);
    return string;
}

JNIEXPORT jstring JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_benchMemcpy(JNIEnv *env, jobject thiz,
                                                        jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads);
    jstring string = (*env)->NewStringUTF(env, bench_ggml_memcpy);
    return string;
}

JNIEXPORT jstring JNICALL
Java_com_whispercpp_java_whisper_WhisperLib_benchGgmlMulMat(JNIEnv *env, jobject thiz,
                                                            jint n_threads) {
    UNUSED(thiz);
    const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads);
    jstring string = (*env)->NewStringUTF(env, bench_ggml_mul_mat);
    return string;
}
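
For orientation, a minimal sketch of the Java-side class these JNI symbols imply. The package, class, and method names are read off the Java_com_whispercpp_java_whisper_WhisperLib_* symbols above, and the jobject thiz parameters suggest instance methods; the library name passed to loadLibrary and the exact modifiers are assumptions, so the example's real WhisperLib.java may differ:

    package com.whispercpp.java.whisper;

    import android.content.res.AssetManager;

    public class WhisperLib {
        static {
            // assumption: the native library built from jni.c is named "whisper"
            System.loadLibrary("whisper");
        }

        // model lifecycle
        public native long initContextFromAsset(AssetManager assetManager, String assetPath);
        public native long initContext(String modelPath);
        public native void freeContext(long contextPtr);

        // inference and result access
        public native void fullTranscribe(long contextPtr, int numThreads, float[] audioData);
        public native int getTextSegmentCount(long contextPtr);
        public native String getTextSegment(long contextPtr, int index);
        public native long getTextSegmentT0(long contextPtr, int index);
        public native long getTextSegmentT1(long contextPtr, int index);

        // diagnostics / benchmarks
        public native String getSystemInfo();
        public native String benchMemcpy(int nThreads);
        public native String benchGgmlMulMat(int nThreads);
    }

A typical call sequence against these declarations, again illustrative only (the model path is hypothetical; pcmF32 is 16 kHz mono float samples):

    WhisperLib whisper = new WhisperLib();
    long ctx = whisper.initContext("/path/to/ggml-base.en.bin"); // hypothetical model path
    whisper.fullTranscribe(ctx, 4, pcmF32);
    StringBuilder result = new StringBuilder();
    for (int i = 0; i < whisper.getTextSegmentCount(ctx); i++) {
        result.append(whisper.getTextSegment(ctx, i));
    }
    whisper.freeContext(ctx);
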
@ -0,0 +1,30 @@
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>
@ -0,0 +1,170 @@
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>
@ -0,0 +1,57 @@
<?xml version="1.0" encoding="utf-8"?>
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical"
    tools:context=".MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="wrap_content">

        <Button
            android:id="@+id/systemInfoBtn"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="System Info" />

        <Button
            android:id="@+id/loadModelBtn"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="Load model" />

    </LinearLayout>

    <LinearLayout
        android:layout_width="wrap_content"
        android:layout_height="wrap_content">

        <Button
            android:id="@+id/transcriptSampleBtn"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="Transcribe sample" />

        <Button
            android:id="@+id/clearBtn"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="Clear" />
    </LinearLayout>

    <TextView
        android:id="@+id/sample_text"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:text="Hello World!"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        android:scrollbarAlwaysDrawHorizontalTrack="true"
        android:maxLines="999"/>

</LinearLayout>
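
The layout above exposes four buttons (systemInfoBtn, loadModelBtn, transcriptSampleBtn, clearBtn) and a sample_text TextView, with tools:context pointing at .MainActivity. A minimal sketch of how that activity might bind them — only the view IDs come from the layout; the class wiring and WhisperLib calls are illustrative assumptions:

    // inside MainActivity.onCreate(), after setContentView() of this layout — illustrative only
    TextView sampleText = findViewById(R.id.sample_text);
    Button systemInfoBtn = findViewById(R.id.systemInfoBtn);
    Button loadModelBtn = findViewById(R.id.loadModelBtn);
    Button transcriptSampleBtn = findViewById(R.id.transcriptSampleBtn);
    Button clearBtn = findViewById(R.id.clearBtn);

    clearBtn.setOnClickListener(v -> sampleText.setText(""));
    // the remaining buttons would call into WhisperLib (getSystemInfo, initContextFromAsset, fullTranscribe)
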
@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
After Width: | Height: | Size: 3.5 KiB |
After Width: | Height: | Size: 5.2 KiB |
After Width: | Height: | Size: 2.6 KiB |
After Width: | Height: | Size: 3.3 KiB |
After Width: | Height: | Size: 4.8 KiB |
After Width: | Height: | Size: 7.3 KiB |