diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 38e476b9..974ecda5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -396,6 +396,32 @@ jobs: cd examples/whisper.android ./gradlew assembleRelease --no-daemon + android_java: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v3 + + - name: set up JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'temurin' + cache: gradle + + - name: Setup Android SDK + uses: android-actions/setup-android@v2 + with: + api-level: 30 + build-tools-version: 30.0.3 + + - name: Build + run: | + cd examples/whisper.android.java + chmod +x ./gradlew + ./gradlew assembleRelease + java: needs: [ 'windows' ] runs-on: windows-latest diff --git a/.gitignore b/.gitignore index 9ff35d00..00325823 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,7 @@ bindings/java/.idea/ .idea/ benchmark_results.csv +cmake-build-debug/ +.cxx/ +.gradle/ +local.properties \ No newline at end of file diff --git a/bindings/java/build.gradle b/bindings/java/build.gradle index 8f7a5fd9..75f3a9cd 100644 --- a/bindings/java/build.gradle +++ b/bindings/java/build.gradle @@ -9,6 +9,7 @@ archivesBaseName = 'whispercpp' group = 'io.github.ggerganov' version = '1.4.0' + sourceCompatibility = 1.8 targetCompatibility = 1.8 diff --git a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java index 4a250403..4c1594d5 100644 --- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java +++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java @@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp; import com.sun.jna.Native; import com.sun.jna.Pointer; +import io.github.ggerganov.whispercpp.bean.WhisperSegment; import io.github.ggerganov.whispercpp.params.WhisperContextParams; import 
io.github.ggerganov.whispercpp.params.WhisperFullParams; import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy; @@ -9,6 +10,8 @@ import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer. @@ -160,6 +163,28 @@ public class WhisperCpp implements AutoCloseable { return str.toString().trim(); } + public List fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException { + if (ctx == null) { + throw new IllegalStateException("Model not initialised"); + } + + if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) { + throw new IOException("Failed to process audio"); + } + + int nSegments = lib.whisper_full_n_segments(ctx); + List segments= new ArrayList<>(nSegments); + + + for (int i = 0; i < nSegments; i++) { + long t0 = lib.whisper_full_get_segment_t0(ctx, i); + String text = lib.whisper_full_get_segment_text(ctx, i); + long t1 = lib.whisper_full_get_segment_t1(ctx, i); + segments.add(new WhisperSegment(t0,t1,text)); + } + + return segments; + } // public int getTextSegmentCount(Pointer ctx) { // return lib.whisper_full_n_segments(ctx); diff --git a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java new file mode 100644 index 00000000..da970b58 --- /dev/null +++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java @@ -0,0 +1,47 @@ +package io.github.ggerganov.whispercpp.bean; + +/** + * Created by litonglinux@qq.com on 10/21/2023_7:48 AM + */ +public class WhisperSegment { + private long start, end; + private String sentence; + + public WhisperSegment() { + } + + public WhisperSegment(long start, 
long end, String sentence) { + this.start = start; + this.end = end; + this.sentence = sentence; + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; + } + + public String getSentence() { + return sentence; + } + + public void setStart(long start) { + this.start = start; + } + + public void setEnd(long end) { + this.end = end; + } + + public void setSentence(String sentence) { + this.sentence = sentence; + } + + @Override + public String toString() { + return "[" + start + " --> " + end + "]:" + sentence; + } +} diff --git a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java index 66e18f9a..ccc3be89 100644 --- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java +++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java @@ -2,6 +2,7 @@ package io.github.ggerganov.whispercpp; import static org.junit.jupiter.api.Assertions.*; +import io.github.ggerganov.whispercpp.bean.WhisperSegment; import io.github.ggerganov.whispercpp.params.CBool; import io.github.ggerganov.whispercpp.params.WhisperFullParams; import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy; @@ -11,6 +12,7 @@ import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import java.io.File; import java.io.FileNotFoundException; +import java.util.List; class WhisperCppTest { private static WhisperCpp whisper = new WhisperCpp(); @@ -20,7 +22,8 @@ class WhisperCppTest { static void init() throws FileNotFoundException { // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin" // or you can provide the absolute path to the model file. 
- String modelName = "../../models/ggml-tiny.en.bin"; + String modelName = "../../models/ggml-tiny.bin"; +// String modelName = "../../models/ggml-tiny.en.bin"; try { whisper.initContext(modelName); // whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY); @@ -99,4 +102,44 @@ class WhisperCppTest { audioInputStream.close(); } } + + @Test + void testFullTranscribeWithTime() throws Exception { + if (!modelInitialised) { + System.out.println("Model not initialised, skipping test"); + return; + } + + // Given + File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav"); + AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file); + + byte[] b = new byte[audioInputStream.available()]; + float[] floats = new float[b.length / 2]; + +// WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY); + WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH); + params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress)); + params.print_progress = CBool.FALSE; +// params.initial_prompt = "and so my fellow Americans um, like"; + + + try { + audioInputStream.read(b); + + for (int i = 0, j = 0; i < b.length; i += 2, j++) { + int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF; + floats[j] = intSample / 32767.0f; + } + + List segments = whisper.fullTranscribeWithTime(params, floats); + assertTrue(segments.size() > 0, "The size of segments should be greater than 0"); + for (WhisperSegment segment : segments) { + System.out.println(segment); + } + } finally { + audioInputStream.close(); + } + } + } diff --git a/examples/whisper.android.java/.gitignore b/examples/whisper.android.java/.gitignore new file mode 100644 index 00000000..aa724b77 --- /dev/null +++ b/examples/whisper.android.java/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches 
+/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/examples/whisper.android.java/README.md b/examples/whisper.android.java/README.md new file mode 100644 index 00000000..44675ab8 --- /dev/null +++ b/examples/whisper.android.java/README.md @@ -0,0 +1,20 @@ +A sample Android app using Java code and [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions. + +To use: + +1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1] +2. Copy the model to the "app/src/main/assets/models" folder. +3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)). +4. Copy the sample to the "app/src/main/assets/samples" folder. +5. Set `modelFilePath` in LocalWhisper.java to the model's path inside the assets folder (for example, "models/ggml-tiny.bin"). +6. Set `sampleFilePath` in MainActivity.java to the sample's path inside the assets folder (for example, "samples/jfk.wav"). +7. Select the "release" active build variant, and use Android Studio to run and deploy to your device. +[^1]: I recommend the tiny or base models for running on an Android device. + +PS: +1. Do not move this Android project folder on its own to another location, because it depends on files from the rest of this repository. +2. The C++ code is compiled during the build process. +3.
If you want to import a compiled cpp project in your Android project, please refer to the https://github.com/litongjava/whisper.cpp.android.java.demo + +![](README_files/1.jpg) + diff --git a/examples/whisper.android.java/README_files/1.jpg b/examples/whisper.android.java/README_files/1.jpg new file mode 100644 index 00000000..035cc105 Binary files /dev/null and b/examples/whisper.android.java/README_files/1.jpg differ diff --git a/examples/whisper.android.java/app/.gitignore b/examples/whisper.android.java/app/.gitignore new file mode 100644 index 00000000..42afabfd --- /dev/null +++ b/examples/whisper.android.java/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/examples/whisper.android.java/app/build.gradle b/examples/whisper.android.java/app/build.gradle new file mode 100644 index 00000000..532d780a --- /dev/null +++ b/examples/whisper.android.java/app/build.gradle @@ -0,0 +1,58 @@ +plugins { + id 'com.android.application' +} + +android { + compileSdkVersion 30 + buildToolsVersion '30.0.3' + + defaultConfig { + applicationId "com.litongjava.whisper.android.java" + minSdkVersion 21 + targetSdkVersion 30 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + externalNativeBuild { + cmake { + cppFlags "" + } + } + ndk { + abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64' + } + } + + buildTypes { + release { + signingConfig signingConfigs.debug + minifyEnabled true + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + externalNativeBuild { + cmake { + path "src/main/jni/whisper/CMakeLists.txt" + } + } + ndkVersion "25.2.9519653" + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +dependencies { + implementation 'androidx.appcompat:appcompat:1.1.0' + implementation 'com.google.android.material:material:1.1.0' + implementation 
'androidx.constraintlayout:constraintlayout:1.1.3' + testImplementation 'junit:junit:4.+' + androidTestImplementation 'androidx.test.ext:junit:1.1.5' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' + + //litongjava + implementation 'com.litongjava:android-view-inject:1.0' + implementation 'com.litongjava:jfinal-aop:1.0.1' + implementation 'com.litongjava:litongjava-android-utils:1.0.0' +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/proguard-rules.pro b/examples/whisper.android.java/app/proguard-rules.pro new file mode 100644 index 00000000..481bb434 --- /dev/null +++ b/examples/whisper.android.java/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/androidTest/java/com/litongjava/whisper/android/java/ExampleInstrumentedTest.java b/examples/whisper.android.java/app/src/androidTest/java/com/litongjava/whisper/android/java/ExampleInstrumentedTest.java new file mode 100644 index 00000000..7962acc2 --- /dev/null +++ b/examples/whisper.android.java/app/src/androidTest/java/com/litongjava/whisper/android/java/ExampleInstrumentedTest.java @@ -0,0 +1,26 @@ +package com.litongjava.whisper.android.java; + +import android.content.Context; + +import androidx.test.platform.app.InstrumentationRegistry; +import androidx.test.ext.junit.runners.AndroidJUnit4; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import static org.junit.Assert.*; + +/** + * Instrumented test, which will execute on an Android device. + * + * @see Testing documentation + */ +@RunWith(AndroidJUnit4.class) +public class ExampleInstrumentedTest { + @Test + public void useAppContext() { + // Context of the app under test. 
+ Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); + assertEquals("com.litongjava.whisper.android.java", appContext.getPackageName()); + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/AndroidManifest.xml b/examples/whisper.android.java/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000..f4980ad0 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/AndroidManifest.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/assets/logback.xml b/examples/whisper.android.java/app/src/main/assets/logback.xml new file mode 100644 index 00000000..1bd6d921 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/assets/logback.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + ${CONSOLE_LOG_PATTERN} + + + + + + + ${CONSOLE_LOG_PATTERN} + + + + ${LOG_HOME}/project-name-%d{yyyy-MM-dd}.log + + 180 + + + + 10MB + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/MainActivity.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/MainActivity.java new file mode 100644 index 00000000..b85d550d --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/MainActivity.java @@ -0,0 +1,107 @@ +package com.litongjava.whisper.android.java; + +import androidx.annotation.RequiresApi; +import androidx.appcompat.app.AppCompatActivity; + +import android.content.Context; +import android.os.Build; +import android.os.Bundle; +import android.os.Handler; +import android.os.Looper; +import android.view.View; +import android.widget.TextView; + +import com.blankj.utilcode.util.ThreadUtils; +import com.litongjava.android.view.inject.annotation.FindViewById; +import com.litongjava.android.view.inject.annotation.FindViewByIdLayout; +import 
com.litongjava.android.view.inject.annotation.OnClick; +import com.litongjava.android.view.inject.utils.ViewInjectUtils; +import com.litongjava.jfinal.aop.Aop; +import com.litongjava.jfinal.aop.AopManager; +import com.litongjava.whisper.android.java.services.WhisperService; +import com.litongjava.whisper.android.java.task.LoadModelTask; +import com.litongjava.whisper.android.java.task.TranscriptionTask; +import com.litongjava.whisper.android.java.utils.AssetUtils; +import com.whispercpp.java.whisper.WhisperLib; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; + + +@FindViewByIdLayout(R.layout.activity_main) +public class MainActivity extends AppCompatActivity { + + @FindViewById(R.id.sample_text) + private TextView tv; + + Logger log = LoggerFactory.getLogger(this.getClass()); + private WhisperService whisperService = Aop.get(WhisperService.class); + + @RequiresApi(api = Build.VERSION_CODES.O) + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + //setContentView(R.layout.activity_main); + ViewInjectUtils.injectActivity(this, this); + initAopBean(); + showSystemInfo(); + } + + private void initAopBean() { + Handler mainHandler = new Handler(Looper.getMainLooper()); + AopManager.me().addSingletonObject(mainHandler); + } + + @RequiresApi(api = Build.VERSION_CODES.O) + @OnClick(R.id.loadModelBtn) + public void loadModelBtn_OnClick(View v) { + Context context = getBaseContext(); + ThreadUtils.executeByIo(new LoadModelTask(tv)); + } + + @OnClick(R.id.transcriptSampleBtn) + public void transcriptSampleBtn_OnClick(View v) { + Context context = getBaseContext(); + + long start = System.currentTimeMillis(); + String sampleFilePath = "samples/jfk.wav"; + File filesDir = context.getFilesDir(); + File sampleFile = AssetUtils.copyFileIfNotExists(context, filesDir, sampleFilePath); + long end = System.currentTimeMillis(); + String msg = "copy file:" + (end - start) + "ms"; + outputMsg(tv, 
msg); + ThreadUtils.executeByIo(new TranscriptionTask(tv, sampleFile)); + } + + private void outputMsg(TextView tv, String msg) { + tv.append(msg + "\n"); + log.info(msg); + } + + + @RequiresApi(api = Build.VERSION_CODES.O) + @OnClick(R.id.systemInfoBtn) + public void systemInfoBtn_OnClick(View v) { + showSystemInfo(); + } + + @RequiresApi(api = Build.VERSION_CODES.O) + public void showSystemInfo() { + String systemInfo = WhisperLib.getSystemInfo(); + tv.append(systemInfo + "\n"); + } + + @OnClick(R.id.clearBtn) + public void clearBtn_OnClick(View v) { + tv.setText(""); + } + + @RequiresApi(api = Build.VERSION_CODES.O) + @Override + protected void onDestroy() { + super.onDestroy(); + whisperService.release(); + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/app/App.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/app/App.java new file mode 100644 index 00000000..afa3452b --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/app/App.java @@ -0,0 +1,13 @@ +package com.litongjava.whisper.android.java.app; + +import android.app.Application; + +import com.blankj.utilcode.util.Utils; + +public class App extends Application { + @Override + public void onCreate() { + super.onCreate(); + Utils.init(this); + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/bean/WhisperSegment.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/bean/WhisperSegment.java new file mode 100644 index 00000000..e529fed4 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/bean/WhisperSegment.java @@ -0,0 +1,47 @@ +package com.litongjava.whisper.android.java.bean; + +/** + * Created by litonglinux@qq.com on 10/21/2023_7:48 AM + */ +public class WhisperSegment { + private long start, end; 
+ private String sentence; + + public WhisperSegment() { + } + + public WhisperSegment(long start, long end, String sentence) { + this.start = start; + this.end = end; + this.sentence = sentence; + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; + } + + public String getSentence() { + return sentence; + } + + public void setStart(long start) { + this.start = start; + } + + public void setEnd(long end) { + this.end = end; + } + + public void setSentence(String sentence) { + this.sentence = sentence; + } + + @Override + public String toString() { + return "["+start+" --> "+end+"]:"+sentence; + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/services/WhisperService.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/services/WhisperService.java new file mode 100644 index 00000000..7b97d3bd --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/services/WhisperService.java @@ -0,0 +1,101 @@ +package com.litongjava.whisper.android.java.services; + +import android.content.Context; +import android.os.Build; +import android.os.Handler; +import android.widget.TextView; +import android.widget.Toast; + +import androidx.annotation.RequiresApi; + +import com.blankj.utilcode.util.ToastUtils; +import com.blankj.utilcode.util.Utils; +import com.litongjava.android.utils.dialog.AlertDialogUtils; +import com.litongjava.jfinal.aop.Aop; +import com.litongjava.whisper.android.java.bean.WhisperSegment; +import com.litongjava.whisper.android.java.single.LocalWhisper; +import com.litongjava.whisper.android.java.utils.WaveEncoder; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ExecutionException; + +public class WhisperService { + private Logger log = 
LoggerFactory.getLogger(this.getClass()); + + private final Object lock = new Object(); + + @RequiresApi(api = Build.VERSION_CODES.O) + public void loadModel(TextView tv) { + String modelFilePath = LocalWhisper.modelFilePath; + String msg = "load model from :" + modelFilePath + "\n"; + outputMsg(tv, msg); + + long start = System.currentTimeMillis(); + LocalWhisper.INSTANCE.init(); + long end = System.currentTimeMillis(); + msg = "model load successful:" + (end - start) + "ms"; + outputMsg(tv, msg); + ToastUtils.showLong(msg); + + } + + @RequiresApi(api = Build.VERSION_CODES.O) + public void transcribeSample(TextView tv, File sampleFile) { + String msg = ""; + msg = "transcribe file from :" + sampleFile.getAbsolutePath(); + outputMsg(tv, msg); + + Long start = System.currentTimeMillis(); + float[] audioData = new float[0]; // 读取音频样本 + try { + audioData = WaveEncoder.decodeWaveFile(sampleFile); + } catch (IOException e) { + e.printStackTrace(); + return; + } + long end = System.currentTimeMillis(); + msg = "decode wave file:" + (end - start) + "ms"; + outputMsg(tv, msg); + + start = System.currentTimeMillis(); + List transcription = null; + try { + //transcription = LocalWhisper.INSTANCE.transcribeData(audioData); + transcription = LocalWhisper.INSTANCE.transcribeDataWithTime(audioData); + } catch (ExecutionException e) { + e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + end = System.currentTimeMillis(); + if(transcription!=null){ + ToastUtils.showLong(transcription.toString()); + msg = "Transcript successful:" + (end - start) + "ms"; + outputMsg(tv, msg); + + outputMsg(tv, transcription.toString()); + + }else{ + msg = "Transcript failed:" + (end - start) + "ms"; + outputMsg(tv, msg); + } + + } + + private void outputMsg(TextView tv, String msg) { + log.info(msg); + if(tv!=null){ + Aop.get(Handler.class).post(()->{ tv.append(msg + "\n");}); + } + } + + @RequiresApi(api = Build.VERSION_CODES.O) + public void release() { + 
//noting to do + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/single/LocalWhisper.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/single/LocalWhisper.java new file mode 100644 index 00000000..bbf628ca --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/single/LocalWhisper.java @@ -0,0 +1,66 @@ +package com.litongjava.whisper.android.java.single; + +import android.app.Application; +import android.os.Build; +import android.os.Handler; + +import androidx.annotation.RequiresApi; + +import com.blankj.utilcode.util.ToastUtils; +import com.blankj.utilcode.util.Utils; +import com.litongjava.jfinal.aop.Aop; +import com.litongjava.whisper.android.java.bean.WhisperSegment; +import com.litongjava.whisper.android.java.utils.AssetUtils; +import com.whispercpp.java.whisper.WhisperContext; + +import java.io.File; +import java.util.List; +import java.util.concurrent.ExecutionException; + + +@RequiresApi(api = Build.VERSION_CODES.O) +public enum LocalWhisper { + INSTANCE; + + public static final String modelFilePath = "models/ggml-tiny.bin"; + private WhisperContext whisperContext; + + @RequiresApi(api = Build.VERSION_CODES.O) + LocalWhisper() { + Application context = Utils.getApp(); + File filesDir = context.getFilesDir(); + File modelFile = AssetUtils.copyFileIfNotExists(context, filesDir, modelFilePath); + String realModelFilePath = modelFile.getAbsolutePath(); + whisperContext = WhisperContext.createContextFromFile(realModelFilePath); + } + + public synchronized String transcribeData(float[] data) throws ExecutionException, InterruptedException { + if(whisperContext==null){ + toastModelLoading(); + return null; + }else{ + return whisperContext.transcribeData(data); + } + } + + private static void toastModelLoading() { + Aop.get(Handler.class).post(()->{ + ToastUtils.showShort("please wait for model loading"); + }); + } + + 
public List transcribeDataWithTime(float[] audioData) throws ExecutionException, InterruptedException { + if(whisperContext==null){ + toastModelLoading(); + return null; + }else{ + return whisperContext.transcribeDataWithTime(audioData); + } + } + + public void init() { + //noting to do.but init + } + + +} diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/LoadModelTask.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/LoadModelTask.java new file mode 100644 index 00000000..23fe4489 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/LoadModelTask.java @@ -0,0 +1,44 @@ +package com.litongjava.whisper.android.java.task; + +import android.content.Context; +import android.os.Build; +import android.os.Handler; +import android.widget.TextView; + +import com.blankj.utilcode.util.ThreadUtils; +import com.litongjava.jfinal.aop.Aop; +import com.litongjava.whisper.android.java.services.WhisperService; + +import java.io.File; + +public class LoadModelTask extends ThreadUtils.Task { + private final TextView tv; + public LoadModelTask(TextView tv) { + this.tv = tv; + } + + @Override + public Object doInBackground() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + Aop.get(WhisperService.class).loadModel(tv); + }else{ + Aop.get(Handler.class).post(()->{ + tv.append("not supported android devices"); + }); + + } + return null; + } + + @Override + public void onSuccess(Object result) { + } + + @Override + public void onCancel() { + } + + @Override + public void onFail(Throwable t) { + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/TranscriptionTask.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/TranscriptionTask.java new file mode 100644 index 00000000..7477f8ed --- /dev/null +++ 
b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/TranscriptionTask.java @@ -0,0 +1,44 @@ +package com.litongjava.whisper.android.java.task; + +import android.content.Context; +import android.os.Build; +import android.widget.TextView; + +import com.blankj.utilcode.util.ThreadUtils; +import com.litongjava.jfinal.aop.Aop; +import com.litongjava.whisper.android.java.services.WhisperService; + +import java.io.File; + +public class TranscriptionTask extends ThreadUtils.Task { + private final TextView tv; + private final File sampleFile; + + public TranscriptionTask(TextView tv, File sampleFile) { + this.tv = tv; + this.sampleFile = sampleFile; + + } + + @Override + public Object doInBackground() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + Aop.get(WhisperService.class).transcribeSample(tv, sampleFile); + }else{ + tv.append("not supported android devices"); + } + return null; + } + + @Override + public void onSuccess(Object result) { + } + + @Override + public void onCancel() { + } + + @Override + public void onFail(Throwable t) { + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/AssetUtils.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/AssetUtils.java new file mode 100644 index 00000000..d5ac5bc5 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/AssetUtils.java @@ -0,0 +1,91 @@ +package com.litongjava.whisper.android.java.utils; + +import android.content.Context; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class AssetUtils { + private 
static Logger log = LoggerFactory.getLogger(AssetUtils.class); + + /** Returns distDir/filename, first copying it out of the APK assets (asset path = filename) when the file does not exist yet; missing parent directories are created. */ public static File copyFileIfNotExists(Context context, File distDir, String filename) { + File dstFile = new File(distDir, filename); + if (dstFile.exists()) { + return dstFile; + } else { + File parentFile = dstFile.getParentFile(); + log.info("parentFile:{}", parentFile); + if (!parentFile.exists()) { + parentFile.mkdirs(); + } + AssetUtils.copyFileFromAssets(context, filename, dstFile); + } + return dstFile; + } + + /** Copies the asset directory srcDir into dstDir, recursing into entries it classifies as directories; does nothing when either path is empty, and any exception is printed and ends the copy. */ public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) { + if (srcDir.isEmpty() || dstDir.isEmpty()) { + return; + } + try { + if (!new File(dstDir).exists()) { + new File(dstDir).mkdirs(); + } + for (String fileName : appCtx.getAssets().list(srcDir)) { + String srcSubPath = srcDir + File.separator + fileName; + String dstSubPath = dstDir + File.separator + fileName; + if (new File(srcSubPath).isDirectory()) { /* NOTE(review): srcSubPath is an asset path, not a filesystem path, so this test looks like it is always false and nested asset directories are handed to copyFileFromAssets - confirm and consider testing getAssets().list(srcSubPath) instead */ + copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath); + } else { + copyFileFromAssets(appCtx, srcSubPath, dstSubPath); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** Overload that takes the destination as a path string. */ public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) { + File dstFile = new File(dstPath); + copyFileFromAssets(appCtx, srcPath, dstFile); + } + + /** Copies the asset srcPath to dstFile in 1 KiB chunks; does nothing for an empty srcPath. I/O errors are printed rather than thrown, so a failed copy can leave a missing or partial dstFile. */ public static void copyFileFromAssets(Context appCtx, String srcPath, File dstFile) { + if (srcPath.isEmpty()) { + return; + } + InputStream is = null; + OutputStream os = null; + try { + is = new BufferedInputStream(appCtx.getAssets().open(srcPath)); + + os = new BufferedOutputStream(new FileOutputStream(dstFile)); + byte[] buffer = new byte[1024]; + int length = 0; + while ((length = is.read(buffer)) != -1) { + os.write(buffer, 0, length); + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { /* either stream is still null when its open call threw, so guard both closes */ + if (os != null) { os.close(); } + if (is != null) { is.close(); } + } catch (IOException e) { + e.printStackTrace(); + } + } + + } +} \ No
newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/WaveEncoder.java b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/WaveEncoder.java new file mode 100644 index 00000000..fbe57d4a --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/utils/WaveEncoder.java @@ -0,0 +1,105 @@ +package com.litongjava.whisper.android.java.utils; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.ShortBuffer; + +/** Minimal reader/writer for 16-bit PCM WAV files with a fixed 44-byte header. */ public class WaveEncoder { + + /** Reads the whole file and converts its little-endian 16-bit samples to floats clamped to [-1, 1]. The channel count is taken from byte offset 22 and samples are assumed to start at offset 44. Input with more than one channel is treated as stereo: each output value averages two consecutive samples. NOTE(review): a channel count of 0 in the header makes the division below throw ArithmeticException. */ public static float[] decodeWaveFile(File file) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (FileInputStream fis = new FileInputStream(file)) { + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = fis.read(buffer)) != -1) { + baos.write(buffer, 0, bytesRead); + } + } + ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + int channel = byteBuffer.getShort(22); + byteBuffer.position(44); + + ShortBuffer shortBuffer = byteBuffer.asShortBuffer(); + short[] shortArray = new short[shortBuffer.limit()]; + shortBuffer.get(shortArray); + + float[] output = new float[shortArray.length / channel]; + + for (int index = 0; index < output.length; index++) { + if (channel == 1) { + output[index] = Math.max(-1f, Math.min(1f, shortArray[index] / 32767.0f)); + } else { + output[index] = Math.max(-1f, Math.min(1f, (shortArray[2 * index] + shortArray[2 * index + 1]) / 32767.0f / 2.0f)); + } + } + return output; + } + + /** Writes data to file as a WAV: the 44-byte header followed by the samples as little-endian 16-bit values. */ public static void encodeWaveFile(File file, short[] data) throws IOException { + try (FileOutputStream fos = new FileOutputStream(file)) { + fos.write(headerBytes(44 + data.length * 2)); /* headerBytes() takes the total file size (it subtracts 8 and 44 itself), so the 44 header bytes must be included */ + +
ByteBuffer buffer = ByteBuffer.allocate(data.length * 2); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.asShortBuffer().put(data); /* writes through a view, so buffer's own position stays at 0 for the bulk get below */ + + byte[] bytes = new byte[buffer.limit()]; + buffer.get(bytes); + + fos.write(bytes); + } + } + + /** Builds the 44-byte little-endian RIFF/WAVE header for a file whose total size, header included, is totalLength bytes; throws IllegalArgumentException when totalLength is below 44. The constants follow the canonical PCM WAV layout. */ private static byte[] headerBytes(int totalLength) { + if (totalLength < 44) + throw new IllegalArgumentException("Total length must be at least 44 bytes"); + + ByteBuffer buffer = ByteBuffer.allocate(44); + buffer.order(ByteOrder.LITTLE_ENDIAN); + + buffer.put((byte) 'R'); + buffer.put((byte) 'I'); + buffer.put((byte) 'F'); + buffer.put((byte) 'F'); + + buffer.putInt(totalLength - 8); /* RIFF chunk size: everything after this field */ + + buffer.put((byte) 'W'); + buffer.put((byte) 'A'); + buffer.put((byte) 'V'); + buffer.put((byte) 'E'); + + buffer.put((byte) 'f'); + buffer.put((byte) 'm'); + buffer.put((byte) 't'); + buffer.put((byte) ' '); + + buffer.putInt(16); /* size of the fmt chunk body */ + buffer.putShort((short) 1); /* audio format 1 = PCM */ + buffer.putShort((short) 1); /* channel count */ + buffer.putInt(16000); /* sample rate in Hz */ + buffer.putInt(32000); /* byte rate = 16000 samples/s * 2 bytes */ + buffer.putShort((short) 2); /* block align: bytes per sample frame */ + buffer.putShort((short) 16); /* bits per sample */ + + buffer.put((byte) 'd'); + buffer.put((byte) 'a'); + buffer.put((byte) 't'); + buffer.put((byte) 'a'); + + buffer.putInt(totalLength - 44); /* data chunk size: the payload after the header */ + buffer.position(0); + + byte[] bytes = new byte[buffer.limit()]; + buffer.get(bytes); + + return bytes; + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/CpuInfo.java b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/CpuInfo.java new file mode 100644 index 00000000..733ca354 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/CpuInfo.java @@ -0,0 +1,121 @@ +package com.whispercpp.java.whisper; + +import android.os.Build; +import android.util.Log; + +import androidx.annotation.RequiresApi; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import
java.util.Map; + +/** Estimates the number of high-performance CPU cores from the lines of a cpuinfo listing. */ public class CpuInfo { + private static final String LOG_TAG = "WhisperCpuConfig"; + + private List lines; + + public CpuInfo(List lines) { + this.lines = lines; + } + + /** Counts fast cores by per-core maximum frequency; if that throws, logs it and falls back to the "CPU variant" heuristic. */ @RequiresApi(api = Build.VERSION_CODES.N) + public int getHighPerfCpuCount0() { + try { + return getHighPerfCpuCountByFrequencies(); + } catch (Exception e) { + Log.d(LOG_TAG, "Couldn't read CPU frequencies", e); + return getHighPerfCpuCountByVariant(); + } + } + + /** Reads the max frequency of every "processor" entry and counts the cores that are faster than the slowest one. */ @RequiresApi(api = Build.VERSION_CODES.N) + private int getHighPerfCpuCountByFrequencies() { + List frequencies = getCpuValues("processor", line -> { + try { + return getMaxCpuFrequency(Integer.parseInt(line.trim())); + } catch (IOException e) { + throw new java.io.UncheckedIOException(e); + } + /* rethrown unchecked because Mapper.map declares no exceptions; this lets getHighPerfCpuCount0() fall back instead of silently counting the core as frequency 0 */ + } + ); + Log.d(LOG_TAG, "Binned cpu frequencies (frequency, count): " + binnedValues(frequencies)); + return countDroppingMin(frequencies); + } + + /** Parses the hexadecimal "CPU variant" of every core and counts the cores that share the lowest variant. */ @RequiresApi(api = Build.VERSION_CODES.N) + private int getHighPerfCpuCountByVariant() { + List variants = getCpuValues("CPU variant", line -> Integer.parseInt(line.substring(line.indexOf("0x") + 2).trim(), 16)); /* cut after "0x" on the same string the index was computed on, then trim; trimming first shifted the index and produced an empty string */ + Log.d(LOG_TAG, "Binned cpu variants (variant, count): " + binnedValues(variants)); + return countKeepingMin(variants); + } + + /** Histogram of the values (value to occurrence count); used for logging only. */ @RequiresApi(api = Build.VERSION_CODES.N) + private Map binnedValues(List values) { + Map countMap = new HashMap<>(); + for (int value : values) { + countMap.put(value, countMap.getOrDefault(value, 0) + 1); + } + return countMap; + } + + /** Applies mapper to the text after the first ':' of every line that starts with property and returns the results sorted ascending. */ @RequiresApi(api = Build.VERSION_CODES.N) + private List getCpuValues(String property, Mapper mapper) { + List values = new ArrayList<>(); + for (String line : lines) { + if (line.startsWith(property)) { + values.add(mapper.map(line.substring(line.indexOf(':') + 1))); + } + } + values.sort(Integer::compareTo); + return values; + } + + /** Number of entries strictly greater than the minimum (0 for an empty list). */ @RequiresApi(api = Build.VERSION_CODES.N) + private int countDroppingMin(List values) { + int min = values.stream().mapToInt(i -> i).min().orElse(Integer.MAX_VALUE); + return (int)
values.stream().filter(value -> value > min).count(); + } + + /** Number of entries equal to the minimum (0 for an empty list). */ @RequiresApi(api = Build.VERSION_CODES.N) + private int countKeepingMin(List values) { + int min = values.stream().mapToInt(i -> i).min().orElse(Integer.MAX_VALUE); + return (int) values.stream().filter(value -> value.equals(min)).count(); + } + + /** Entry point: parses /proc/cpuinfo and returns the estimated high-performance core count; if anything throws, logs it and returns availableProcessors() - 4, floored at 0. */ @RequiresApi(api = Build.VERSION_CODES.N) + public static int getHighPerfCpuCount() { + try { + return readCpuInfo().getHighPerfCpuCount0(); + } catch (Exception e) { + Log.d(LOG_TAG, "Couldn't read CPU info", e); + return Math.max(Runtime.getRuntime().availableProcessors() - 4, 0); + } + } + + /** Reads every line of /proc/cpuinfo into a new CpuInfo. */ private static CpuInfo readCpuInfo() throws IOException { + try (BufferedReader reader = new BufferedReader(new FileReader("/proc/cpuinfo"))) { + List lines = new ArrayList<>(); + String line; + while ((line = reader.readLine()) != null) { + lines.add(line); + } + return new CpuInfo(lines); + } + } + + /** Returns the integer stored in the first line of the core's cpufreq/cpuinfo_max_freq sysfs file. */ private static int getMaxCpuFrequency(int cpuIndex) throws IOException { + String path = "/sys/devices/system/cpu/cpu" + cpuIndex + "/cpufreq/cpuinfo_max_freq"; + try (BufferedReader reader = new BufferedReader(new FileReader(path))) { + return Integer.parseInt(reader.readLine()); /* an empty file yields null here, which parseInt rejects with NumberFormatException */ + } + } + + /** Converts the value part of a cpuinfo line to an int. */ private interface Mapper { + int map(String line); + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperContext.java b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperContext.java new file mode 100644 index 00000000..0e52ec12 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperContext.java @@ -0,0 +1,138 @@ +package com.whispercpp.java.whisper; + +import android.content.res.AssetManager; +import android.os.Build; +import android.util.Log; + +import androidx.annotation.RequiresApi; + +import com.litongjava.whisper.android.java.bean.WhisperSegment; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import
java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** Wraps a native whisper context pointer; transcription, benchmark and release calls are submitted to one single-thread executor and the caller blocks on the result. */ public class WhisperContext { + + private static final String LOG_TAG = "LibWhisper"; + private long ptr; /* native context handle; 0 once released */ + private final ExecutorService executorService; + + private WhisperContext(long ptr) { + this.ptr = ptr; + this.executorService = Executors.newSingleThreadExecutor(); + } + + /** Runs a full transcription of data on the executor thread and returns the text of all segments concatenated; the task throws IllegalStateException (surfaced through ExecutionException) when the context was already released. */ public String transcribeData(float[] data) throws ExecutionException, InterruptedException { + return executorService.submit(new Callable() { + @RequiresApi(api = Build.VERSION_CODES.O) + @Override + public String call() throws Exception { + if (ptr == 0L) { + throw new IllegalStateException(); + } + int numThreads = WhisperCpuConfig.getPreferredThreadCount(); + Log.d(LOG_TAG, "Selecting " + numThreads + " threads"); + + StringBuilder result = new StringBuilder(); + synchronized (this) { /* NOTE(review): 'this' is the anonymous Callable, not the WhisperContext, so this lock is never contended; serialization comes from the single-thread executor */ + + WhisperLib.fullTranscribe(ptr, numThreads, data); + int textCount = WhisperLib.getTextSegmentCount(ptr); + for (int i = 0; i < textCount; i++) { + String sentence = WhisperLib.getTextSegment(ptr, i); + result.append(sentence); + } + } + return result.toString(); + } + }).get(); + } + + /** Same as transcribeData, but returns one WhisperSegment (t0, t1, text) per segment instead of the concatenated text. */ public List transcribeDataWithTime(float[] data) throws ExecutionException, InterruptedException { + return executorService.submit(new Callable>() { + @RequiresApi(api = Build.VERSION_CODES.O) + @Override + public List call() throws Exception { + if (ptr == 0L) { + throw new IllegalStateException(); + } + int numThreads = WhisperCpuConfig.getPreferredThreadCount(); + Log.d(LOG_TAG, "Selecting " + numThreads + " threads"); + + List segments = new ArrayList<>(); + synchronized (this) { +// StringBuilder result = new StringBuilder(); + WhisperLib.fullTranscribe(ptr, numThreads, data); + int textCount = WhisperLib.getTextSegmentCount(ptr); + for (int i = 0; i < textCount; i++) { + long start = WhisperLib.getTextSegmentT0(ptr, i); + String sentence =
WhisperLib.getTextSegment(ptr, i); + long end = WhisperLib.getTextSegmentT1(ptr, i); +// result.append(); + segments.add(new WhisperSegment(start, end, sentence)); + + } +// return result.toString(); + } + return segments; + } + }).get(); + } + + /** Runs the native memcpy benchmark on the executor thread and returns its report. */ @RequiresApi(api = Build.VERSION_CODES.O) + public String benchMemory(int nthreads) throws ExecutionException, InterruptedException { + return executorService.submit(() -> WhisperLib.benchMemcpy(nthreads)).get(); + } + + /** Runs the native ggml mul_mat benchmark on the executor thread and returns its report. */ @RequiresApi(api = Build.VERSION_CODES.O) + public String benchGgmlMulMat(int nthreads) throws ExecutionException, InterruptedException { + return executorService.submit(() -> WhisperLib.benchGgmlMulMat(nthreads)).get(); + } + + /** Frees the native context on the executor thread and zeroes ptr; calling it again is a no-op. NOTE(review): the executor itself is not shut down here - confirm whether its thread is meant to outlive the context. */ @RequiresApi(api = Build.VERSION_CODES.O) + public void release() throws ExecutionException, InterruptedException { + executorService.submit(() -> { + if (ptr != 0L) { + WhisperLib.freeContext(ptr); + ptr = 0; + } + }).get(); + } + + /** Creates a context from a model file path; throws RuntimeException when the native side returns 0. */ @RequiresApi(api = Build.VERSION_CODES.O) + public static WhisperContext createContextFromFile(String filePath) { + long ptr = WhisperLib.initContext(filePath); + if (ptr == 0L) { + throw new RuntimeException("Couldn't create context with path " + filePath); + } + return new WhisperContext(ptr); + } + + /** Creates a context from a model read through stream; throws RuntimeException when the native side returns 0. */ @RequiresApi(api = Build.VERSION_CODES.O) + public static WhisperContext createContextFromInputStream(InputStream stream) { + long ptr = WhisperLib.initContextFromInputStream(stream); + if (ptr == 0L) { + throw new RuntimeException("Couldn't create context from input stream"); + } + return new WhisperContext(ptr); + } + + /** Creates a context from a model stored as an APK asset; throws RuntimeException when the native side returns 0. */ @RequiresApi(api = Build.VERSION_CODES.O) + public static WhisperContext createContextFromAsset(AssetManager assetManager, String assetPath) { + long ptr = WhisperLib.initContextFromAsset(assetManager, assetPath); + if (ptr == 0L) { + throw new RuntimeException("Couldn't create context from asset " + assetPath); + } + return new WhisperContext(ptr); + } + + /** Returns the string produced by the native getSystemInfo call. */ @RequiresApi(api = Build.VERSION_CODES.O) + public static String
getSystemInfo() { + return WhisperLib.getSystemInfo(); + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperCpuConfig.java b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperCpuConfig.java new file mode 100644 index 00000000..8cd2b888 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperCpuConfig.java @@ -0,0 +1,12 @@ +package com.whispercpp.java.whisper; + +import android.os.Build; + +import androidx.annotation.RequiresApi; + +/** Chooses how many threads to hand to whisper. */ public class WhisperCpuConfig { + /** Returns the high-performance core count reported by CpuInfo, but never less than 2. */ @RequiresApi(api = Build.VERSION_CODES.N) + public static int getPreferredThreadCount() { + return Math.max(CpuInfo.getHighPerfCpuCount(), 2); + } +} diff --git a/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperLib.java b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperLib.java new file mode 100644 index 00000000..38dd47a3 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperLib.java @@ -0,0 +1,75 @@ +package com.whispercpp.java.whisper; + +import android.content.res.AssetManager; +import android.os.Build; +import android.util.Log; + +import androidx.annotation.RequiresApi; + +import java.io.InputStream; + +/** JNI binding class: its static initializer loads one of the native whisper libraries and the native methods map onto the functions in jni.c. */ @RequiresApi(api = Build.VERSION_CODES.O) +public class WhisperLib { + private static final String LOG_TAG = "LibWhisper"; + + static { /* pick the library variant from the primary ABI plus the feature flags found in the cpuinfo text */ + + Log.d(LOG_TAG, "Primary ABI: " + Build.SUPPORTED_ABIS[0]); + boolean loadVfpv4 = false; + boolean loadV8fp16 = false; + if (WhisperUtils.isArmEabiV7a()) { + String cpuInfo = WhisperUtils.cpuInfo(); + if (cpuInfo != null) { + Log.d(LOG_TAG, "CPU info: " + cpuInfo); + if (cpuInfo.contains("vfpv4")) { + Log.d(LOG_TAG, "CPU supports vfpv4"); + loadVfpv4 = true; + } + } + } else if (WhisperUtils.isArmEabiV8a()) { + String cpuInfo = WhisperUtils.cpuInfo(); + if (cpuInfo != null) { + Log.d(LOG_TAG, "CPU info: " + cpuInfo); +
if (cpuInfo.contains("fphp")) { + Log.d(LOG_TAG, "CPU supports fp16 arithmetic"); + loadV8fp16 = true; + } + } + } + + if (loadVfpv4) { + Log.d(LOG_TAG, "Loading libwhisper_vfpv4.so"); + System.loadLibrary("whisper_vfpv4"); + } else if (loadV8fp16) { + Log.d(LOG_TAG, "Loading libwhisper_v8fp16_va.so"); + System.loadLibrary("whisper_v8fp16_va"); + } else { /* default build, also used when cpuinfo could not be read */ + Log.d(LOG_TAG, "Loading libwhisper.so"); + System.loadLibrary("whisper"); + } + } + + /* Native entry points. The three initContext* methods return the native context as a long (0 is treated as failure by WhisperContext); that value is what the contextPtr parameters below expect. */ public static native long initContextFromInputStream(InputStream inputStream); + + public static native long initContextFromAsset(AssetManager assetManager, String assetPath); + + public static native long initContext(String modelPath); + + public static native void freeContext(long contextPtr); + + public static native void fullTranscribe(long contextPtr, int numThreads, float[] audioData); + + public static native int getTextSegmentCount(long contextPtr); + + public static native String getTextSegment(long contextPtr, int index); + + public static native long getTextSegmentT0(long contextPtr, int index); + + public static native long getTextSegmentT1(long contextPtr, int index); + + public static native String getSystemInfo(); + + public static native String benchMemcpy(int nthread); + + public static native String benchGgmlMulMat(int nthread); +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperUtils.java b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperUtils.java new file mode 100644 index 00000000..8e803b7f --- /dev/null +++ b/examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperUtils.java @@ -0,0 +1,34 @@ +package com.whispercpp.java.whisper; + +import android.os.Build; +import android.util.Log; + +import androidx.annotation.RequiresApi; + +import java.io.File; +import java.nio.file.Path; + +/** ABI and cpuinfo helpers used by WhisperLib when choosing a native library. */ public class WhisperUtils { + private static final String LOG_TAG = "LibWhisper"; + + +
/** True when the device's primary ABI is armeabi-v7a. */ public static boolean isArmEabiV7a() { + return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a"); + } + + /** True when the device's primary ABI is arm64-v8a. */ public static boolean isArmEabiV8a() { + return Build.SUPPORTED_ABIS[0].equals("arm64-v8a"); + } + + /** Returns the full text of /proc/cpuinfo, or null when it cannot be read (the failure is logged as a warning). */ @RequiresApi(api = Build.VERSION_CODES.O) + public static String cpuInfo() { + try { + Path path = new File("/proc/cpuinfo").toPath(); + return new String(java.nio.file.Files.readAllBytes(path)); /* decoded with the platform default charset */ + } catch (Exception e) { + Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e); + return null; + } + + } +} \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt b/examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt new file mode 100644 index 00000000..668cd4a7 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt @@ -0,0 +1,56 @@ +cmake_minimum_required(VERSION 3.10) + +project(whisper.cpp) + +set(CMAKE_CXX_STANDARD 11) +set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../) + +set( + SOURCE_FILES + ${WHISPER_LIB_DIR}/ggml.c + ${WHISPER_LIB_DIR}/ggml-alloc.c + ${WHISPER_LIB_DIR}/ggml-backend.c + ${WHISPER_LIB_DIR}/ggml-quants.c + ${WHISPER_LIB_DIR}/whisper.cpp + ${CMAKE_SOURCE_DIR}/jni.c +) + +find_library(LOG_LIB log) + +function(build_library target_name) + add_library( + ${target_name} + SHARED + ${SOURCE_FILES} + ) + + target_link_libraries(${target_name} ${LOG_LIB} android) + + if (${target_name} STREQUAL "whisper_v8fp16_va") + target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16) + elseif (${target_name} STREQUAL "whisper_vfpv4") + target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4) + endif () + + if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") + + target_compile_options(${target_name} PRIVATE -O3) + target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden) + target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections) + + #target_link_options(${target_name} PRIVATE
-Wl,--gc-sections) + #target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL) + #target_link_options(${target_name} PRIVATE -flto) + + endif () +endfunction() + +build_library("whisper") # Default target + +if (${ANDROID_ABI} STREQUAL "arm64-v8a") + build_library("whisper_v8fp16_va") +elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a") + build_library("whisper_vfpv4") +endif () + +include_directories(${WHISPER_LIB_DIR}) diff --git a/examples/whisper.android.java/app/src/main/jni/whisper/jni.c b/examples/whisper.android.java/app/src/main/jni/whisper/jni.c new file mode 100644 index 00000000..f8e7effe --- /dev/null +++ b/examples/whisper.android.java/app/src/main/jni/whisper/jni.c @@ -0,0 +1,257 @@ +/* NOTE(review): the seven bare include directives below have lost their header operands in this copy of the patch; the code that follows needs at least the JNI, Android asset-manager, Android log and string headers - restore them from the original commit. */ #include +#include +#include +#include +#include +#include +#include +#include "whisper.h" +#include "ggml.h" + +#define UNUSED(x) (void)(x) +#define TAG "JNI" + +#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__) +#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__) + +static inline int min(int a, int b) { + return (a < b) ? a : b; +} + +static inline int max(int a, int b) { + return (a > b) ? a : b; +} + +/* State passed to the model-loader callbacks when the model is read from a Java input stream object: the JNI environment, the stream, and the cached method IDs of its available() and read(byte[], int, int) methods. */ struct input_stream_context { + size_t offset; /* running total of bytes handed to whisper */ + JNIEnv * env; + jobject thiz; + jobject input_stream; + + jmethodID mid_available; + jmethodID mid_read; +}; + +/* Loader read callback: copies up to read_size bytes from the Java stream into output, limited by what available() reports, and returns the number of bytes it accounted for. */ size_t inputStreamRead(void * ctx, void * output, size_t read_size) { + struct input_stream_context* is = (struct input_stream_context*)ctx; + + jint avail_size = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available); + jint size_to_copy = read_size < avail_size ?
(jint)read_size : avail_size; + + jbyteArray byte_array = (*is->env)->NewByteArray(is->env, size_to_copy); + + jint n_read = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_read, byte_array, 0, size_to_copy); + + if (size_to_copy != read_size || size_to_copy != n_read) { + LOGI("Insufficient Read: Req=%zu, ToCopy=%d, Available=%d", read_size, size_to_copy, n_read); /* the value printed under "Available" is n_read */ + } + + jbyte* byte_array_elements = (*is->env)->GetByteArrayElements(is->env, byte_array, NULL); + memcpy(output, byte_array_elements, size_to_copy); /* NOTE(review): copies and reports size_to_copy even when n_read is smaller or -1, so a short read is only logged, not reflected in the result - confirm this is acceptable */ + (*is->env)->ReleaseByteArrayElements(is->env, byte_array, byte_array_elements, JNI_ABORT); + + (*is->env)->DeleteLocalRef(is->env, byte_array); /* released per call so repeated reads do not accumulate local references */ + + is->offset += size_to_copy; + + return size_to_copy; +} +/* Loader eof callback: true when the stream's available() returns 0 or less. */ bool inputStreamEof(void * ctx) { + struct input_stream_context* is = (struct input_stream_context*)ctx; + + jint result = (*is->env)->CallIntMethod(is->env, is->input_stream, is->mid_available); + return result <= 0; +} +/* Loader close callback: intentionally empty, the stream is not closed from native code. */ void inputStreamClose(void * ctx) { + +} + +/* JNI: WhisperLib.initContextFromInputStream. Wires the stream callbacks above into a whisper_model_loader, calls whisper_init and returns the resulting pointer as a jlong. The callback state lives on this function's stack, so it is only valid during the call. */ JNIEXPORT jlong JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_initContextFromInputStream( + JNIEnv *env, jobject thiz, jobject input_stream) { + UNUSED(thiz); + + struct whisper_context *context = NULL; + struct whisper_model_loader loader = {}; + struct input_stream_context inp_ctx = {}; + + inp_ctx.offset = 0; + inp_ctx.env = env; + inp_ctx.thiz = thiz; + inp_ctx.input_stream = input_stream; + + jclass cls = (*env)->GetObjectClass(env, input_stream); + inp_ctx.mid_available = (*env)->GetMethodID(env, cls, "available", "()I"); + inp_ctx.mid_read = (*env)->GetMethodID(env, cls, "read", "([BII)I"); + + loader.context = &inp_ctx; + loader.read = inputStreamRead; + loader.eof = inputStreamEof; + loader.close = inputStreamClose; + + loader.eof(loader.context); /* result is discarded */ + + context = whisper_init(&loader); + return (jlong) context; +} + +/* Loader read callback for APK assets: forwards to AAsset_read. */ static size_t asset_read(void *ctx, void *output, size_t read_size) { + return AAsset_read((AAsset *) ctx, output, read_size); +} + +static bool
asset_is_eof(void *ctx) { /* loader eof callback: true when no bytes remain in the asset */ + return AAsset_getRemainingLength64((AAsset *) ctx) <= 0; +} + +/* Loader close callback: closes the asset. */ static void asset_close(void *ctx) { + AAsset_close((AAsset *) ctx); +} + +/* Opens asset_path in streaming mode through the Java AssetManager and initialises a whisper context from it; returns NULL (after logging a warning) when the asset cannot be opened. */ static struct whisper_context *whisper_init_from_asset( + JNIEnv *env, + jobject assetManager, + const char *asset_path +) { + LOGI("Loading model from asset '%s'\n", asset_path); + AAssetManager *asset_manager = AAssetManager_fromJava(env, assetManager); + AAsset *asset = AAssetManager_open(asset_manager, asset_path, AASSET_MODE_STREAMING); + if (!asset) { + LOGW("Failed to open '%s'\n", asset_path); + return NULL; + } + + whisper_model_loader loader = { + .context = asset, + .read = &asset_read, + .eof = &asset_is_eof, + .close = &asset_close + }; + + return whisper_init(&loader); +} + +/* JNI: WhisperLib.initContextFromAsset. Converts the Java path to UTF-8, loads the model from that asset and returns the context pointer as a jlong. */ JNIEXPORT jlong JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_initContextFromAsset( + JNIEnv *env, jobject thiz, jobject assetManager, jstring asset_path_str) { + UNUSED(thiz); + struct whisper_context *context = NULL; + const char *asset_path_chars = (*env)->GetStringUTFChars(env, asset_path_str, NULL); + context = whisper_init_from_asset(env, assetManager, asset_path_chars); + (*env)->ReleaseStringUTFChars(env, asset_path_str, asset_path_chars); + return (jlong) context; +} + +/* JNI: WhisperLib.initContext. Loads the model from a filesystem path via whisper_init_from_file and returns the context pointer as a jlong. */ JNIEXPORT jlong JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_initContext( + JNIEnv *env, jobject thiz, jstring model_path_str) { + UNUSED(thiz); + struct whisper_context *context = NULL; + const char *model_path_chars = (*env)->GetStringUTFChars(env, model_path_str, NULL); + context = whisper_init_from_file(model_path_chars); + (*env)->ReleaseStringUTFChars(env, model_path_str, model_path_chars); + return (jlong) context; +} + +/* JNI: WhisperLib.freeContext. Passes the context pointer to whisper_free. */ JNIEXPORT void JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_freeContext( + JNIEnv *env, jobject thiz, jlong context_ptr) { + UNUSED(env); + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + whisper_free(context); +} + +JNIEXPORT void JNICALL
+Java_com_whispercpp_java_whisper_WhisperLib_fullTranscribe( /* JNI: WhisperLib.fullTranscribe. Runs whisper_full over the float samples with greedy sampling, language fixed to "en" and num_threads threads. A failure is only logged; nothing is reported back to Java. */ + JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) { + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL); + const jsize audio_data_length = (*env)->GetArrayLength(env, audio_data); + + // The below adapted from the Objective-C iOS sample + struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); + params.print_realtime = true; + params.print_progress = false; + params.print_timestamps = true; + params.print_special = false; + params.translate = false; + params.language = "en"; + params.n_threads = num_threads; + params.offset_ms = 0; + params.no_context = true; + params.single_segment = false; + + whisper_reset_timings(context); + + LOGI("About to run whisper_full"); + if (whisper_full(context, params, audio_data_arr, audio_data_length) != 0) { + LOGI("Failed to run the model"); + } else { + whisper_print_timings(context); + } + (*env)->ReleaseFloatArrayElements(env, audio_data, audio_data_arr, JNI_ABORT); /* JNI_ABORT: the samples were only read, so nothing is copied back */ +} + +/* JNI: WhisperLib.getTextSegmentCount. Returns whisper_full_n_segments for the context. */ JNIEXPORT jint JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentCount( + JNIEnv *env, jobject thiz, jlong context_ptr) { + UNUSED(env); + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + return whisper_full_n_segments(context); +} + + +/* JNI: WhisperLib.getTextSegment. Returns the text of segment index as a new Java string. */ JNIEXPORT jstring JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_getTextSegment( + JNIEnv *env, jobject thiz, jlong context_ptr, jint index) { + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + const char *text = whisper_full_get_segment_text(context, index); + jstring string = (*env)->NewStringUTF(env, text); + return string; +} + +/* JNI: WhisperLib.getTextSegmentT0. Returns whisper_full_get_segment_t0 for segment index. */ JNIEXPORT jlong JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentT0(JNIEnv *env, jobject thiz,jlong context_ptr, jint
index) { + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + const int64_t t0 = whisper_full_get_segment_t0(context, index); + return (jlong)t0; +} + +JNIEXPORT jlong JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_getTextSegmentT1(JNIEnv *env, jobject thiz,jlong context_ptr, jint index) { + UNUSED(thiz); + struct whisper_context *context = (struct whisper_context *) context_ptr; + const int64_t t1 = whisper_full_get_segment_t1(context, index); + return (jlong)t1; +} + +JNIEXPORT jstring JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_getSystemInfo( + JNIEnv *env, jobject thiz +) { + UNUSED(thiz); + const char *sysinfo = whisper_print_system_info(); + jstring string = (*env)->NewStringUTF(env, sysinfo); + return string; +} + +JNIEXPORT jstring JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_benchMemcpy(JNIEnv *env, jobject thiz, + jint n_threads) { + UNUSED(thiz); + const char *bench_ggml_memcpy = whisper_bench_memcpy_str(n_threads); + jstring string = (*env)->NewStringUTF(env, bench_ggml_memcpy); +} + +JNIEXPORT jstring JNICALL +Java_com_whispercpp_java_whisper_WhisperLib_benchGgmlMulMat(JNIEnv *env, jobject thiz, + jint n_threads) { + UNUSED(thiz); + const char *bench_ggml_mul_mat = whisper_bench_ggml_mul_mat_str(n_threads); + jstring string = (*env)->NewStringUTF(env, bench_ggml_mul_mat); +} + diff --git a/examples/whisper.android.java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/examples/whisper.android.java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 00000000..5c3bfcd6 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/examples/whisper.android.java/app/src/main/res/drawable/ic_launcher_background.xml b/examples/whisper.android.java/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 
00000000..140f8294 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/whisper.android.java/app/src/main/res/layout/activity_main.xml b/examples/whisper.android.java/app/src/main/res/layout/activity_main.xml new file mode 100644 index 00000000..f78b4ce7 --- /dev/null +++ b/examples/whisper.android.java/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,57 @@ + + + + + +