From c4c2380cf1815ebf2d1833f9da2117abe7872e6e Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 8 Feb 2025 13:00:13 +0530 Subject: [PATCH 1/6] Update jni.c for enabling word level timestamps in whisper.cpp android Updated jni.c for enabling word level timestamps in whisper.cpp android --- examples/whisper.android/lib/src/main/jni/whisper/jni.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/whisper.android/lib/src/main/jni/whisper/jni.c b/examples/whisper.android/lib/src/main/jni/whisper/jni.c index da54c814..02e4121d 100644 --- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c +++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c @@ -181,6 +181,9 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( params.offset_ms = 0; params.no_context = true; params.single_segment = false; + params.token_timestamps = true; // Enable for word level timestamps + params.split_on_word = true; // Enable for word level timestamps + params.max_len = 1; // Set number of words in a line whisper_reset_timings(context); From 667d2476a7b5c4626d45631531a7ebae253ed2ff Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 8 Feb 2025 14:43:25 +0530 Subject: [PATCH 2/6] Update jni.c for enabling word timestamp (Default is off) Update jni.c for enabling word level timestamps in whisper.cpp Android Default is set to off. 
If anyone wants, they can set it to true to enable word-level timestamps --- examples/whisper.android/lib/src/main/jni/whisper/jni.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/whisper.android/lib/src/main/jni/whisper/jni.c b/examples/whisper.android/lib/src/main/jni/whisper/jni.c index 02e4121d..1fa8a962 100644 --- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c +++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c @@ -181,8 +181,8 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( params.offset_ms = 0; params.no_context = true; params.single_segment = false; - params.token_timestamps = true; // Enable for word level timestamps - params.split_on_word = true; // Enable for word level timestamps + params.token_timestamps = false; // Set true for word level timestamps + params.split_on_word = false; // Set true for word level timestamps params.max_len = 1; // Set number of words in a line whisper_reset_timings(context); From 6b8b91ad814ca2d51f4939b2a1eb9a9817334742 Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 15 Feb 2025 12:00:26 +0530 Subject: [PATCH 3/6] Update jni.c --- examples/whisper.android/lib/src/main/jni/whisper/jni.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/whisper.android/lib/src/main/jni/whisper/jni.c b/examples/whisper.android/lib/src/main/jni/whisper/jni.c index 1fa8a962..e244d4ac 100644 --- a/examples/whisper.android/lib/src/main/jni/whisper/jni.c +++ b/examples/whisper.android/lib/src/main/jni/whisper/jni.c @@ -163,7 +163,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext( JNIEXPORT void JNICALL Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( - JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) { + JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data, jboolean 
token_timestamps, jboolean split_on_word, jint max_len) { UNUSED(thiz); struct whisper_context *context = (struct whisper_context *) context_ptr; jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL); @@ -181,9 +181,9 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe( params.offset_ms = 0; params.no_context = true; params.single_segment = false; - params.token_timestamps = false; // Set true for word level timestamps - params.split_on_word = false; // Set true for word level timestamps - params.max_len = 1; // Set number of words in a line + params.token_timestamps = token_timestamps; + params.split_on_word = split_on_word; + params.max_len = max_len; whisper_reset_timings(context); From 85f7c8f4d984d393ddda967263889c43552b1c4e Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 15 Feb 2025 12:08:37 +0530 Subject: [PATCH 4/6] Update LibWhisper.kt --- .../src/main/java/com/whispercpp/whisper/LibWhisper.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt index 37ae0e9d..506fd73d 100644 --- a/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt +++ b/examples/whisper.android/lib/src/main/java/com/whispercpp/whisper/LibWhisper.kt @@ -16,11 +16,11 @@ class WhisperContext private constructor(private var ptr: Long) { Executors.newSingleThreadExecutor().asCoroutineDispatcher() ) - suspend fun transcribeData(data: FloatArray, printTimestamp: Boolean = true): String = withContext(scope.coroutineContext) { + suspend fun transcribeData(data: FloatArray, printTimestamp: Boolean = true, tokenTimestamps: Boolean = false, splitOnWord: Boolean = false, maxLen: Int): String = withContext(scope.coroutineContext) { require(ptr != 0L) val numThreads = 
WhisperCpuConfig.preferredThreadCount Log.d(LOG_TAG, "Selecting $numThreads threads") - WhisperLib.fullTranscribe(ptr, numThreads, data) + WhisperLib.fullTranscribe(ptr, numThreads, data, tokenTimestamps, splitOnWord, maxLen) val textCount = WhisperLib.getTextSegmentCount(ptr) return@withContext buildString { for (i in 0 until textCount) { @@ -134,7 +134,7 @@ private class WhisperLib { external fun initContextFromAsset(assetManager: AssetManager, assetPath: String): Long external fun initContext(modelPath: String): Long external fun freeContext(contextPtr: Long) - external fun fullTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray) + external fun fullTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray, tokenTimestamps: Boolean, splitOnWord: Boolean, maxLen: Int) external fun getTextSegmentCount(contextPtr: Long): Int external fun getTextSegment(contextPtr: Long, index: Int): String external fun getTextSegmentT0(contextPtr: Long, index: Int): Long @@ -177,4 +177,4 @@ private fun cpuInfo(): String? 
{ Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e) null } -} \ No newline at end of file +} From 5ff2a958d948d3ea666a9e4ce2df6a84acf7fb2a Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 15 Feb 2025 12:17:18 +0530 Subject: [PATCH 5/6] Update MainScreenViewModel.kt --- .../java/com/whispercppdemo/ui/main/MainScreenViewModel.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt index 845b023a..f3bcfbcd 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt @@ -143,7 +143,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { printMessage("${data.size / (16000 / 1000)} ms\n") printMessage("Transcribing data...\n") val start = System.currentTimeMillis() - val text = whisperContext?.transcribeData(data) + val text = whisperContext?.transcribeDataWithParams(data, tokenTimestamps = false, splitOnWord = false, maxLen = 1) val elapsed = System.currentTimeMillis() - start printMessage("Done ($elapsed ms): \n$text\n") } catch (e: Exception) { @@ -222,4 +222,4 @@ private suspend fun Context.copyData( } Log.v(LOG_TAG, "Copied $assetPath to $destination") } -} \ No newline at end of file +} From 5c76377b096a68c60070bd0bd0d32a0a53e0ef9d Mon Sep 17 00:00:00 2001 From: Ranjit <111440072+ranjitsingha@users.noreply.github.com> Date: Sat, 15 Feb 2025 15:52:10 +0530 Subject: [PATCH 6/6] Update MainScreenViewModel.kt --- .../main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt 
b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt index f3bcfbcd..fedf18ee 100644 --- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt +++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt @@ -143,7 +143,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() { printMessage("${data.size / (16000 / 1000)} ms\n") printMessage("Transcribing data...\n") val start = System.currentTimeMillis() - val text = whisperContext?.transcribeDataWithParams(data, tokenTimestamps = false, splitOnWord = false, maxLen = 1) + val text = whisperContext?.transcribeData(data, tokenTimestamps = false, splitOnWord = false, maxLen = 1) val elapsed = System.currentTimeMillis() - start printMessage("Done ($elapsed ms): \n$text\n") } catch (e: Exception) {