Merge 5c76377b096a68c60070bd0bd0d32a0a53e0ef9d into 77e0c86ab62eda9392a8567f4c29ab8d140cb0ba

This commit is contained in:
Ranjit 2025-04-04 10:22:05 +05:30 committed by GitHub
commit 67d7716690
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 7 deletions

View File

@ -143,7 +143,7 @@ class MainScreenViewModel(private val application: Application) : ViewModel() {
printMessage("${data.size / (16000 / 1000)} ms\n")
printMessage("Transcribing data...\n")
val start = System.currentTimeMillis()
val text = whisperContext?.transcribeData(data)
val text = whisperContext?.transcribeData(data, tokenTimestamps = false, splitOnWord = false, maxLen = 1)
val elapsed = System.currentTimeMillis() - start
printMessage("Done ($elapsed ms): \n$text\n")
} catch (e: Exception) {
@ -222,4 +222,4 @@ private suspend fun Context.copyData(
}
Log.v(LOG_TAG, "Copied $assetPath to $destination")
}
}
}

View File

@ -16,11 +16,11 @@ class WhisperContext private constructor(private var ptr: Long) {
Executors.newSingleThreadExecutor().asCoroutineDispatcher()
)
suspend fun transcribeData(data: FloatArray, printTimestamp: Boolean = true): String = withContext(scope.coroutineContext) {
suspend fun transcribeData(data: FloatArray, printTimestamp: Boolean = true, tokenTimestamps: Boolean = false, splitOnWord: Boolean = false, maxLen: Int): String = withContext(scope.coroutineContext) {
require(ptr != 0L)
val numThreads = WhisperCpuConfig.preferredThreadCount
Log.d(LOG_TAG, "Selecting $numThreads threads")
WhisperLib.fullTranscribe(ptr, numThreads, data)
WhisperLib.fullTranscribe(ptr, numThreads, data, tokenTimestamps, splitOnWord, maxLen)
val textCount = WhisperLib.getTextSegmentCount(ptr)
return@withContext buildString {
for (i in 0 until textCount) {
@ -134,7 +134,7 @@ private class WhisperLib {
// JNI entry points: each `external fun` has no Kotlin body and is resolved against a native implementation at runtime.
// `contextPtr` is a handle passed through to native code as a Long; presumably it is the value returned by one of the
// init functions below — TODO confirm against the native source.

// Creates a native context from a model stored at `assetPath` inside the given AssetManager; returns the handle as a Long.
external fun initContextFromAsset(assetManager: AssetManager, assetPath: String): Long
// Creates a native context from a model file at `modelPath`; returns the handle as a Long.
external fun initContext(modelPath: String): Long
// Releases the native context identified by `contextPtr`. NOTE(review): presumably the handle must not be reused afterwards — confirm.
external fun freeContext(contextPtr: Long)
// NOTE(review): this 3-argument form and the 6-argument form below look like the before/after versions of one
// declaration in a diff view; only one native `fullTranscribe` implementation is expected — confirm this line is removed.
external fun fullTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray)
// Runs transcription of `audioData` on the native context using `numThreads` threads.
// `tokenTimestamps`, `splitOnWord` and `maxLen` are forwarded to the native transcription parameters
// (token_timestamps, split_on_word, max_len). NOTE(review): in whisper.cpp max_len appears to take effect only
// when token timestamps are enabled — verify before relying on maxLen with tokenTimestamps = false.
external fun fullTranscribe(contextPtr: Long, numThreads: Int, audioData: FloatArray, tokenTimestamps: Boolean, splitOnWord: Boolean, maxLen: Int)
// Number of text segments available on the context; callers iterate indices 0 until this count after fullTranscribe.
external fun getTextSegmentCount(contextPtr: Long): Int
// Returns the text of the segment at `index` (expected range: 0 until getTextSegmentCount).
external fun getTextSegment(contextPtr: Long, index: Int): String
// Returns a Long associated with the segment at `index`; presumably the segment start time (t0) — units not visible here, TODO confirm.
external fun getTextSegmentT0(contextPtr: Long, index: Int): Long
@ -177,4 +177,4 @@ private fun cpuInfo(): String? {
Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e)
null
}
}
}

View File

@ -163,7 +163,7 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_freeContext(
JNIEXPORT void JNICALL
Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data) {
JNIEnv *env, jobject thiz, jlong context_ptr, jint num_threads, jfloatArray audio_data, jboolean token_timestamps, jboolean split_on_word, jint max_len) {
UNUSED(thiz);
struct whisper_context *context = (struct whisper_context *) context_ptr;
jfloat *audio_data_arr = (*env)->GetFloatArrayElements(env, audio_data, NULL);
@ -181,6 +181,9 @@ Java_com_whispercpp_whisper_WhisperLib_00024Companion_fullTranscribe(
params.offset_ms = 0;
params.no_context = true;
params.single_segment = false;
params.token_timestamps = token_timestamps;
params.split_on_word = split_on_word;
params.max_len = max_len;
whisper_reset_timings(context);