Daniel Bevenius 83b14c357c
Some checks are pending
CI / ubuntu-22 (linux/amd64) (push) Waiting to run
CI / ubuntu-22 (linux/ppc64le) (push) Waiting to run
CI / ubuntu-22-arm64 (linux/arm64) (push) Waiting to run
CI / ubuntu-22-arm-v7 (linux/arm/v7) (push) Waiting to run
CI / macOS-latest (generic/platform=iOS) (push) Waiting to run
CI / macOS-latest (generic/platform=macOS) (push) Waiting to run
CI / macOS-latest (generic/platform=tvOS) (push) Waiting to run
CI / ubuntu-22-gcc (linux/amd64, Debug) (push) Waiting to run
CI / ubuntu-22-gcc (linux/amd64, Release) (push) Waiting to run
CI / ubuntu-22-gcc (linux/ppc64le, Debug) (push) Waiting to run
CI / ubuntu-22-gcc (linux/ppc64le, Release) (push) Waiting to run
CI / ubuntu-22-gcc-arm64 (linux/arm64, Debug) (push) Waiting to run
CI / ubuntu-22-gcc-arm64 (linux/arm64, Release) (push) Waiting to run
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Debug) (push) Waiting to run
CI / ubuntu-22-gcc-arm-v7 (linux/arm/v7, Release) (push) Waiting to run
CI / ubuntu-22-clang (linux/amd64, Debug) (push) Waiting to run
CI / ubuntu-22-clang (linux/amd64, Release) (push) Waiting to run
CI / ubuntu-22-clang (linux/arm64, Debug) (push) Waiting to run
CI / ubuntu-22-clang (linux/arm64, Release) (push) Waiting to run
CI / ubuntu-22-clang (linux/ppc64le, Debug) (push) Waiting to run
CI / ubuntu-22-clang (linux/ppc64le, Release) (push) Waiting to run
CI / ubuntu-22-gcc-sanitized (linux/amd64, ADDRESS) (push) Waiting to run
CI / ubuntu-22-gcc-sanitized (linux/amd64, THREAD) (push) Waiting to run
CI / ubuntu-22-gcc-sanitized (linux/amd64, UNDEFINED) (push) Waiting to run
CI / ubuntu-22-cmake-sycl (linux/amd64, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl (linux/arm/v7, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl (linux/arm64, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl (linux/ppc64le, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl-fp16 (linux/amd64, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm/v7, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm64, icx, icpx, ON) (push) Waiting to run
CI / ubuntu-22-cmake-sycl-fp16 (linux/ppc64le, icx, icpx, ON) (push) Waiting to run
CI / windows-msys2 (Release, clang-x86_64, CLANG64) (push) Waiting to run
CI / windows-msys2 (Release, ucrt-x86_64, UCRT64) (push) Waiting to run
CI / windows (Win32, Release, win32-x86, x86, 2.28.5, ON) (push) Waiting to run
CI / windows (x64, Release, win32-x86-64, x64, 2.28.5, ON) (push) Waiting to run
CI / windows-blas (Win32, ON, Release, x86, 2.28.5, ON) (push) Waiting to run
CI / windows-blas (x64, ON, Release, x64, 2.28.5, ON) (push) Waiting to run
CI / windows-cublas (x64, Release, ON, 11.8.0, ON, 2.28.5) (push) Waiting to run
CI / windows-cublas (x64, Release, ON, 12.2.0, ON, 2.28.5) (push) Waiting to run
CI / emscripten (Release) (push) Waiting to run
CI / ios-xcode-build (Release) (push) Waiting to run
CI / android (push) Waiting to run
CI / quantize (push) Waiting to run
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main.Dockerfile platform:linux/amd64 tag:main]) (push) Waiting to run
examples : use xcframework in whisper.objc example (#2882)
* examples : use xcframework in whisper.objc example

This commit updates the whisper.objc example to use the xcframework.

The motivation for this is to be consistent with the swift example and to
also act as a reference for how to use the xcframework in an objc
project.

Resolves: https://github.com/ggerganov/whisper.cpp/issues/2881

* examples : setup audio session in viewDidLoad

This commit adds the setup of the audio session in the viewDidLoad
method of the ViewController.m file. This is necessary to allow the app
to record audio.

The motivation for this is that without this it was not possible to
capture audio from the microphone. It was possible to click on the
Capture button but nothing happened after that, and the button was not
marked red indicating that the button could be clicked again to stop
capturing. With this change it is possible to capture audio from the
microphone and get it transcribed.
2025-03-17 13:01:24 +01:00

318 lines
9.9 KiB
Objective-C

//
// ViewController.m
// whisper.objc
//
// Created by Georgi Gerganov on 23.10.22.
//
#import "ViewController.h"
#import <whisper/whisper.h>
// Size in bytes of each audio queue buffer allocated in -toggleCapture:.
// Parenthesized so the macro expands safely inside larger expressions.
#define NUM_BYTES_PER_BUFFER (16*1024)
// Callback used to process captured audio.
// It is registered with AudioQueueNewInput in -toggleCapture:, which passes
// &stateInp as inUserData. The definition follows the ViewController methods.
void AudioInputCallback(void * inUserData,
AudioQueueRef inAQ,
AudioQueueBufferRef inBuffer,
const AudioTimeStamp * inStartTime,
UInt32 inNumberPacketDescriptions,
const AudioStreamPacketDescription * inPacketDescs);
// Private class extension: Interface Builder outlets used by the implementation.
@interface ViewController ()

// Shows the capture status text ("Status: Idle" / "Status: Capturing").
@property (nonatomic, weak) IBOutlet UILabel *labelStatusInp;

// Start/stop capture button; its title and background color follow the capture state.
@property (nonatomic, weak) IBOutlet UIButton *buttonToggleCapture;

// Transcribe button outlet.
@property (nonatomic, weak) IBOutlet UIButton *buttonTranscribe;

// Realtime toggle button; green while realtime mode is on, gray otherwise.
@property (nonatomic, weak) IBOutlet UIButton *buttonRealtime;

// Displays the progress message and the transcription result.
@property (nonatomic, weak) IBOutlet UITextView *textviewResult;

@end
@implementation ViewController
/// Fills `format` with the capture format used by this example:
/// mono, 16-bit signed-integer linear PCM at WHISPER_SAMPLE_RATE,
/// one frame per packet.
///
/// @param format Description to overwrite; every field is assigned.
- (void)setupAudioFormat:(AudioStreamBasicDescription*)format
{
    // One 16-bit sample per frame and one frame per packet, so both sizes are 2 bytes.
    const UInt32 bytesPerSample = 2;

    *format = (AudioStreamBasicDescription) {
        .mSampleRate       = WHISPER_SAMPLE_RATE,
        .mFormatID         = kAudioFormatLinearPCM,
        .mFormatFlags      = kLinearPCMFormatFlagIsSignedInteger,
        .mBytesPerPacket   = bytesPerSample,
        .mFramesPerPacket  = 1,
        .mBytesPerFrame    = bytesPerSample,
        .mChannelsPerFrame = 1,
        .mBitsPerChannel   = 16,
        .mReserved         = 0,
    };
}
/// Loads the bundled whisper model, prepares the capture format and sample
/// buffers, and configures the shared audio session for recording.
///
/// Returns early, before any audio state is set up, when the model file is
/// missing or cannot be loaded.
- (void)viewDidLoad {
    [super viewDidLoad];

    // whisper.cpp initialization
    {
        // load the model
        NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"ggml-base.en" ofType:@"bin"];

        // check if the model exists (pathForResource returns nil when it is not bundled)
        if (modelPath == nil || ![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
            NSLog(@"Model file not found");
            return;
        }

        NSLog(@"Loading model from %@", modelPath);

        // create ggml context
        struct whisper_context_params cparams = whisper_context_default_params();
#if TARGET_OS_SIMULATOR
        cparams.use_gpu = false;
        NSLog(@"Running on simulator, using CPU");
#endif
        stateInp.ctx = whisper_init_from_file_with_params([modelPath UTF8String], cparams);

        // check if the model was loaded successfully
        if (stateInp.ctx == NULL) {
            NSLog(@"Failed to load model");
            return;
        }
    }

    // initialize audio format and buffers
    {
        [self setupAudioFormat:&stateInp.dataFormat];

        stateInp.n_samples = 0;
        stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
        stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));

        // Set up audio session.
        // Failure is signalled by the BOOL return value, not by the NSError pointer,
        // and each call gets a fresh error so a category failure is not also
        // reported as an activation failure.
        AVAudioSession *session = [AVAudioSession sharedInstance];

        NSError *categoryError = nil;
        if (![session setCategory:AVAudioSessionCategoryRecord error:&categoryError]) {
            NSLog(@"Error setting audio session category: %@", categoryError);
        }

        NSError *activationError = nil;
        if (![session setActive:YES error:&activationError]) {
            NSLog(@"Error activating audio session: %@", activationError);
        }
    }

    stateInp.isTranscribing = false;
    stateInp.isRealtime = false;
}
/// Returns the UI to the idle state and tears down the running capture queue.
///
/// Safe to call when no capture is in progress: the UI is still reset, but the
/// audio queue is left alone. This matters because the method is also reached
/// when starting a capture failed, and it may be requested more than once for
/// the same capture session.
-(IBAction) stopCapturing {
    NSLog(@"Stop capturing");

    _labelStatusInp.text = @"Status: Idle";

    [_buttonToggleCapture setTitle:@"Start capturing" forState:UIControlStateNormal];
    [_buttonToggleCapture setBackgroundColor:[UIColor grayColor]];

    // Without an active capture there is no valid queue to stop or dispose;
    // touching a never-created or already-disposed queue is invalid.
    if (!stateInp.isCapturing) {
        return;
    }

    stateInp.isCapturing = false;

    AudioQueueStop(stateInp.queue, true);
    for (int i = 0; i < NUM_BUFFERS; i++) {
        AudioQueueFreeBuffer(stateInp.queue, stateInp.buffers[i]);
    }

    AudioQueueDispose(stateInp.queue, true);

    // Drop the stale handle so the disposed queue cannot be reused by accident.
    stateInp.queue = NULL;
}
/// Starts audio capture, or stops it when a capture is already running.
///
/// Starting creates a new input queue for `stateInp.dataFormat`, allocates and
/// enqueues NUM_BUFFERS buffers of NUM_BYTES_PER_BUFFER bytes and starts the
/// queue. Any failure along the way releases what was created and leaves the
/// controller in the idle state.
///
/// @param sender The control that triggered the action; its title is updated
///               once capturing has started.
- (IBAction)toggleCapture:(id)sender {
    if (stateInp.isCapturing) {
        // stop capturing
        [self stopCapturing];

        return;
    }

    // viewDidLoad returns before allocating the sample buffers when the model is
    // missing or fails to load; capturing would then write through a NULL pointer.
    if (stateInp.audioBufferI16 == NULL) {
        NSLog(@"Audio buffers are not initialized, cannot start capturing");
        return;
    }

    // initiate audio capturing
    NSLog(@"Start capturing");

    stateInp.n_samples = 0;
    stateInp.vc = (__bridge void *)(self);

    OSStatus status = AudioQueueNewInput(&stateInp.dataFormat,
                                         AudioInputCallback,
                                         &stateInp,
                                         CFRunLoopGetCurrent(),
                                         kCFRunLoopCommonModes,
                                         0,
                                         &stateInp.queue);
    if (status != 0) {
        // No queue was created, so there is nothing to stop or dispose.
        NSLog(@"Failed to create audio input queue: %d", (int)status);
        return;
    }

    for (int i = 0; i < NUM_BUFFERS && status == 0; i++) {
        status = AudioQueueAllocateBuffer(stateInp.queue, NUM_BYTES_PER_BUFFER, &stateInp.buffers[i]);
        if (status == 0) {
            status = AudioQueueEnqueueBuffer(stateInp.queue, stateInp.buffers[i], 0, NULL);
        }
    }

    if (status != 0) {
        // Some buffers may not exist, so do not free them one by one:
        // disposing the queue releases whatever buffers it owns.
        NSLog(@"Failed to prepare audio queue buffers: %d", (int)status);
        AudioQueueDispose(stateInp.queue, true);
        return;
    }

    // Set before starting the queue: the input callback ignores buffers while this is false.
    stateInp.isCapturing = true;

    status = AudioQueueStart(stateInp.queue, NULL);
    if (status != 0) {
        // Queue and buffers are valid here, so the regular teardown applies.
        NSLog(@"Failed to start audio queue: %d", (int)status);
        [self stopCapturing];
        return;
    }

    _labelStatusInp.text = @"Status: Capturing";
    [sender setTitle:@"Stop Capturing" forState:UIControlStateNormal];
    [_buttonToggleCapture setBackgroundColor:[UIColor redColor]];
}
/// Prepares the UI and capture state for a transcription: shows the progress
/// message, switches realtime mode off if it is on, and stops a running capture.
///
/// @param sender The control that triggered the action; forwarded to -onRealtime:.
- (IBAction)onTranscribePrepare:(id)sender {
    _textviewResult.text = @"Processing - please wait ...";

    // -onRealtime: toggles the flag, so calling it while realtime is on turns it off.
    const bool realtimeWasOn = stateInp.isRealtime;
    if (realtimeWasOn) {
        [self onRealtime:sender];
    }

    if (!stateInp.isCapturing) {
        return;
    }

    [self stopCapturing];
}
/// Toggles realtime mode and recolors the realtime button to match:
/// green when on, gray when off.
///
/// @param sender The control that triggered the action (unused).
- (IBAction)onRealtime:(id)sender {
    const bool enabled = !stateInp.isRealtime;
    stateInp.isRealtime = enabled;

    UIColor *indicatorColor = enabled ? [UIColor greenColor] : [UIColor grayColor];
    [_buttonRealtime setBackgroundColor:indicatorColor];

    NSLog(@"Realtime: %@", enabled ? @"ON" : @"OFF");
}
/// Runs the whisper model over the audio captured so far and shows the text,
/// together with the recording and processing times, in the result text view.
///
/// The model runs on a global background queue; UI and `isTranscribing`
/// updates are dispatched back to the main queue. A call made while a
/// transcription is already in flight is ignored.
///
/// @param sender The control that triggered the action (unused; nil when
///               invoked from the audio callback in realtime mode).
- (IBAction)onTranscribe:(id)sender {
    if (stateInp.isTranscribing) {
        return;
    }

    // viewDidLoad leaves the context NULL when the model is missing or fails to load.
    if (stateInp.ctx == NULL) {
        NSLog(@"Failed to run the model");
        _textviewResult.text = @"Failed to run the model";
        return;
    }

    // Snapshot the values the background work depends on. Capture may keep
    // appending samples while the model runs, and re-reading n_samples there
    // could process more samples than were converted to float.
    const int  n_samples  = stateInp.n_samples;
    const bool isRealtime = stateInp.isRealtime;

    NSLog(@"Processing %d samples", n_samples);

    stateInp.isTranscribing = true;

    // dispatch the model to a background thread
    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
        // process captured audio
        // convert I16 to F32
        for (int i = 0; i < n_samples; i++) {
            self->stateInp.audioBufferF32[i] = (float)self->stateInp.audioBufferI16[i] / 32768.0f;
        }

        // run the model
        struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

        // get maximum number of threads on this device (max 8)
        const int max_threads = MIN(8, (int)[[NSProcessInfo processInfo] processorCount]);

        params.print_realtime   = true;
        params.print_progress   = false;
        params.print_timestamps = true;
        params.print_special    = false;
        params.translate        = false;
        params.language         = "en";
        params.n_threads        = max_threads;
        params.offset_ms        = 0;
        params.no_context       = true;
        params.single_segment   = isRealtime;
        params.no_timestamps    = params.single_segment;

        CFTimeInterval startTime = CACurrentMediaTime();

        whisper_reset_timings(self->stateInp.ctx);

        if (whisper_full(self->stateInp.ctx, params, self->stateInp.audioBufferF32, n_samples) != 0) {
            NSLog(@"Failed to run the model");

            // UIKit must be updated on the main thread, and the flag has to be
            // cleared or every later transcription request would be ignored.
            dispatch_async(dispatch_get_main_queue(), ^{
                self->_textviewResult.text = @"Failed to run the model";
                self->stateInp.isTranscribing = false;
            });

            return;
        }

        whisper_print_timings(self->stateInp.ctx);

        CFTimeInterval endTime = CACurrentMediaTime();

        NSLog(@"\nProcessing time: %5.3f, on %d threads", endTime - startTime, params.n_threads);

        // result text
        NSMutableString *result = [NSMutableString string];

        int n_segments = whisper_full_n_segments(self->stateInp.ctx);
        for (int i = 0; i < n_segments; i++) {
            const char * text_cur = whisper_full_get_segment_text(self->stateInp.ctx, i);

            // stringWithUTF8String: returns nil for bytes that are not valid UTF-8;
            // appending nil would raise an exception, so such segments are skipped.
            NSString *segment = (text_cur != NULL) ? [NSString stringWithUTF8String:text_cur] : nil;
            if (segment != nil) {
                [result appendString:segment];
            }
        }

        const float tRecording = (float)n_samples / (float)self->stateInp.dataFormat.mSampleRate;

        // append recording and processing time
        [result appendFormat:@"\n\n[recording time: %5.3f s]", tRecording];
        [result appendFormat:@" \n[processing time: %5.3f s]", endTime - startTime];

        // dispatch the result to the main thread
        dispatch_async(dispatch_get_main_queue(), ^{
            self->_textviewResult.text = result;
            self->stateInp.isTranscribing = false;
        });
    });
}
//
// Callback implementation
//
// Audio queue input callback: appends the 16-bit samples in inBuffer to the
// capture buffer and hands the queue buffer back for reuse.
//
// inUserData is the StateInp passed to AudioQueueNewInput. When the capture
// buffer (MAX_AUDIO_SEC*SAMPLE_RATE samples) would overflow, the incoming
// buffer is dropped and a stop is requested on the main queue. In realtime
// mode every accepted buffer also requests a transcription on the main queue.
// The remaining parameters are unused.
void AudioInputCallback(void * inUserData,
                        AudioQueueRef inAQ,
                        AudioQueueBufferRef inBuffer,
                        const AudioTimeStamp * inStartTime,
                        UInt32 inNumberPacketDescriptions,
                        const AudioStreamPacketDescription * inPacketDescs)
{
    StateInp * stateInp = (StateInp*)inUserData;

    if (!stateInp->isCapturing) {
        NSLog(@"Not capturing, ignoring audio");
        return;
    }

    // each sample is 2 bytes (16-bit PCM)
    const int n = inBuffer->mAudioDataByteSize / 2;

    NSLog(@"Captured %d new samples", n);

    if (stateInp->n_samples + n > MAX_AUDIO_SEC*SAMPLE_RATE) {
        NSLog(@"Too much audio data, ignoring");

        dispatch_async(dispatch_get_main_queue(), ^{
            // Several buffers can overflow before the first stop request runs;
            // only the first one may tear the queue down.
            if (!stateInp->isCapturing) {
                return;
            }

            ViewController * vc = (__bridge ViewController *)(stateInp->vc);
            [vc stopCapturing];
        });

        return;
    }

    for (int i = 0; i < n; i++) {
        stateInp->audioBufferI16[stateInp->n_samples + i] = ((short*)inBuffer->mAudioData)[i];
    }

    stateInp->n_samples += n;

    // put the buffer back in the queue
    const OSStatus status = AudioQueueEnqueueBuffer(stateInp->queue, inBuffer, 0, NULL);
    if (status != 0) {
        NSLog(@"Failed to re-enqueue audio buffer: %d", (int)status);
    }

    if (stateInp->isRealtime) {
        // dispatch onTranscribe() to the main thread
        dispatch_async(dispatch_get_main_queue(), ^{
            ViewController * vc = (__bridge ViewController *)(stateInp->vc);
            [vc onTranscribe:nil];
        });
    }
}
@end