examples : add whisper.swiftui demo app (#308)
* Add SwiftUI demo project
* Add -DGGML_USE_ACCELERATE
@@ -0,0 +1,162 @@
import Foundation
import SwiftUI
import AVFoundation

@MainActor
class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
    @Published var isModelLoaded = false
    @Published var messageLog = ""
    @Published var canTranscribe = false
    @Published var isRecording = false

    private var whisperContext: WhisperContext?
    private let recorder = Recorder()
    private var recordedFile: URL? = nil
    private var audioPlayer: AVAudioPlayer?

    private var modelUrl: URL? {
        Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models")
    }

    private var sampleUrl: URL? {
        Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples")
    }

    private enum LoadError: Error {
        case couldNotLocateModel
    }

    override init() {
        super.init()
        do {
            try loadModel()
            canTranscribe = true
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }
    }

    private func loadModel() throws {
        messageLog += "Loading model...\n"
        if let modelUrl {
            whisperContext = try WhisperContext.createContext(path: modelUrl.path())
            messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
        } else {
            messageLog += "Could not locate model\n"
        }
    }

    func transcribeSample() async {
        if let sampleUrl {
            await transcribeAudio(sampleUrl)
        } else {
            messageLog += "Could not locate sample\n"
        }
    }

    private func transcribeAudio(_ url: URL) async {
        if !canTranscribe {
            return
        }
        guard let whisperContext else {
            return
        }

        do {
            canTranscribe = false
            messageLog += "Reading wave samples...\n"
            let data = try readAudioSamples(url)
            messageLog += "Transcribing data...\n"
            await whisperContext.fullTranscribe(samples: data)
            let text = await whisperContext.getTranscription()
            messageLog += "Done: \(text)\n"
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }

        canTranscribe = true
    }

    private func readAudioSamples(_ url: URL) throws -> [Float] {
        stopPlayback()
        try startPlayback(url)
        return try decodeWaveFile(url)
    }

    func toggleRecord() async {
        if isRecording {
            await recorder.stopRecording()
            isRecording = false
            if let recordedFile {
                await transcribeAudio(recordedFile)
            }
        } else {
            requestRecordPermission { granted in
                if granted {
                    Task {
                        do {
                            self.stopPlayback()
                            let file = try FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                                .appending(path: "output.wav")
                            try await self.recorder.startRecording(toOutputFile: file, delegate: self)
                            self.isRecording = true
                            self.recordedFile = file
                        } catch {
                            print(error.localizedDescription)
                            self.messageLog += "\(error.localizedDescription)\n"
                            self.isRecording = false
                        }
                    }
                }
            }
        }
    }

    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
#if os(macOS)
        response(true)
#else
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            response(granted)
        }
#endif
    }

    private func startPlayback(_ url: URL) throws {
        audioPlayer = try AVAudioPlayer(contentsOf: url)
        audioPlayer?.play()
    }

    private func stopPlayback() {
        audioPlayer?.stop()
        audioPlayer = nil
    }

    // MARK: AVAudioRecorderDelegate

    nonisolated func audioRecorderEncodeErrorDidOccur(_ recorder: AVAudioRecorder, error: Error?) {
        if let error {
            Task {
                await handleRecError(error)
            }
        }
    }

    private func handleRecError(_ error: Error) {
        print(error.localizedDescription)
        messageLog += "\(error.localizedDescription)\n"
        isRecording = false
    }

    nonisolated func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Task {
            await onDidFinishRecording()
        }
    }

    private func onDidFinishRecording() {
        isRecording = false
    }
}
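WhisperState above calls into a WhisperContext type that is not part of this hunk. As a reading aid, here is a minimal sketch of what such a wrapper might look like, assuming the whisper.cpp C API (whisper_init_from_file or whisper_init depending on the library version, whisper_full, whisper_full_n_segments, whisper_full_get_segment_text, whisper_free) is exposed to Swift through a bridging header; the file actually shipped with the commit may differ:

import Foundation

// Hypothetical sketch, not part of this hunk: an actor that owns a
// whisper.cpp context and serializes all access to it, since the C API
// must not be called on the same context from multiple threads at once.
actor WhisperContext {
    private var context: OpaquePointer

    init(context: OpaquePointer) {
        self.context = context
    }

    deinit {
        whisper_free(context)
    }

    static func createContext(path: String) throws -> WhisperContext {
        // whisper_init_from_file in newer whisper.cpp trees; whisper_init in older ones
        guard let context = whisper_init_from_file(path) else {
            throw NSError(domain: "WhisperContext", code: 1)
        }
        return WhisperContext(context: context)
    }

    func fullTranscribe(samples: [Float]) {
        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
        params.n_threads = Int32(max(1, ProcessInfo.processInfo.processorCount - 2))
        samples.withUnsafeBufferPointer { buffer in
            if whisper_full(context, params, buffer.baseAddress, Int32(buffer.count)) != 0 {
                print("Failed to run the model")
            }
        }
    }

    func getTranscription() -> String {
        var transcription = ""
        for i in 0..<whisper_full_n_segments(context) {
            transcription += String(cString: whisper_full_get_segment_text(context, i))
        }
        return transcription
    }
}

The actor isolation is what lets WhisperState call fullTranscribe and getTranscription with a plain await while the UI stays responsive on the main actor.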
@@ -0,0 +1,11 @@
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,63 @@
{
  "images" : [
    {
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "16x16"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "32x32"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "128x128"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "256x256"
    },
    {
      "idiom" : "mac",
      "scale" : "1x",
      "size" : "512x512"
    },
    {
      "idiom" : "mac",
      "scale" : "2x",
      "size" : "512x512"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,6 @@
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>com.apple.security.app-sandbox</key>
    <true/>
    <key>com.apple.security.device.audio-input</key>
    <true/>
    <key>com.apple.security.files.user-selected.read-only</key>
    <true/>
</dict>
</plist>
@@ -0,0 +1,43 @@
import SwiftUI
import AVFoundation

struct ContentView: View {
    @StateObject var whisperState = WhisperState()

    var body: some View {
        NavigationStack {
            VStack {
                HStack {
                    Button("Transcribe", action: {
                        Task {
                            await whisperState.transcribeSample()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)

                    Button(whisperState.isRecording ? "Stop recording" : "Start recording", action: {
                        Task {
                            await whisperState.toggleRecord()
                        }
                    })
                    .buttonStyle(.bordered)
                    .disabled(!whisperState.canTranscribe)
                }

                ScrollView {
                    Text(verbatim: whisperState.messageLog)
                        .frame(maxWidth: .infinity, alignment: .leading)
                }
            }
            .navigationTitle("Whisper SwiftUI Demo")
            .padding()
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}
@@ -0,0 +1,35 @@
import Foundation
import AVFoundation

actor Recorder {
    private var recorder: AVAudioRecorder?

    enum RecorderError: Error {
        case couldNotStartRecording
    }

    func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) throws {
        let recordSettings: [String : Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000.0,
            AVNumberOfChannelsKey: 1,
            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
        ]
#if !os(macOS)
        let session = AVAudioSession.sharedInstance()
        try session.setCategory(.playAndRecord, mode: .default)
#endif
        let recorder = try AVAudioRecorder(url: url, settings: recordSettings)
        recorder.delegate = delegate
        if recorder.record() == false {
            print("Could not start recording")
            throw RecorderError.couldNotStartRecording
        }
        self.recorder = recorder
    }

    func stopRecording() {
        recorder?.stop()
        recorder = nil
    }
}
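Recorder wraps a single AVAudioRecorder behind an actor so that starting and stopping are serialized. The settings request 16 kHz mono linear PCM, which matches what decodeWaveFile and whisper.cpp expect. A hypothetical standalone usage (the file name and the fixed 3-second capture are made up for illustration; in the demo the actor is driven from WhisperState.toggleRecord()):

// Hypothetical standalone usage of the Recorder actor above:
let recorder = Recorder()
let output = FileManager.default.temporaryDirectory.appending(path: "take.wav")
Task {
    do {
        try await recorder.startRecording(toOutputFile: output, delegate: nil)
        try await Task.sleep(nanoseconds: 3_000_000_000) // capture ~3 seconds
        await recorder.stopRecording()
    } catch {
        print(error.localizedDescription)
    }
}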
@@ -0,0 +1,12 @@
import Foundation

func decodeWaveFile(_ url: URL) throws -> [Float] {
    let data = try Data(contentsOf: url)
    let floats = stride(from: 44, to: data.count, by: 2).map {
        return data[$0..<$0 + 2].withUnsafeBytes {
            let short = Int16(littleEndian: $0.load(as: Int16.self))
            return max(-1.0, min(Float(short) / 32767.0, 1.0))
        }
    }
    return floats
}
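decodeWaveFile above skips a fixed 44-byte header and reads the remainder as little-endian 16-bit PCM, scaling each sample into [-1, 1]. That only holds for the canonical WAV layout the demo itself records and bundles (16-bit mono PCM); a file with extra RIFF chunks or a different bit depth would decode as noise. A hypothetical usage against the bundled sample:

// Hypothetical usage, assuming a canonical 44-byte-header, 16-bit mono PCM WAV:
do {
    if let url = Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples") {
        let samples = try decodeWaveFile(url)
        print("Decoded \(samples.count) samples (~\(samples.count / 16000) s at 16 kHz)")
    }
} catch {
    print(error.localizedDescription)
}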
@@ -0,0 +1,10 @@
import SwiftUI

@main
struct WhisperCppDemoApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}