2022-12-23 08:56:18 +00:00
|
|
|
import Foundation
|
|
|
|
import SwiftUI
|
|
|
|
import AVFoundation
|
|
|
|
|
|
|
|
/// View model for the Whisper demo UI.
///
/// Owns the loaded `WhisperContext`, the `Recorder`, and an `AVAudioPlayer`, and
/// publishes a textual log plus a few flags the UI can observe. All state is
/// confined to the main actor; the `AVAudioRecorderDelegate` callbacks are
/// `nonisolated` and hop back onto the main actor before touching state.
@MainActor
class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
    /// `true` once a model has been loaded into `whisperContext`.
    @Published var isModelLoaded = false
    /// Append-only, newline-separated log shown to the user.
    @Published var messageLog = ""
    /// `true` while a model is available and no transcription is in flight.
    @Published var canTranscribe = false
    /// `true` while a recording is in progress.
    @Published var isRecording = false

    private var whisperContext: WhisperContext?
    private let recorder = Recorder()
    private var recordedFile: URL? = nil
    private var audioPlayer: AVAudioPlayer?

    /// Location of the bundled model file, or `nil` if it is not in the bundle.
    private var modelUrl: URL? {
        Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "models")
    }

    /// Location of the bundled sample recording, or `nil` if it is not in the bundle.
    private var sampleUrl: URL? {
        Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples")
    }

    /// Failures raised while loading the model.
    private enum LoadError: LocalizedError {
        case couldNotLocateModel

        // Provides the text that `localizedDescription` reports, so the log
        // keeps reading "Could not locate model" when this error is caught.
        var errorDescription: String? {
            switch self {
            case .couldNotLocateModel:
                return "Could not locate model"
            }
        }
    }

    /// Loads the bundled model and enables transcription on success.
    ///
    /// Any load failure is printed and appended to `messageLog`; in that case
    /// `canTranscribe` stays `false`.
    override init() {
        super.init()
        do {
            try loadModel()
            canTranscribe = true
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }
    }

    /// Creates `whisperContext` from the bundled model file.
    ///
    /// - Throws: `LoadError.couldNotLocateModel` when the model is missing from
    ///   the bundle, or whatever `WhisperContext.createContext(path:)` throws.
    private func loadModel() throws {
        messageLog += "Loading model...\n"
        guard let modelUrl else {
            // Throw instead of only logging, so the caller does not enable
            // transcription without a usable context.
            throw LoadError.couldNotLocateModel
        }
        // `path()` percent-encodes by default; the loader needs a plain file
        // system path, otherwise paths containing spaces fail to open.
        whisperContext = try WhisperContext.createContext(path: modelUrl.path(percentEncoded: false))
        isModelLoaded = true
        messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
    }

    /// Transcribes the bundled sample recording, logging if it cannot be found.
    func transcribeSample() async {
        guard let sampleUrl else {
            messageLog += "Could not locate sample\n"
            return
        }
        await transcribeAudio(sampleUrl)
    }

    /// Decodes the audio file at `url`, runs a full transcription and appends
    /// the result (or the error) to `messageLog`.
    ///
    /// Does nothing when transcription is currently disabled or no model is
    /// loaded. `canTranscribe` is `false` for the duration of the call.
    private func transcribeAudio(_ url: URL) async {
        guard canTranscribe, let whisperContext else {
            return
        }

        canTranscribe = false
        // Re-enable on every exit path, including the error path.
        defer { canTranscribe = true }

        do {
            messageLog += "Reading wave samples...\n"
            let samples = try readAudioSamples(url)
            messageLog += "Transcribing data...\n"
            await whisperContext.fullTranscribe(samples: samples)
            let text = await whisperContext.getTranscription()
            messageLog += "Done: \(text)\n"
        } catch {
            print(error.localizedDescription)
            messageLog += "\(error.localizedDescription)\n"
        }
    }

    /// Starts playing the file at `url` (replacing any current playback) and
    /// returns its samples as decoded by `decodeWaveFile`.
    ///
    /// - Throws: Errors from creating the audio player or from decoding.
    private func readAudioSamples(_ url: URL) throws -> [Float] {
        stopPlayback()
        try startPlayback(url)
        return try decodeWaveFile(url)
    }

    /// Stops the current recording and transcribes it, or — when not
    /// recording — requests microphone permission and starts a new recording.
    ///
    /// Starting is asynchronous: this method returns once permission has been
    /// requested, and `isRecording` flips to `true` only after the recorder
    /// has actually started.
    func toggleRecord() async {
        if isRecording {
            await recorder.stopRecording()
            isRecording = false
            if let recordedFile {
                await transcribeAudio(recordedFile)
            }
            return
        }

        requestRecordPermission { granted in
            // The permission callback may arrive off the main thread, so all
            // state changes happen inside a main-actor task.
            Task { @MainActor in
                guard granted else {
                    self.messageLog += "Microphone permission denied\n"
                    return
                }
                do {
                    self.stopPlayback()
                    let file = try FileManager.default
                        .url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                        .appending(path: "output.wav")
                    try await self.recorder.startRecording(toOutputFile: file, delegate: self)
                    self.isRecording = true
                    self.recordedFile = file
                } catch {
                    print(error.localizedDescription)
                    self.messageLog += "\(error.localizedDescription)\n"
                    self.isRecording = false
                }
            }
        }
    }

    /// Reports whether recording is permitted.
    ///
    /// On macOS the callback is invoked immediately with `true`; on other
    /// platforms the request is forwarded to the shared `AVAudioSession`.
    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
#if os(macOS)
        response(true)
#else
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            response(granted)
        }
#endif
    }

    /// Creates a player for `url`, keeps it alive in `audioPlayer`, and starts it.
    private func startPlayback(_ url: URL) throws {
        let player = try AVAudioPlayer(contentsOf: url)
        audioPlayer = player
        player.play()
    }

    /// Stops and releases the current player, if any.
    private func stopPlayback() {
        audioPlayer?.stop()
        audioPlayer = nil
    }

    // MARK: AVAudioRecorderDelegate

    /// Delegate callback for an encoding error; forwards a non-nil error to the
    /// main actor for logging.
    nonisolated func audioRecorderEncodeErrorDidOccur(_ recorder: AVAudioRecorder, error: Error?) {
        guard let error else {
            return
        }
        Task {
            await handleRecError(error)
        }
    }

    /// Logs a recording error and clears the recording flag.
    private func handleRecError(_ error: Error) {
        print(error.localizedDescription)
        messageLog += "\(error.localizedDescription)\n"
        isRecording = false
    }

    /// Delegate callback for the end of a recording; clears the recording flag
    /// on the main actor. The `flag` argument is not inspected.
    nonisolated func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Task {
            await onDidFinishRecording()
        }
    }

    /// Marks recording as finished.
    private func onDidFinishRecording() {
        isRecording = false
    }
}
|