whisper : experiments with Flash Attention in the decoder

ggml : fix bug in new soft max computation
cmake : add -Wno-unused-function + update whisper.js
2025-06-24 17:15:19 +00:00 · 2023-01-07 21:00:51 +02:00 · 2023-01-07 21:00:07 +02:00 · 2023-01-07 20:18:34 +02:00 · 2023-01-07 19:48:56 +02:00 · 2023-01-07 19:20:18 +02:00
78 changed files with 4609 additions and 1505 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -235,3 +235,33 @@ jobs:
              with:
                name: whisper-blas-bin-${{ matrix.arch }}
                path: build/bin/${{ matrix.build }}
+
+    emscripten:
+        runs-on: ubuntu-latest
+
+        strategy:
+            matrix:
+                build: [Release]
+
+        steps:
+            - name: Clone
+              uses: actions/checkout@v1
+
+            - name: Dependencies
+              run: |
+                wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+                tar -xvf master.tar.gz
+                emsdk-master/emsdk update
+                emsdk-master/emsdk install latest
+                emsdk-master/emsdk activate latest
+
+            - name: Configure
+              run: echo "tmp"
+
+            - name: Build
+              run: |
+                pushd emsdk-master
+                source ./emsdk_env.sh
+                popd
+                emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+                make
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,14 +2,15 @@ cmake_minimum_required (VERSION 3.0)

 project(whisper.cpp VERSION 1.0.4)

-set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
+# Add path to modules
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
+
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(WHISPER_STANDALONE ON)
-    include(cmake/GitVars.cmake)
-    include(cmake/BuildTypes.cmake)
+    include(GitVars)
+    include(BuildTypes)

    # configure project version
    if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
@ -52,6 +53,7 @@ if (APPLE)
    option(WHISPER_NO_ACCELERATE       "whisper: disable Accelerate framework" OFF)
    option(WHISPER_NO_AVX              "whisper: disable AVX" OFF)
    option(WHISPER_NO_AVX2             "whisper: disable AVX2" OFF)
+    option(WHISPER_NO_FMA              "whisper: disable FMA" OFF)
 else()
    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
 endif()
@ -82,9 +84,6 @@ endif()

 # dependencies

-set(CMAKE_C_STANDARD   11)
-set(CMAKE_CXX_STANDARD 11)
-
 find_package(Threads REQUIRED)

 # on APPLE - include Accelerate framework
@ -131,6 +130,13 @@ if (WHISPER_ALL_WARNINGS)
            -Wcast-qual                     \
            -Wstrict-prototypes             \
            -Wpointer-arith                 \
+            -Wno-unused-function            \
+        ")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+            -Wall                           \
+            -Wextra                         \
+            -Wpedantic                      \
+            -Wcast-qual                     \
        ")
    else()
        # todo : msvc
@ -151,6 +157,7 @@ else()
    if (MSVC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
    else()
        if (EMSCRIPTEN)
            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
@ -162,7 +169,10 @@ else()
            if(NOT WHISPER_NO_AVX2)
                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
            endif()
-            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -mf16c")
+            if(NOT WHISPER_NO_FMA)
+                set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
+            endif()
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
        endif()
    endif()
 endif()
@ -178,10 +188,14 @@ endif()
 set(TARGET whisper)

 add_library(${TARGET}
+    ggml.h
    ggml.c
+    whisper.h
    whisper.cpp
    )

+include(DefaultTargetOptions)
+
 target_include_directories(${TARGET} PUBLIC
    .
    )
@ -215,6 +229,7 @@ target_compile_definitions(${TARGET} PUBLIC
 install(TARGETS ${TARGET}
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib/static
+    RUNTIME DESTINATION bin
    )

 #
--- a/35
+++ b/35
@ -10,6 +10,9 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif

+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
+
 # Mac OS + Arm can report x86_64
 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
 ifeq ($(UNAME_S),Darwin)
@ -53,10 +56,13 @@ endif
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
 #       feel free to update the Makefile for your architecture and send a pull request or issue
-ifeq ($(UNAME_M),x86_64)
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	ifeq ($(UNAME_S),Darwin)
-		CFLAGS += -mfma -mf16c
+		CFLAGS += -mf16c
 		AVX1_M := $(shell sysctl machdep.cpu.features)
+		ifneq (,$(findstring FMA,$(AVX1_M)))
+			CFLAGS += -mfma
+		endif
 		ifneq (,$(findstring AVX1.0,$(AVX1_M)))
 			CFLAGS += -mavx
 		endif
@ -81,6 +87,10 @@ ifeq ($(UNAME_M),x86_64)
 		ifneq (,$(findstring f16c,$(F16C_M)))
 			CFLAGS += -mf16c
 		endif
+		SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
+		ifneq (,$(findstring sse3,$(SSE3_M)))
+			CFLAGS += -msse3
+		endif
 	else ifeq ($(UNAME_S),Haiku)
 		AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
 		ifneq (,$(findstring avx,$(AVX1_M)))
@ -105,6 +115,12 @@ endif
 ifeq ($(UNAME_M),amd64)
 	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
+ifeq ($(UNAME_M),ppc64le)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		CFLAGS += -mpower9-vector
+	endif
+endif
 ifndef WHISPER_NO_ACCELERATE
 	# Mac M1 - include Accelerate framework
 	ifeq ($(UNAME_S),Darwin)
@ -135,6 +151,21 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif

+#
+# Print build information
+#
+
+$(info I whisper.cpp build info: )
+$(info I UNAME_S:  $(UNAME_S))
+$(info I UNAME_P:  $(UNAME_P))
+$(info I UNAME_M:  $(UNAME_M))
+$(info I CFLAGS:   $(CFLAGS))
+$(info I CXXFLAGS: $(CXXFLAGS))
+$(info I LDFLAGS:  $(LDFLAGS))
+$(info I CC:       $(CCV))
+$(info I CXX:      $(CXXV))
+$(info )
+
 default: main

 #
--- a/README.md
+++ b/README.md
@ -11,6 +11,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - Plain C/C++ implementation without dependencies
 - Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
 - AVX intrinsics support for x86 architectures
+- VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
 - Low memory usage (Flash Attention + Flash Forward)
 - Zero memory allocations at runtime
@ -447,12 +448,13 @@ or manually from here:
 For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
 in [models](models).

-## Bindings
+## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)

- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs)
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm)
- [X] Javascript: [bindings/javascript](bindings/javascript)
- [ ] Python: soon
+- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
+- [X] Javascript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
+- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
+- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
+- [ ] Python: soon | [WIP](https://github.com/ggerganov/whisper.cpp/issues/9)

 ## Examples

@ -467,6 +469,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
 | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
 | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
+| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
 | [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
 | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
 | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
--- a/bindings/go/.gitignore
+++ b/bindings/go/.gitignore
@ -0,0 +1,2 @@
+build
+models
--- a/bindings/go/LICENSE
+++ b/bindings/go/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 David Thorpe
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/bindings/go/Makefile
+++ b/bindings/go/Makefile
@ -0,0 +1,38 @@
+BUILD_DIR := build
+MODELS_DIR := models
+EXAMPLES_DIR := $(wildcard examples/*)
+INCLUDE_PATH := $(abspath ../..)
+LIBRARY_PATH := $(abspath ../..)
+
+all: clean whisper examples
+
+whisper: mkdir
+	@echo Build whisper
+	@${MAKE} -C ../.. libwhisper.a
+
+test: model-small whisper modtidy
+	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
+	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
+
+examples: $(EXAMPLES_DIR)
+
+model-small: mkdir examples/go-model-download
+	@${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
+
+$(EXAMPLES_DIR): mkdir whisper modtidy
+	@echo Build example $(notdir $@)
+	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
+
+mkdir:
+	@echo Mkdir ${BUILD_DIR}
+	@install -d ${BUILD_DIR}
+	@echo Mkdir ${MODELS_DIR}
+	@install -d ${MODELS_DIR}
+
+modtidy:
+	@go mod tidy
+
+clean: 
+	@echo Clean
+	@rm -fr $(BUILD_DIR)
+	@go clean
--- a/bindings/go/README.md
+++ b/bindings/go/README.md
@ -0,0 +1,100 @@
+# Go bindings for Whisper
+
+This package provides Go bindings for whisper.cpp. They have been tested on:
+
+  * Darwin (OS X) 12.6 on x64_64
+  * Debian Linux on arm64
+  * Fedora Linux on x86_64
+
+The "low level" bindings are in the `bindings/go` directory and there is a more
+Go-style package in the `bindings/go/pkg/whisper` directory. The most simple usage
+is as follows:
+
+```go
+import (
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+)
+
+func main() {
+	var modelpath string // Path to the model
+	var samples []float32 // Samples to process
+
+	// Load the model
+	model, err := whisper.New(modelpath)
+	if err != nil {
+		panic(err)
+	}
+	defer model.Close()
+
+	// Process samples
+	context, err := model.NewContext()
+	if err != nil {
+		panic(err)
+	}
+	if err := context.Process(samples, nil); err != nil {
+		return err
+	}
+
+	// Print out the results
+	for {
+		segment, err := context.NextSegment()
+		if err != nil {
+			break
+		}
+		fmt.Printf("[%6s->%6s] %s\n", segment.Start, segment.End, segment.Text)
+	}
+}
+```
+
+## Building & Testing
+
+In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
+
+```bash
+git clone https://github.com/ggerganov/whisper.cpp.git
+cd whisper.cpp/bindings/go
+make test
+```
+
+This will compile a static `libwhisper.a` in a `build` folder, download a model file, then run the tests. To build the examples:
+
+```bash
+make examples
+```
+
+The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
+
+```bash
+./build/go-model-download -out models
+```
+
+And you can then test a model against samples with the following command:
+
+```bash
+./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav 
+```
+
+## Using the bindings
+
+To use the bindings in your own software,
+
+  1. Import `github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper` (or `github.com/ggerganov/whisper.cpp/bindings/go` into your package;
+  2. Compile `libwhisper.a` (you can use `make whisper` in the `bindings/go` directory);
+  3. Link your go binary against whisper by setting the environment variables `C_INCLUDE_PATH` and `LIBRARY_PATH`
+     to point to the `whisper.h` file directory and `libwhisper.a` file directory respectively.
+
+Look at the `Makefile` in the `bindings/go` directory for an example.
+
+The API Documentation:
+
+  * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go
+  * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper
+
+Getting help:
+
+  * Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
+
+## License
+
+The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
+
--- a/bindings/go/doc.go
+++ b/bindings/go/doc.go
@ -0,0 +1,5 @@
+/*
+github.com/ggerganov/whisper.cpp/bindings/go
+provides a speech-to-text service bindings for the Go programming language.
+*/
+package whisper
--- a/bindings/go/examples/go-model-download/context.go
+++ b/bindings/go/examples/go-model-download/context.go
@ -0,0 +1,30 @@
+package main
+
+import (
+	"context"
+	"os"
+	"os/signal"
+)
+
+// ContextForSignal returns a context object which is cancelled when a signal
+// is received. It returns nil if no signal parameter is provided
+func ContextForSignal(signals ...os.Signal) context.Context {
+	if len(signals) == 0 {
+		return nil
+	}
+
+	ch := make(chan os.Signal)
+	ctx, cancel := context.WithCancel(context.Background())
+
+	// Send message on channel when signal received
+	signal.Notify(ch, signals...)
+
+	// When any signal received, call cancel
+	go func() {
+		<-ch
+		cancel()
+	}()
+
+	// Return success
+	return ctx
+}
--- a/bindings/go/examples/go-model-download/main.go
+++ b/bindings/go/examples/go-model-download/main.go
@ -0,0 +1,208 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"syscall"
+	"time"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// CONSTANTS
+
+const (
+	srcUrl  = "https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main" // The location of the models
+	srcExt  = ".bin"                                                               // Filename extension
+	bufSize = 1024 * 64                                                            // Size of the buffer used for downloading the model
+)
+
+var (
+	// The models which will be downloaded, if no model is specified as an argument
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
+)
+
+var (
+	// The output folder. When not set, use current working directory.
+	flagOut = flag.String("out", "", "Output folder")
+
+	// HTTP timeout parameter - will timeout if takes longer than this to download a model
+	flagTimeout = flag.Duration("timeout", 30*time.Minute, "HTTP timeout")
+
+	// Quiet parameter - will not print progress if set
+	flagQuiet = flag.Bool("quiet", false, "Quiet mode")
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// MAIN
+
+func main() {
+	flag.Usage = func() {
+		name := filepath.Base(flag.CommandLine.Name())
+		fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// Get output path
+	out, err := GetOut()
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "Error:", err)
+		os.Exit(-1)
+	}
+
+	// Create context which quits on SIGINT or SIGQUIT
+	ctx := ContextForSignal(os.Interrupt, syscall.SIGQUIT)
+
+	// Progress filehandle
+	progress := os.Stdout
+	if *flagQuiet {
+		progress, err = os.Open(os.DevNull)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "Error:", err)
+			os.Exit(-1)
+		}
+		defer progress.Close()
+	}
+
+	// Download models - exit on error or interrupt
+	for _, model := range GetModels() {
+		url, err := URLForModel(model)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "Error:", err)
+			continue
+		} else if path, err := Download(ctx, progress, url, out); err == nil || err == io.EOF {
+			continue
+		} else if err == context.Canceled {
+			os.Remove(path)
+			fmt.Fprintln(progress, "\nInterrupted")
+			break
+		} else if err == context.DeadlineExceeded {
+			os.Remove(path)
+			fmt.Fprintln(progress, "Timeout downloading model")
+			continue
+		} else {
+			os.Remove(path)
+			fmt.Fprintln(os.Stderr, "Error:", err)
+			break
+		}
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+// GetOut returns the path to the output directory
+func GetOut() (string, error) {
+	if *flagOut == "" {
+		return os.Getwd()
+	}
+	if info, err := os.Stat(*flagOut); err != nil {
+		return "", err
+	} else if !info.IsDir() {
+		return "", fmt.Errorf("not a directory: %s", info.Name())
+	} else {
+		return *flagOut, nil
+	}
+}
+
+// GetModels returns the list of models to download
+func GetModels() []string {
+	if flag.NArg() == 0 {
+		return modelNames
+	} else {
+		return flag.Args()
+	}
+}
+
+// URLForModel returns the URL for the given model on huggingface.co
+func URLForModel(model string) (string, error) {
+	if filepath.Ext(model) != srcExt {
+		model += srcExt
+	}
+	url, err := url.Parse(srcUrl)
+	if err != nil {
+		return "", err
+	} else {
+		url.Path = filepath.Join(url.Path, model)
+	}
+	return url.String(), nil
+}
+
+// Download downloads the model from the given URL to the given output directory
+func Download(ctx context.Context, p io.Writer, model, out string) (string, error) {
+	// Create HTTP client
+	client := http.Client{
+		Timeout: *flagTimeout,
+	}
+
+	// Initiate the download
+	req, err := http.NewRequest("GET", model, nil)
+	if err != nil {
+		return "", err
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("%s: %s", model, resp.Status)
+	}
+
+	// If output file exists and is the same size as the model, skip
+	path := filepath.Join(out, filepath.Base(model))
+	if info, err := os.Stat(path); err == nil && info.Size() == resp.ContentLength {
+		fmt.Fprintln(p, "Skipping", model, "as it already exists")
+		return "", nil
+	}
+
+	// Create file
+	w, err := os.Create(path)
+	if err != nil {
+		return "", err
+	}
+	defer w.Close()
+
+	// Report
+	fmt.Fprintln(p, "Downloading", model, "to", out)
+
+	// Progressively download the model
+	data := make([]byte, bufSize)
+	count, pct := int64(0), int64(0)
+	ticker := time.NewTicker(5 * time.Second)
+	for {
+		select {
+		case <-ctx.Done():
+			// Cancelled, return error
+			return path, ctx.Err()
+		case <-ticker.C:
+			pct = DownloadReport(p, pct, count, resp.ContentLength)
+		default:
+			// Read body
+			n, err := resp.Body.Read(data)
+			if err != nil {
+				DownloadReport(p, pct, count, resp.ContentLength)
+				return path, err
+			} else if m, err := w.Write(data[:n]); err != nil {
+				return path, err
+			} else {
+				count += int64(m)
+			}
+		}
+	}
+}
+
+// Report periodically reports the download progress when percentage changes
+func DownloadReport(w io.Writer, pct, count, total int64) int64 {
+	pct_ := count * 100 / total
+	if pct_ > pct {
+		fmt.Fprintf(w, "  ...%d MB written (%d%%)\n", count/1e6, pct_)
+	}
+	return pct_
+}
--- a/bindings/go/examples/go-whisper/flags.go
+++ b/bindings/go/examples/go-whisper/flags.go
@ -0,0 +1,61 @@
+package main
+
+import (
+	"flag"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+type Flags struct {
+	*flag.FlagSet
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// LIFECYCLE
+
+func NewFlags(name string, args []string) (*Flags, error) {
+	flags := &Flags{
+		FlagSet: flag.NewFlagSet(name, flag.ContinueOnError),
+	}
+
+	// Register the command line arguments
+	registerFlags(flags)
+
+	// Parse command line
+	if err := flags.Parse(args); err != nil {
+		return nil, err
+	}
+
+	// Return success
+	return flags, nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+func (flags *Flags) GetModel() string {
+	return flags.Lookup("model").Value.String()
+}
+
+func (flags *Flags) GetLanguage() string {
+	return flags.Lookup("language").Value.String()
+}
+
+func (flags *Flags) IsSpeedup() bool {
+	return flags.Lookup("speedup").Value.String() == "true"
+}
+
+func (flags *Flags) IsTokens() bool {
+	return flags.Lookup("tokens").Value.String() == "true"
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PRIVATE METHODS
+
+func registerFlags(flag *Flags) {
+	flag.String("model", "", "Path to the model file")
+	flag.String("language", "", "Language")
+	flag.Bool("speedup", false, "Enable speedup")
+	flag.Bool("tokens", false, "Display tokens")
+}
--- a/bindings/go/examples/go-whisper/main.go
+++ b/bindings/go/examples/go-whisper/main.go
@ -0,0 +1,44 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	// Packages
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+)
+
+func main() {
+	flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:])
+	if err == flag.ErrHelp {
+		os.Exit(0)
+	} else if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	} else if flags.GetModel() == "" {
+		fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use")
+		os.Exit(1)
+	} else if flags.NArg() == 0 {
+		fmt.Fprintln(os.Stderr, "No input files specified")
+		os.Exit(1)
+	}
+
+	// Load model
+	model, err := whisper.New(flags.GetModel())
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	defer model.Close()
+
+	// Process files
+	for _, filename := range flags.Args() {
+		fmt.Println("Processing", filename)
+		if err := Process(model, filename, flags.GetLanguage(), flags.IsSpeedup(), flags.IsTokens()); err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			continue
+		}
+	}
+}
--- a/bindings/go/examples/go-whisper/process.go
+++ b/bindings/go/examples/go-whisper/process.go
@ -0,0 +1,80 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"time"
+
+	// Package imports
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	wav "github.com/go-audio/wav"
+)
+
+func Process(model whisper.Model, path string, lang string, speedup, tokens bool) error {
+	var data []float32
+
+	// Create processing context
+	context, err := model.NewContext()
+	if err != nil {
+		return err
+	}
+
+	// Open the file
+	fh, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer fh.Close()
+
+	// Decode the WAV file
+	dec := wav.NewDecoder(fh)
+	if buf, err := dec.FullPCMBuffer(); err != nil {
+		return err
+	} else if dec.SampleRate != whisper.SampleRate {
+		return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
+	} else if dec.NumChans != 1 {
+		return fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
+	} else {
+		data = buf.AsFloat32Buffer().Data
+	}
+
+	// Set the parameters
+	var cb whisper.SegmentCallback
+	if lang != "" {
+		if err := context.SetLanguage(lang); err != nil {
+			return err
+		}
+	}
+	if speedup {
+		context.SetSpeedup(true)
+	}
+	if tokens {
+		cb = func(segment whisper.Segment) {
+			fmt.Printf("%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
+			for _, token := range segment.Tokens {
+				fmt.Printf("%q ", token.Text)
+			}
+			fmt.Println("")
+		}
+	}
+
+	// Process the data
+	if err := context.Process(data, cb); err != nil {
+		return err
+	}
+
+	// Print out the results
+	for {
+		segment, err := context.NextSegment()
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return err
+		}
+		fmt.Printf("[%6s->%6s] %s\n", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond), segment.Text)
+	}
+
+	// Return success
+	return nil
+}
--- a/bindings/go/go.mod
+++ b/bindings/go/go.mod
@ -0,0 +1,16 @@
+module github.com/ggerganov/whisper.cpp/bindings/go
+
+go 1.19
+
+require (
+	github.com/go-audio/wav v1.1.0
+	github.com/stretchr/testify v1.8.1
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/go-audio/audio v1.0.0 // indirect
+	github.com/go-audio/riff v1.0.0 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
--- a/bindings/go/go.sum
+++ b/bindings/go/go.sum
@ -0,0 +1,23 @@
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
+github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
+github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
+github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
+github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
+github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/bindings/go/params.go
+++ b/bindings/go/params.go
@ -0,0 +1,131 @@
+package whisper
+
+import (
+	"fmt"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// CGO
+
+/*
+#include <whisper.h>
+*/
+import "C"
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+func (p *Params) SetTranslate(v bool) {
+	p.translate = toBool(v)
+}
+
+func (p *Params) SetNoContext(v bool) {
+	p.no_context = toBool(v)
+}
+
+func (p *Params) SetSingleSegment(v bool) {
+	p.single_segment = toBool(v)
+}
+
+func (p *Params) SetPrintSpecial(v bool) {
+	p.print_special = toBool(v)
+}
+
+func (p *Params) SetPrintProgress(v bool) {
+	p.print_progress = toBool(v)
+}
+
+func (p *Params) SetPrintRealtime(v bool) {
+	p.print_realtime = toBool(v)
+}
+
+func (p *Params) SetPrintTimestamps(v bool) {
+	p.print_timestamps = toBool(v)
+}
+
+func (p *Params) SetSpeedup(v bool) {
+	p.speed_up = toBool(v)
+}
+
+func (p *Params) SetLanguage(lang int) error {
+	str := C.whisper_lang_str(C.int(lang))
+	if str == nil {
+		return ErrInvalidLanguage
+	} else {
+		p.language = str
+	}
+	return nil
+}
+
+func (p *Params) Language() int {
+	if p.language == nil {
+		return -1
+	}
+	return int(C.whisper_lang_id(p.language))
+}
+
+func (p *Params) SetThreads(threads int) {
+	p.n_threads = C.int(threads)
+}
+
+func (p *Params) SetOffset(offset_ms int) {
+	p.offset_ms = C.int(offset_ms)
+}
+
+func (p *Params) SetDuration(duration_ms int) {
+	p.duration_ms = C.int(duration_ms)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PRIVATE METHODS
+
+func toBool(v bool) C.bool {
+	if v {
+		return C.bool(true)
+	}
+	return C.bool(false)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// STRINGIFY
+
+func (p *Params) String() string {
+	str := "<whisper.params"
+	str += fmt.Sprintf(" strategy=%v", p.strategy)
+	str += fmt.Sprintf(" n_threads=%d", p.n_threads)
+	if p.language != nil {
+		str += fmt.Sprintf(" language=%s", C.GoString(p.language))
+	}
+	str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
+	str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
+	str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
+	if p.translate {
+		str += " translate"
+	}
+	if p.no_context {
+		str += " no_context"
+	}
+	if p.single_segment {
+		str += " single_segment"
+	}
+	if p.print_special {
+		str += " print_special"
+	}
+	if p.print_progress {
+		str += " print_progress"
+	}
+	if p.print_realtime {
+		str += " print_realtime"
+	}
+	if p.print_timestamps {
+		str += " print_timestamps"
+	}
+	if p.token_timestamps {
+		str += " token_timestamps"
+	}
+	if p.speed_up {
+		str += " speed_up"
+	}
+
+	return str + ">"
+}
--- a/bindings/go/pkg/whisper/consts.go
+++ b/bindings/go/pkg/whisper/consts.go
@ -0,0 +1,27 @@
+package whisper
+
+import (
+	"errors"
+
+	// Bindings
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// ERRORS
+
+var (
+	ErrUnableToLoadModel   = errors.New("unable to load model")
+	ErrInternalAppError    = errors.New("internal application error")
+	ErrProcessingFailed    = errors.New("processing failed")
+	ErrUnsupportedLanguage = errors.New("unsupported language")
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// CONSTANTS
+
+// SampleRate is the sample rate of the audio data.
+const SampleRate = whisper.SampleRate
+
+// SampleBits is the number of bytes per sample.
+const SampleBits = whisper.SampleBits
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@ -0,0 +1,145 @@
+package whisper
+
+import (
+	"io"
+	"strings"
+	"time"
+
+	// Bindings
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+type context struct {
+	n      int
+	model  *model
+	params whisper.Params
+}
+
+// Make sure context adheres to the interface
+var _ Context = (*context)(nil)
+
+///////////////////////////////////////////////////////////////////////////////
+// LIFECYCLE
+
+func NewContext(model *model, params whisper.Params) (Context, error) {
+	context := new(context)
+	context.model = model
+	context.params = params
+
+	// Return success
+	return context, nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+// Set the language to use for speech recognition.
+func (context *context) SetLanguage(lang string) error {
+	if context.model.ctx == nil {
+		return ErrInternalAppError
+	}
+	if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
+		return ErrUnsupportedLanguage
+	} else if err := context.params.SetLanguage(id); err != nil {
+		return err
+	}
+	// Return success
+	return nil
+}
+
+// Get language
+func (context *context) Language() string {
+	return whisper.Whisper_lang_str(context.params.Language())
+}
+
+// Set speedup flag
+func (context *context) SetSpeedup(v bool) {
+	context.params.SetSpeedup(v)
+}
+
+// Process new sample data and return any errors
+func (context *context) Process(data []float32, cb SegmentCallback) error {
+	if context.model.ctx == nil {
+		return ErrInternalAppError
+	}
+	// If the callback is defined then we force on single_segment mode
+	if cb != nil {
+		context.params.SetSingleSegment(true)
+	}
+
+	// We don't do parallel processing at the moment
+	processors := 0
+	if processors > 1 {
+		if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
+			if cb != nil {
+				num_segments := context.model.ctx.Whisper_full_n_segments()
+				s0 := num_segments - new
+				for i := s0; i < num_segments; i++ {
+					cb(toSegment(context.model.ctx, i))
+				}
+			}
+		}); err != nil {
+			return err
+		}
+	} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
+		if cb != nil {
+			num_segments := context.model.ctx.Whisper_full_n_segments()
+			s0 := num_segments - new
+			for i := s0; i < num_segments; i++ {
+				cb(toSegment(context.model.ctx, i))
+			}
+		}
+	}); err != nil {
+		return err
+	}
+
+	// Return success
+	return nil
+}
+
+// Return the next segment of tokens
+func (context *context) NextSegment() (Segment, error) {
+	if context.model.ctx == nil {
+		return Segment{}, ErrInternalAppError
+	}
+	if context.n >= context.model.ctx.Whisper_full_n_segments() {
+		return Segment{}, io.EOF
+	}
+
+	// Populate result
+	result := toSegment(context.model.ctx, context.n)
+
+	// Increment the cursor
+	context.n++
+
+	// Return success
+	return result, nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PRIVATE METHODS
+
+func toSegment(ctx *whisper.Context, n int) Segment {
+	return Segment{
+		Num:    n,
+		Text:   strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)),
+		Start:  time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10,
+		End:    time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10,
+		Tokens: toTokens(ctx, n),
+	}
+}
+
+func toTokens(ctx *whisper.Context, n int) []Token {
+	result := make([]Token, ctx.Whisper_full_n_tokens(n))
+	for i := 0; i < len(result); i++ {
+		result[i] = Token{
+			Id:   int(ctx.Whisper_full_get_token_id(n, i)),
+			Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
+			P:    ctx.Whisper_full_get_token_p(n, i),
+		}
+	}
+	return result
+}
--- a/bindings/go/pkg/whisper/context_test.go
+++ b/bindings/go/pkg/whisper/context_test.go
@ -0,0 +1,55 @@
+package whisper_test
+
+import (
+	"os"
+	"testing"
+
+	// Packages
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	assert "github.com/stretchr/testify/assert"
+)
+
+const (
+	ModelPath  = "../../models/ggml-tiny.bin"
+	SamplePath = "../../samples/jfk.wav"
+)
+
+func Test_Whisper_000(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Load model
+	model, err := whisper.New(ModelPath)
+	assert.NoError(err)
+	assert.NotNil(model)
+	assert.NoError(model.Close())
+
+	t.Log("languages=", model.Languages())
+}
+
+func Test_Whisper_001(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Load model
+	model, err := whisper.New(ModelPath)
+	assert.NoError(err)
+	assert.NotNil(model)
+	defer model.Close()
+
+	// Get context for decoding
+	ctx, err := model.NewContext()
+	assert.NoError(err)
+	assert.NotNil(ctx)
+
+}
--- a/bindings/go/pkg/whisper/doc.go
+++ b/bindings/go/pkg/whisper/doc.go
@ -0,0 +1,4 @@
+/*
+This is the higher-level speech-to-text whisper.cpp API for go
+*/
+package whisper
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@ -0,0 +1,63 @@
+package whisper
+
+import (
+	"io"
+	"time"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+// SegmentCallback is the callback function for processing segments in real
+// time. It is called during the Process function
+type SegmentCallback func(Segment)
+
+// Model is the interface to a whisper model. Create a new model with the
+// function whisper.New(string)
+type Model interface {
+	io.Closer
+
+	// Return a new speech-to-text context.
+	NewContext() (Context, error)
+
+	// Return all languages supported.
+	Languages() []string
+}
+
+// Context is the speach recognition context.
+type Context interface {
+	SetLanguage(string) error // Set the language to use for speech recognition.
+	Language() string         // Get language
+	SetSpeedup(bool)          // Set speedup flag
+
+	// Process mono audio data and return any errors.
+	// If defined, newly generated segments are passed to the
+	// callback function during processing.
+	Process([]float32, SegmentCallback) error
+
+	// After process is called, return segments until the end of the stream
+	// is reached, when io.EOF is returned.
+	NextSegment() (Segment, error)
+}
+
+// Segment is the text result of a speech recognition.
+type Segment struct {
+	// Segment Number
+	Num int
+
+	// Time beginning and end timestamps for the segment.
+	Start, End time.Duration
+
+	// The text of the segment.
+	Text string
+
+	// The tokens of the segment.
+	Tokens []Token
+}
+
+// Token is a text or special token
+type Token struct {
+	Id   int
+	Text string
+	P    float32
+}
--- a/bindings/go/pkg/whisper/model.go
+++ b/bindings/go/pkg/whisper/model.go
@ -0,0 +1,95 @@
+package whisper
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+
+	// Bindings
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+type model struct {
+	path string
+	ctx  *whisper.Context
+}
+
+// Make sure model adheres to the interface
+var _ Model = (*model)(nil)
+
+///////////////////////////////////////////////////////////////////////////////
+// LIFECYCLE
+
+func New(path string) (*model, error) {
+	model := new(model)
+	if _, err := os.Stat(path); err != nil {
+		return nil, err
+	} else if ctx := whisper.Whisper_init(path); ctx == nil {
+		return nil, ErrUnableToLoadModel
+	} else {
+		model.ctx = ctx
+		model.path = path
+	}
+
+	// Return success
+	return model, nil
+}
+
+func (model *model) Close() error {
+	if model.ctx != nil {
+		model.ctx.Whisper_free()
+	}
+
+	// Release resources
+	model.ctx = nil
+
+	// Return success
+	return nil
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// STRINGIFY
+
+func (model *model) String() string {
+	str := "<whisper.model"
+	if model.ctx != nil {
+		str += fmt.Sprintf(" model=%q", model.path)
+	}
+	return str + ">"
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+// Return all recognized languages. Initially it is set to auto-detect
+func (model *model) Languages() []string {
+	result := make([]string, 0, whisper.Whisper_lang_max_id())
+	for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
+		str := whisper.Whisper_lang_str(i)
+		if model.ctx.Whisper_lang_id(str) >= 0 {
+			result = append(result, str)
+		}
+	}
+	return result
+}
+
+func (model *model) NewContext() (Context, error) {
+	if model.ctx == nil {
+		return nil, ErrInternalAppError
+	}
+
+	// Create new context
+	params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
+	params.SetTranslate(false)
+	params.SetPrintSpecial(false)
+	params.SetPrintProgress(false)
+	params.SetPrintRealtime(false)
+	params.SetPrintTimestamps(false)
+	params.SetThreads(runtime.NumCPU())
+
+	// Return new context
+	return NewContext(model, params)
+}
--- a/bindings/go/samples/jfk.wav
+++ b/bindings/go/samples/jfk.wav
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@ -0,0 +1,419 @@
+package whisper
+
+import (
+	"errors"
+	"unsafe"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// CGO
+
+/*
+#cgo LDFLAGS: -lwhisper -lm -lstdc++
+#cgo darwin LDFLAGS: -framework Accelerate
+#include <whisper.h>
+#include <stdlib.h>
+
+extern void callNewSegment(void* user_data, int new);
+extern bool callEncoderBegin(void* user_data);
+
+// Text segment callback
+// Called on every newly generated text segment
+// Use the whisper_full_...() functions to obtain the text segments
+static void whisper_new_segment_cb(struct whisper_context* ctx, int n_new, void* user_data) {
+    if(user_data != NULL && ctx != NULL) {
+        callNewSegment(user_data, n_new);
+    }
+}
+
+// Encoder begin callback
+// If not NULL, called before the encoder starts
+// If it returns false, the computation is aborted
+static bool whisper_encoder_begin_cb(struct whisper_context* ctx, void* user_data) {
+    if(user_data != NULL && ctx != NULL) {
+        return callEncoderBegin(user_data);
+    }
+    return false;
+}
+
+// Get default parameters and set callbacks
+static struct whisper_full_params whisper_full_default_params_cb(struct whisper_context* ctx, enum whisper_sampling_strategy strategy) {
+	struct whisper_full_params params = whisper_full_default_params(strategy);
+	params.new_segment_callback = whisper_new_segment_cb;
+	params.new_segment_callback_user_data = (void*)(ctx);
+	params.encoder_begin_callback = whisper_encoder_begin_cb;
+	params.encoder_begin_callback_user_data = (void*)(ctx);
+	return params;
+}
+*/
+import "C"
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+type (
+	Context          C.struct_whisper_context
+	Token            C.whisper_token
+	TokenData        C.struct_whisper_token_data
+	SamplingStrategy C.enum_whisper_sampling_strategy
+	Params           C.struct_whisper_full_params
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// GLOBALS
+
+const (
+	SAMPLING_GREEDY      SamplingStrategy = C.WHISPER_SAMPLING_GREEDY
+	SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH
+)
+
+const (
+	SampleRate = C.WHISPER_SAMPLE_RATE                 // Expected sample rate, samples per second
+	SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
+	NumFFT     = C.WHISPER_N_FFT
+	NumMEL     = C.WHISPER_N_MEL
+	HopLength  = C.WHISPER_HOP_LENGTH
+	ChunkSize  = C.WHISPER_CHUNK_SIZE
+)
+
+var (
+	ErrTokenizerFailed  = errors.New("whisper_tokenize failed")
+	ErrAutoDetectFailed = errors.New("whisper_lang_auto_detect failed")
+	ErrConversionFailed = errors.New("whisper_convert failed")
+	ErrInvalidLanguage  = errors.New("invalid language")
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+// Allocates all memory needed for the model and loads the model from the given file.
+// Returns NULL on failure.
+func Whisper_init(path string) *Context {
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+	if ctx := C.whisper_init(cPath); ctx != nil {
+		return (*Context)(ctx)
+	} else {
+		return nil
+	}
+}
+
+// Frees all memory allocated by the model.
+func (ctx *Context) Whisper_free() {
+	C.whisper_free((*C.struct_whisper_context)(ctx))
+}
+
+// Convert RAW PCM audio to log mel spectrogram.
+// The resulting spectrogram is stored inside the provided whisper context.
+func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error {
+	if C.whisper_pcm_to_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// This can be used to set a custom log mel spectrogram inside the provided whisper context.
+// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
+// n_mel must be 80
+func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error {
+	if C.whisper_set_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
+// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
+// offset can be used to specify the offset of the first frame in the spectrogram.
+func (ctx *Context) Whisper_encode(offset, threads int) error {
+	if C.whisper_encode((*C.struct_whisper_context)(ctx), C.int(offset), C.int(threads)) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// Run the Whisper decoder to obtain the logits and probabilities for the next token.
+// Make sure to call whisper_encode() first.
+// tokens + n_tokens is the provided context for the decoder.
+// n_past is the number of tokens to use from previous decoder calls.
+func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
+	if C.whisper_decode((*C.struct_whisper_context)(ctx), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// whisper_sample_best() returns the token with the highest probability
+func (ctx *Context) Whisper_sample_best() TokenData {
+	return TokenData(C.whisper_sample_best((*C.struct_whisper_context)(ctx)))
+}
+
+// whisper_sample_timestamp() returns the most probable timestamp token
+func (ctx *Context) Whisper_sample_timestamp(is_initial bool) TokenData {
+	return TokenData(C.whisper_sample_timestamp((*C.struct_whisper_context)(ctx), C.bool(is_initial)))
+}
+
+// Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
+// Returns the number of tokens on success
+func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
+	cText := C.CString(text)
+	defer C.free(unsafe.Pointer(cText))
+	if n := C.whisper_tokenize((*C.struct_whisper_context)(ctx), cText, (*C.whisper_token)(&tokens[0]), C.int(len(tokens))); n >= 0 {
+		return int(n), nil
+	} else {
+		return 0, ErrTokenizerFailed
+	}
+}
+
+// Return the id of the specified language, returns -1 if not found
+// Examples:
+//
+//	"de" -> 2
+//	"german" -> 2
+func (ctx *Context) Whisper_lang_id(lang string) int {
+	return int(C.whisper_lang_id(C.CString(lang)))
+}
+
+// Largest language id (i.e. number of available languages - 1)
+func Whisper_lang_max_id() int {
+	return int(C.whisper_lang_max_id())
+}
+
+// Return the short string of the specified language id (e.g. 2 -> "de"),
+// returns empty string if not found
+func Whisper_lang_str(id int) string {
+	return C.GoString(C.whisper_lang_str(C.int(id)))
+}
+
+// Use mel data at offset_ms to try and auto-detect the spoken language
+// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
+// Returns the probabilities of all languages.
+// ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
+func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error) {
+	probs := make([]float32, Whisper_lang_max_id()+1)
+	if n := int(C.whisper_lang_auto_detect((*C.struct_whisper_context)(ctx), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 {
+		return nil, ErrAutoDetectFailed
+	} else {
+		return probs, nil
+	}
+}
+
+func (ctx *Context) Whisper_n_len() int {
+	return int(C.whisper_n_len((*C.struct_whisper_context)(ctx)))
+}
+
+func (ctx *Context) Whisper_n_vocab() int {
+	return int(C.whisper_n_vocab((*C.struct_whisper_context)(ctx)))
+}
+
+func (ctx *Context) Whisper_n_text_ctx() int {
+	return int(C.whisper_n_text_ctx((*C.struct_whisper_context)(ctx)))
+}
+
+func (ctx *Context) Whisper_n_audio_ctx() int {
+	return int(C.whisper_n_audio_ctx((*C.struct_whisper_context)(ctx)))
+}
+
+func (ctx *Context) Whisper_is_multilingual() int {
+	return int(C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)))
+}
+
+// The probabilities for the next token
+//func (ctx *Whisper_context) Whisper_get_probs() []float32 {
+//	return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_probs((*C.struct_whisper_context)(ctx))))[:ctx.Whisper_n_vocab()]
+//}
+
+// Token Id -> String. Uses the vocabulary in the provided context
+func (ctx *Context) Whisper_token_to_str(token Token) string {
+	return C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(token)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_eot() Token {
+	return Token(C.whisper_token_eot((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_sot() Token {
+	return Token(C.whisper_token_sot((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_prev() Token {
+	return Token(C.whisper_token_prev((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_solm() Token {
+	return Token(C.whisper_token_solm((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_not() Token {
+	return Token(C.whisper_token_not((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_beg() Token {
+	return Token(C.whisper_token_beg((*C.struct_whisper_context)(ctx)))
+}
+
+// Special tokens
+func (ctx *Context) Whisper_token_lang(lang_id int) Token {
+	return Token(C.whisper_token_lang((*C.struct_whisper_context)(ctx), C.int(lang_id)))
+}
+
+// Task tokens
+func Whisper_token_translate() Token {
+	return Token(C.whisper_token_translate())
+}
+
+// Task tokens
+func Whisper_token_transcribe() Token {
+	return Token(C.whisper_token_transcribe())
+}
+
+// Performance information
+func (ctx *Context) Whisper_print_timings() {
+	C.whisper_print_timings((*C.struct_whisper_context)(ctx))
+}
+
+// Performance information
+func (ctx *Context) Whisper_reset_timings() {
+	C.whisper_reset_timings((*C.struct_whisper_context)(ctx))
+}
+
+// Print system information
+func Whisper_print_system_info() string {
+	return C.GoString(C.whisper_print_system_info())
+}
+
+// Return default parameters for a strategy
+func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params {
+	// Get default parameters
+	return Params(C.whisper_full_default_params_cb((*C.struct_whisper_context)(ctx), C.enum_whisper_sampling_strategy(strategy)))
+}
+
+// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
+// Uses the specified decoding strategy to obtain the text.
+func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
+	registerEncoderBeginCallback(ctx, encoderBeginCallback)
+	registerNewSegmentCallback(ctx, newSegmentCallback)
+	defer registerEncoderBeginCallback(ctx, nil)
+	defer registerNewSegmentCallback(ctx, nil)
+	if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// Split the input audio in chunks and process each chunk separately using whisper_full()
+// It seems this approach can offer some speedup in some cases.
+// However, the transcription accuracy can be worse at the beginning and end of each chunk.
+func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
+	registerEncoderBeginCallback(ctx, encoderBeginCallback)
+	registerNewSegmentCallback(ctx, newSegmentCallback)
+	defer registerEncoderBeginCallback(ctx, nil)
+	defer registerNewSegmentCallback(ctx, nil)
+
+	if C.whisper_full_parallel((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples)), C.int(processors)) == 0 {
+		return nil
+	} else {
+		return ErrConversionFailed
+	}
+}
+
+// Number of generated text segments.
+// A segment can be a few words, a sentence, or even a paragraph.
+func (ctx *Context) Whisper_full_n_segments() int {
+	return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx)))
+}
+
+// Get the start and end time of the specified segment.
+func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 {
+	return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment)))
+}
+
+// Get the start and end time of the specified segment.
+func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 {
+	return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment)))
+}
+
+// Get the text of the specified segment.
+func (ctx *Context) Whisper_full_get_segment_text(segment int) string {
+	return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment)))
+}
+
+// Get number of tokens in the specified segment.
+func (ctx *Context) Whisper_full_n_tokens(segment int) int {
+	return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment)))
+}
+
+// Get the token text of the specified token index in the specified segment.
+func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string {
+	return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
+}
+
+// Get the token of the specified token index in the specified segment.
+func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
+	return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
+}
+
+// Get token data for the specified token in the specified segment.
+// This contains probabilities, timestamps, etc.
+func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
+	return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
+}
+
+// Get the probability of the specified token in the specified segment.
+func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
+	return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// CALLBACKS
+
+var (
+	cbNewSegment   = make(map[unsafe.Pointer]func(int))
+	cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
+)
+
+func registerNewSegmentCallback(ctx *Context, fn func(int)) {
+	if fn == nil {
+		delete(cbNewSegment, unsafe.Pointer(ctx))
+	} else {
+		cbNewSegment[unsafe.Pointer(ctx)] = fn
+	}
+}
+
+func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
+	if fn == nil {
+		delete(cbEncoderBegin, unsafe.Pointer(ctx))
+	} else {
+		cbEncoderBegin[unsafe.Pointer(ctx)] = fn
+	}
+}
+
+//export callNewSegment
+func callNewSegment(user_data unsafe.Pointer, new C.int) {
+	if fn, ok := cbNewSegment[user_data]; ok {
+		fn(int(new))
+	}
+}
+
+//export callEncoderBegin
+func callEncoderBegin(user_data unsafe.Pointer) C.bool {
+	if fn, ok := cbEncoderBegin[user_data]; ok {
+		if fn() {
+			return C.bool(true)
+		} else {
+			return C.bool(false)
+		}
+	}
+	return true
+}
--- a/bindings/go/whisper_test.go
+++ b/bindings/go/whisper_test.go
@ -0,0 +1,113 @@
+package whisper_test
+
+import (
+	"os"
+	"runtime"
+	"testing"
+	"time"
+
+	// Packages
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go"
+	wav "github.com/go-audio/wav"
+	assert "github.com/stretchr/testify/assert"
+)
+
+const (
+	ModelPath  = "models/ggml-small.en.bin"
+	SamplePath = "samples/jfk.wav"
+)
+
+func Test_Whisper_000(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	ctx := whisper.Whisper_init(ModelPath)
+	assert.NotNil(ctx)
+	ctx.Whisper_free()
+}
+
+func Test_Whisper_001(t *testing.T) {
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Open samples
+	fh, err := os.Open(SamplePath)
+	assert.NoError(err)
+	defer fh.Close()
+
+	// Read samples
+	d := wav.NewDecoder(fh)
+	buf, err := d.FullPCMBuffer()
+	assert.NoError(err)
+
+	// Run whisper
+	ctx := whisper.Whisper_init(ModelPath)
+	assert.NotNil(ctx)
+	defer ctx.Whisper_free()
+	params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
+	data := buf.AsFloat32Buffer().Data
+	err = ctx.Whisper_full(params, data, nil, nil)
+	assert.NoError(err)
+
+	// Print out tokens
+	num_segments := ctx.Whisper_full_n_segments()
+	assert.GreaterOrEqual(num_segments, 1)
+	for i := 0; i < num_segments; i++ {
+		str := ctx.Whisper_full_get_segment_text(i)
+		assert.NotEmpty(str)
+		t0 := time.Duration(ctx.Whisper_full_get_segment_t0(i)) * time.Millisecond
+		t1 := time.Duration(ctx.Whisper_full_get_segment_t1(i)) * time.Millisecond
+		t.Logf("[%6s->%-6s] %q", t0, t1, str)
+	}
+}
+
+func Test_Whisper_002(t *testing.T) {
+	assert := assert.New(t)
+	for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
+		str := whisper.Whisper_lang_str(i)
+		assert.NotEmpty(str)
+		t.Log(str)
+	}
+}
+
+func Test_Whisper_003(t *testing.T) {
+	threads := runtime.NumCPU()
+	assert := assert.New(t)
+	if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
+		t.Skip("Skipping test, model not found:", ModelPath)
+	}
+	if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
+		t.Skip("Skipping test, sample not found:", SamplePath)
+	}
+
+	// Open samples
+	fh, err := os.Open(SamplePath)
+	assert.NoError(err)
+	defer fh.Close()
+
+	// Read samples
+	d := wav.NewDecoder(fh)
+	buf, err := d.FullPCMBuffer()
+	assert.NoError(err)
+
+	// Make the model
+	ctx := whisper.Whisper_init(ModelPath)
+	assert.NotNil(ctx)
+	defer ctx.Whisper_free()
+
+	// Get MEL
+	assert.NoError(ctx.Whisper_pcm_to_mel(buf.AsFloat32Buffer().Data, threads))
+
+	// Get Languages
+	languages, err := ctx.Whisper_lang_auto_detect(0, threads)
+	assert.NoError(err)
+	for i, p := range languages {
+		t.Logf("%s: %f", whisper.Whisper_lang_str(i), p)
+	}
+}
--- a/bindings/ios
+++ b/bindings/ios
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/cmake/DefaultTargetOptions.cmake
+++ b/cmake/DefaultTargetOptions.cmake
@ -0,0 +1,17 @@
+# Set the default compile features and properties for a target.
+
+if (NOT TARGET)
+    message(FATAL_ERROR "TARGET not set before including DefaultTargetOptions")
+endif()
+
+target_compile_features(${TARGET}
+    PRIVATE
+        cxx_std_11
+    )
+
+set_target_properties(${TARGET}
+    PROPERTIES
+        EXPORT_COMPILE_COMMANDS ON
+        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
+        INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib"
+)
--- a/examples/bench.wasm/CMakeLists.txt
+++ b/examples/bench.wasm/CMakeLists.txt
@ -8,6 +8,8 @@ add_executable(${TARGET}
    emscripten.cpp
    )

+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE
    whisper
    )
--- a/examples/bench/CMakeLists.txt
+++ b/examples/bench/CMakeLists.txt
@ -1,3 +1,6 @@
 set(TARGET bench)
 add_executable(${TARGET} bench.cpp)
+
+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -33,7 +33,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    return true;
 }

-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
--- a/examples/command.wasm/CMakeLists.txt
+++ b/examples/command.wasm/CMakeLists.txt
@ -8,6 +8,8 @@ add_executable(${TARGET}
    emscripten.cpp
    )

+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE
    whisper
    )
--- a/examples/command/CMakeLists.txt
+++ b/examples/command/CMakeLists.txt
@ -2,6 +2,9 @@ if (WHISPER_SUPPORT_SDL2)
    # command
    set(TARGET command)
    add_executable(${TARGET} command.cpp)
+
+    include(DefaultTargetOptions)
+
    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
 endif ()
--- a/examples/command/README.md
+++ b/examples/command/README.md
@ -9,7 +9,19 @@ More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/

 # On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
 ./command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
+```

+https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
+
+Web version: [examples/command.wasm](/examples/command.wasm)
+
+## Guided mode
+
+"Guided mode" allows you to specify a list of commands (i.e. strings) and the transcription will be guided to classify your command into one from the list. This can be useful in situations where a device is listening only for a small subset of commands.
+
+Initial tests show that this approach might be extremely efficient in terms of performance, since it integrates very well with the "partial Encoder" idea from #137.
+
+```bash
 # Run in guided mode, the list of allowed commands is in commands.txt
 ./command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt

@ -17,9 +29,8 @@ More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/
 ./command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
 ```

-https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
+https://user-images.githubusercontent.com/1991296/207435352-8fc4ed3f-bde5-4555-9b8b-aeeb76bee969.mp4

-Web version: [examples/command.wasm](/examples/command.wasm)

 ## Building

--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@ -41,8 +41,8 @@ struct whisper_params {

    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
-    std::string fname_out = "";
-    std::string commands  = "";
+    std::string fname_out;
+    std::string commands;
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -81,7 +81,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    return true;
 }

-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
@ -387,7 +387,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
    float energy_all  = 0.0f;
    float energy_last = 0.0f;

-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
        energy_all += fabsf(pcmf32[i]);

        if (i >= n_samples - n_samples_last) {
@ -510,6 +510,333 @@ std::vector<std::string> read_allowed_commands(const std::string & fname) {
    return allowed_commands;
 }

+// command-list mode
+// guide the transcription to match the most likely command from a provided list
+int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
+   fprintf(stderr, "\n");
+   fprintf(stderr, "%s: guided mode\n", __func__);
+
+   std::vector<std::string> allowed_commands = read_allowed_commands(params.commands);
+
+   if (allowed_commands.empty()) {
+      fprintf(stderr, "%s: error: failed to read allowed commands from '%s'\n", __func__, params.commands.c_str());
+      return 2;
+   }
+
+   int max_len = 0;
+
+   std::vector<std::vector<whisper_token>> allowed_tokens;
+
+   for (const auto & cmd : allowed_commands) {
+      whisper_token tokens[1024];
+      allowed_tokens.emplace_back();
+
+      for (int l = 0; l < (int) cmd.size(); ++l) {
+         // NOTE: very important to add the whitespace !
+         //       the reason is that the first decoded token starts with a whitespace too!
+         std::string ss = std::string(" ") + cmd.substr(0, l + 1);
+
+         const int n = whisper_tokenize(ctx, ss.c_str(), tokens, 1024);
+         if (n < 0) {
+            fprintf(stderr, "%s: error: failed to tokenize command '%s'\n", __func__, cmd.c_str());
+            return 3;
+         }
+
+         if (n == 1) {
+            allowed_tokens.back().push_back(tokens[0]);
+         }
+      }
+
+      max_len = std::max(max_len, (int) cmd.size());
+   }
+
+   fprintf(stderr, "%s: allowed commands [ tokens ]:\n", __func__);
+   fprintf(stderr, "\n");
+   for (int i = 0; i < (int) allowed_commands.size(); ++i) {
+      fprintf(stderr, "  - \033[1m%-*s\033[0m = [", max_len, allowed_commands[i].c_str());
+      for (const auto & token : allowed_tokens[i]) {
+         fprintf(stderr, " %5d", token);
+      }
+      fprintf(stderr, " ]\n");
+   }
+
+   std::string  k_prompt = "select one from the available words: ";
+   for (int i = 0; i < (int) allowed_commands.size(); ++i) {
+      if (i > 0) {
+         k_prompt += ", ";
+      }
+      k_prompt += allowed_commands[i];
+   }
+   k_prompt += ". selected word: ";
+
+   // tokenize prompt
+   std::vector<whisper_token> k_tokens;
+   {
+      k_tokens.resize(1024);
+      const int n = whisper_tokenize(ctx, k_prompt.c_str(), k_tokens.data(), 1024);
+      if (n < 0) {
+         fprintf(stderr, "%s: error: failed to tokenize prompt '%s'\n", __func__, k_prompt.c_str());
+         return 4;
+      }
+      k_tokens.resize(n);
+   }
+
+   fprintf(stderr, "\n");
+   fprintf(stderr, "%s: prompt: '%s'\n", __func__, k_prompt.c_str());
+   fprintf(stderr, "%s: tokens: [", __func__);
+   for (const auto & token : k_tokens) {
+      fprintf(stderr, " %d", token);
+   }
+   fprintf(stderr, " ]\n");
+
+   fprintf(stderr, "\n");
+   fprintf(stderr, "%s: listening for a command ...\n", __func__);
+   fprintf(stderr, "\n");
+
+   bool is_running  = true;
+
+   std::vector<float> pcmf32_cur;
+   std::vector<float> pcmf32_prompt;
+
+   // main loop
+   while (is_running) {
+      // handle Ctrl + C
+      {
+         SDL_Event event;
+         while (SDL_PollEvent(&event)) {
+            switch (event.type) {
+               case SDL_QUIT:
+               {
+                  is_running = false;
+               } break;
+               default:
+                  break;
+            }
+         }
+
+         if (!is_running) {
+            return 0;
+         }
+      }
+
+      // delay
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+      audio.get(2000, pcmf32_cur);
+
+      if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
+         fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
+
+         const auto t_start = std::chrono::high_resolution_clock::now();
+
+         whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+         wparams.print_progress   = false;
+         wparams.print_special    = params.print_special;
+         wparams.print_realtime   = false;
+         wparams.print_timestamps = !params.no_timestamps;
+         wparams.translate        = params.translate;
+         wparams.no_context       = true;
+         wparams.single_segment   = true;
+         wparams.max_tokens       = 1;
+         wparams.language         = params.language.c_str();
+         wparams.n_threads        = params.n_threads;
+
+         wparams.audio_ctx        = params.audio_ctx;
+         wparams.speed_up         = params.speed_up;
+
+         wparams.prompt_tokens    = k_tokens.data();
+         wparams.prompt_n_tokens  = k_tokens.size();
+
+         // run the transformer and a single decoding pass
+         if (whisper_full(ctx, wparams, pcmf32_cur.data(), pcmf32_cur.size()) != 0) {
+            fprintf(stderr, "%s: ERROR: whisper_full() failed\n", __func__);
+            break;
+         }
+
+         const auto * probs = whisper_get_probs(ctx);
+         std::vector<std::pair<float, int>> probs_id;
+
+         double psum = 0.0;
+         for (int i = 0; i < (int) allowed_commands.size(); ++i) {
+            probs_id.emplace_back(probs[allowed_tokens[i][0]], i);
+            for (int j = 1; j < (int) allowed_tokens[i].size(); ++j) {
+               probs_id.back().first += probs[allowed_tokens[i][j]];
+            }
+            probs_id.back().first /= allowed_tokens[i].size();
+            psum += probs_id.back().first;
+         }
+
+         // normalize
+         for (auto & p : probs_id) {
+            p.first /= psum;
+         }
+
+         // sort descending
+         {
+            using pair_type = decltype(probs_id)::value_type;
+            std::sort(probs_id.begin(), probs_id.end(), [](const pair_type & a, const pair_type & b) {
+               return a.first > b.first;
+            });
+         }
+
+         // print the commands and the respective probabilities
+         {
+            fprintf(stdout, "\n");
+            for (const auto & cmd : probs_id) {
+               fprintf(stdout, "%s: %s%-*s%s = %f | ", __func__, "\033[1m", max_len, allowed_commands[cmd.second].c_str(), "\033[0m", cmd.first);
+               for (int token : allowed_tokens[cmd.second]) {
+                  fprintf(stdout, "'%4s' %f ", whisper_token_to_str(ctx, token), probs[token]);
+               }
+               fprintf(stdout, "\n");
+            }
+         }
+
+         // best command
+         {
+            const auto t_end = std::chrono::high_resolution_clock::now();
+
+            const float prob = probs_id[0].first;
+            const int index = probs_id[0].second;
+
+            fprintf(stdout, "\n");
+            fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
+                    "\033[1m", allowed_commands[index].c_str(), "\033[0m", prob,
+                  (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
+            fprintf(stdout, "\n");
+         }
+
+         audio.clear();
+      }
+   }
+
+   return 0;
+}
+
+// general-purpose mode
+// freely transcribe the voice into text
+int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
+   bool is_running  = true;
+   bool have_prompt = false;
+   bool ask_prompt  = true;
+
+   float prob0 = 0.0f;
+   float prob  = 0.0f;
+
+   std::vector<float> pcmf32_cur;
+   std::vector<float> pcmf32_prompt;
+
+   const std::string k_prompt = "Ok Whisper, start listening for commands.";
+
+   fprintf(stderr, "\n");
+   fprintf(stderr, "%s: general-purpose mode\n", __func__);
+
+   // main loop
+   while (is_running) {
+      // handle Ctrl + C
+      {
+         SDL_Event event;
+         while (SDL_PollEvent(&event)) {
+            switch (event.type) {
+               case SDL_QUIT:
+               {
+                  is_running = false;
+               } break;
+               default:
+                  break;
+            }
+         }
+
+         if (!is_running) {
+            return 0;
+         }
+      }
+
+      // delay
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+      if (ask_prompt) {
+         fprintf(stdout, "\n");
+         fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
+         fprintf(stdout, "\n");
+
+         ask_prompt = false;
+      }
+
+      {
+         audio.get(2000, pcmf32_cur);
+
+         if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
+            fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
+
+            int64_t t_ms = 0;
+
+            if (!have_prompt) {
+               // wait for activation phrase
+               audio.get(params.prompt_ms, pcmf32_cur);
+
+               const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob0, t_ms));
+
+               fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
+
+               const float sim = similarity(txt, k_prompt);
+
+               if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
+                  fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
+                  ask_prompt = true;
+               } else {
+                  fprintf(stdout, "\n");
+                  fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
+                  fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
+                  fprintf(stdout, "\n");
+
+                  // save the audio for the prompt
+                  pcmf32_prompt = pcmf32_cur;
+                  have_prompt = true;
+               }
+            } else {
+               // we have heard the activation phrase, now detect the commands
+               audio.get(params.command_ms, pcmf32_cur);
+
+               // prepend the prompt audio
+               pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
+
+               const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
+
+               prob = 100.0f*(prob - prob0);
+
+               //fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
+
+               // find the prompt in the text
+               float best_sim = 0.0f;
+               size_t best_len = 0;
+               for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
+                  const auto prompt = txt.substr(0, n);
+
+                  const float sim = similarity(prompt, k_prompt);
+
+                  //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
+
+                  if (sim > best_sim) {
+                     best_sim = sim;
+                     best_len = n;
+                  }
+               }
+
+               const std::string command = ::trim(txt.substr(best_len));
+
+               fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+               fprintf(stdout, "\n");
+            }
+
+            audio.clear();
+         }
+      }
+   }
+
+   return 0;
+}
+
 int main(int argc, char ** argv) {
    whisper_params params;

@ -561,300 +888,12 @@ int main(int argc, char ** argv) {
    std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    audio.clear();

-    int max_len = 0;
-
-    bool is_running  = true;
-    bool have_prompt = false;
-    bool ask_prompt  = true;
-
-    float prob0 = 0.0f;
-    float prob  = 0.0f;
-
-    std::vector<float> pcmf32_cur;
-    std::vector<float> pcmf32_prompt;
-
-    std::vector<std::string> allowed_commands;
-    std::vector<std::vector<whisper_token>> allowed_tokens;
-
-    std::string k_prompt = "";
-    std::vector<whisper_token> k_tokens;
-
-    if (params.commands != "") {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "%s: guided mode\n", __func__);
-
-        allowed_commands = read_allowed_commands(params.commands);
-
-        if (allowed_commands.empty()) {
-            fprintf(stderr, "%s: error: failed to read allowed commands from '%s'\n", __func__, params.commands.c_str());
-            return 2;
-        }
-
-        for (const auto & cmd : allowed_commands) {
-            whisper_token tokens[1024];
-            allowed_tokens.emplace_back();
-
-            for (int l = 0; l < cmd.size(); ++l) {
-                // NOTE: very important to add the whitespace !
-                //       the reason is that the first decoded token starts with a whitespace too!
-                std::string ss = std::string(" ") + cmd.substr(0, l + 1);
-
-                const int n = whisper_tokenize(ctx, ss.c_str(), tokens, 1024);
-                if (n < 0) {
-                    fprintf(stderr, "%s: error: failed to tokenize command '%s'\n", __func__, cmd.c_str());
-                    return 3;
-                }
-
-                if (n == 1) {
-                    allowed_tokens.back().push_back(tokens[0]);
-                }
-            }
-
-            max_len = std::max(max_len, (int) cmd.size());
-        }
-
-        fprintf(stderr, "%s: allowed commands [ tokens ]:\n", __func__);
-        fprintf(stderr, "\n");
-        for (int i = 0; i < (int) allowed_commands.size(); ++i) {
-            fprintf(stderr, "  - \033[1m%-*s\033[0m = [", max_len, allowed_commands[i].c_str());
-            for (const auto & token : allowed_tokens[i]) {
-                fprintf(stderr, " %5d", token);
-            }
-            fprintf(stderr, " ]\n");
-        }
-
-        k_prompt = "select one from the available words: ";
-        for (int i = 0; i < (int) allowed_commands.size(); ++i) {
-            if (i > 0) {
-                k_prompt += ", ";
-            }
-            k_prompt += allowed_commands[i];
-        }
-        k_prompt += ". selected word: ";
-
-        // tokenize prompt
-        {
-            k_tokens.resize(1024);
-            const int n = whisper_tokenize(ctx, k_prompt.c_str(), k_tokens.data(), 1024);
-            if (n < 0) {
-                fprintf(stderr, "%s: error: failed to tokenize prompt '%s'\n", __func__, k_prompt.c_str());
-                return 4;
-            }
-            k_tokens.resize(n);
-        }
-
-        fprintf(stderr, "\n");
-        fprintf(stderr, "%s: prompt: '%s'\n", __func__, k_prompt.c_str());
-        fprintf(stderr, "%s: tokens: [", __func__);
-        for (const auto & token : k_tokens) {
-            fprintf(stderr, " %d", token);
-        }
-        fprintf(stderr, " ]\n");
-
-        fprintf(stderr, "\n");
-        fprintf(stderr, "%s: listening for a command ...\n", __func__);
-        fprintf(stderr, "\n");
+    int  ret_val = 0;

+    if (!params.commands.empty()) {
+       ret_val = process_command_list(ctx, audio, params);
    } else {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "%s: general-purpose mode\n", __func__);
-
-        k_prompt = "Ok Whisper, start listening for commands.";
-    }
-
-    // main loop
-    while (is_running) {
-        // handle Ctrl + C
-        {
-            SDL_Event event;
-            while (SDL_PollEvent(&event)) {
-                switch (event.type) {
-                    case SDL_QUIT:
-                        {
-                            is_running = false;
-                        } break;
-                    default:
-                        break;
-                }
-            }
-
-            if (!is_running) {
-                break;
-            }
-        }
-
-        // delay
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-        if (allowed_commands.empty()) {
-            // general-purpose mode
-            // freely transcribe the voice into text
-
-            if (ask_prompt) {
-                fprintf(stdout, "\n");
-                fprintf(stdout, "%s: Say the following phrase: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
-                fprintf(stdout, "\n");
-
-                ask_prompt = false;
-            }
-
-            {
-                int64_t t_ms = 0;
-
-                audio.get(2000, pcmf32_cur);
-
-                if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
-                    fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-
-                    if (!have_prompt) {
-                        // wait for activation phrase
-                        audio.get(params.prompt_ms, pcmf32_cur);
-
-                        const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob0, t_ms));
-
-                        fprintf(stdout, "%s: Heard '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", txt.c_str(), "\033[0m", (int) t_ms);
-
-                        const float sim = similarity(txt, k_prompt);
-
-                        if (txt.length() < 0.8*k_prompt.length() || txt.length() > 1.2*k_prompt.length() || sim < 0.8f) {
-                            fprintf(stdout, "%s: WARNING: prompt not recognized, try again\n", __func__);
-                            ask_prompt = true;
-                        } else {
-                            fprintf(stdout, "\n");
-                            fprintf(stdout, "%s: The prompt has been recognized!\n", __func__);
-                            fprintf(stdout, "%s: Waiting for voice commands ...\n", __func__);
-                            fprintf(stdout, "\n");
-
-                            // save the audio for the prompt
-                            pcmf32_prompt = pcmf32_cur;
-                            have_prompt = true;
-                        }
-                    } else {
-                        // we have heard the activation phrase, now detect the commands
-                        audio.get(params.command_ms, pcmf32_cur);
-
-                        // prepend the prompt audio
-                        pcmf32_cur.insert(pcmf32_cur.begin(), pcmf32_prompt.begin(), pcmf32_prompt.end());
-
-                        const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
-
-                        prob = 100.0f*(prob - prob0);
-
-                        //fprintf(stdout, "%s: heard '%s'\n", __func__, txt.c_str());
-
-                        // find the prompt in the text
-                        float best_sim = 0.0f;
-                        size_t best_len = 0;
-                        for (int n = 0.8*k_prompt.size(); n <= 1.2*k_prompt.size(); ++n) {
-                            const auto prompt = txt.substr(0, n);
-
-                            const float sim = similarity(prompt, k_prompt);
-
-                            //fprintf(stderr, "%s: prompt = '%s', sim = %f\n", __func__, prompt.c_str(), sim);
-
-                            if (sim > best_sim) {
-                                best_sim = sim;
-                                best_len = n;
-                            }
-                        }
-
-                        const std::string command = ::trim(txt.substr(best_len));
-
-                        fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
-                        fprintf(stdout, "\n");
-                    }
-
-                    audio.clear();
-                }
-            }
-        } else {
-            // command-list mode
-            // guide the transcription to match the most likely command from a provided list
-
-            audio.get(2000, pcmf32_cur);
-
-            if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
-                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-
-                const auto t_start = std::chrono::high_resolution_clock::now();
-
-                whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
-
-                wparams.print_progress   = false;
-                wparams.print_special    = params.print_special;
-                wparams.print_realtime   = false;
-                wparams.print_timestamps = !params.no_timestamps;
-                wparams.translate        = params.translate;
-                wparams.no_context       = true;
-                wparams.single_segment   = true;
-                wparams.max_tokens       = 1;
-                wparams.language         = params.language.c_str();
-                wparams.n_threads        = params.n_threads;
-
-                wparams.audio_ctx        = params.audio_ctx;
-                wparams.speed_up         = params.speed_up;
-
-                wparams.prompt_tokens    = k_tokens.data();
-                wparams.prompt_n_tokens  = k_tokens.size();
-
-                // run the transformer and a single decoding pass
-                if (whisper_full(ctx, wparams, pcmf32_cur.data(), pcmf32_cur.size()) != 0) {
-                    fprintf(stderr, "%s: ERROR: whisper_full() failed\n", __func__);
-                    break;
-                }
-
-                const auto * probs = whisper_get_probs(ctx);
-                std::vector<std::pair<float, int>> probs_id;
-
-                double psum = 0.0;
-                for (int i = 0; i < (int) allowed_commands.size(); ++i) {
-                    probs_id.push_back(std::make_pair(probs[allowed_tokens[i][0]], i));
-                    for (int j = 1; j < (int) allowed_tokens[i].size(); ++j) {
-                        probs_id.back().first += probs[allowed_tokens[i][j]];
-                    }
-                    probs_id.back().first /= allowed_tokens[i].size();
-                    psum += probs_id.back().first;
-                }
-
-                // normalize
-                for (auto & p : probs_id) {
-                    p.first /= psum;
-                }
-
-                // sort descending
-                {
-                    using pair_type = decltype(probs_id)::value_type;
-                    std::sort(probs_id.begin(), probs_id.end(), [](const pair_type & a, const pair_type & b) {
-                        return a.first > b.first;
-                    });
-                }
-
-                // print the commands and the respective probabilities
-                {
-                    fprintf(stdout, "\n");
-                    for (const auto & cmd : probs_id) {
-                        fprintf(stdout, "%s: %s%-*s%s = %f | ", __func__, "\033[1m", max_len, allowed_commands[cmd.second].c_str(), "\033[0m", cmd.first);
-                        for (int i = 0; i < (int) allowed_tokens[cmd.second].size(); ++i) {
-                            fprintf(stdout, "'%4s' %f ", whisper_token_to_str(ctx, allowed_tokens[cmd.second][i]), probs[allowed_tokens[cmd.second][i]]);
-                        }
-                        fprintf(stdout, "\n");
-                    }
-                }
-
-                // best command
-                {
-                    fprintf(stdout, "\n");
-                    fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
-                            "\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
-                            (int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count());
-                    fprintf(stdout, "\n");
-                }
-
-                const auto t_end = std::chrono::high_resolution_clock::now();
-
-                audio.clear();
-            }
-        }
+       ret_val = process_general_transcription(ctx, audio, params);
    }

    audio.pause();
@ -862,5 +901,5 @@ int main(int argc, char ** argv) {
    whisper_print_timings(ctx);
    whisper_free(ctx);

-    return 0;
+    return ret_val;
 }
--- a/examples/main/CMakeLists.txt
+++ b/examples/main/CMakeLists.txt
@ -1,3 +1,6 @@
 set(TARGET main)
 add_executable(${TARGET} main.cpp)
+
+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -69,13 +69,14 @@ struct whisper_params {
    bool output_vtt     = false;
    bool output_srt     = false;
    bool output_wts     = false;
+    bool output_csv     = false;
    bool print_special  = false;
    bool print_colors   = false;
    bool print_progress = false;
    bool no_timestamps  = false;

    std::string language = "en";
-    std::string prompt   = "";
+    std::string prompt;
    std::string model    = "models/ggml-base.en.bin";

    std::vector<std::string> fname_inp = {};
@ -111,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-ovtt" || arg == "--output-vtt")     { params.output_vtt     = true; }
        else if (arg == "-osrt" || arg == "--output-srt")     { params.output_srt     = true; }
        else if (arg == "-owts" || arg == "--output-words")   { params.output_wts     = true; }
+        else if (arg == "-ocsv" || arg == "--output-csv")     { params.output_csv     = true; }
        else if (arg == "-ps"   || arg == "--print-special")  { params.print_special  = true; }
        else if (arg == "-pc"   || arg == "--print-colors")   { params.print_colors   = true; }
        else if (arg == "-pp"   || arg == "--print-progress") { params.print_progress = true; }
@ -118,7 +120,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-l"    || arg == "--language")       { params.language       = argv[++i]; }
        else if (                  arg == "--prompt")         { params.prompt         = argv[++i]; }
        else if (arg == "-m"    || arg == "--model")          { params.model          = argv[++i]; }
-        else if (arg == "-f"    || arg == "--file")           { params.fname_inp.push_back(argv[++i]); }
+        else if (arg == "-f"    || arg == "--file")           { params.fname_inp.emplace_back(argv[++i]); }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -129,7 +131,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    return true;
 }

-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
    fprintf(stderr, "\n");
@ -150,6 +152,7 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
    fprintf(stderr, "  -ovtt,    --output-vtt     [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
    fprintf(stderr, "  -osrt,    --output-srt     [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
    fprintf(stderr, "  -owts,    --output-words   [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
+    fprintf(stderr, "  -ocsv,    --output-csv     [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
    fprintf(stderr, "  -ps,      --print-special  [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
    fprintf(stderr, "  -pc,      --print-colors   [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
    fprintf(stderr, "  -pp,      --print-progress [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
@ -173,90 +176,81 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi

    const int n_segments = whisper_full_n_segments(ctx);

+    std::string speaker = "";
+
+    int64_t t0;
+    int64_t t1;
+
    // print the last n_new segments
    const int s0 = n_segments - n_new;
+
    if (s0 == 0) {
        printf("\n");
    }

    for (int i = s0; i < n_segments; i++) {
-        if (params.no_timestamps) {
-            if (params.print_colors) {
-                for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
-                    if (params.print_special == false) {
-                        const whisper_token id = whisper_full_get_token_id(ctx, i, j);
-                        if (id >= whisper_token_eot(ctx)) {
-                            continue;
-                        }
-                    }
-
-                    const char * text = whisper_full_get_token_text(ctx, i, j);
-                    const float  p    = whisper_full_get_token_p   (ctx, i, j);
-
-                    const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
-
-                    printf("%s%s%s", k_colors[col].c_str(), text, "\033[0m");
-                }
-            } else {
-                const char * text = whisper_full_get_segment_text(ctx, i);
-                printf("%s", text);
-            }
-            fflush(stdout);
-        } else {
-            const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-            const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-            std::string speaker = "";
-
-            if (params.diarize && pcmf32s.size() == 2) {
-                const int64_t n_samples = pcmf32s[0].size();
-
-                const int64_t is0 = timestamp_to_sample(t0, n_samples);
-                const int64_t is1 = timestamp_to_sample(t1, n_samples);
-
-                double energy0 = 0.0f;
-                double energy1 = 0.0f;
-
-                for (int64_t j = is0; j < is1; j++) {
-                    energy0 += fabs(pcmf32s[0][j]);
-                    energy1 += fabs(pcmf32s[1][j]);
-                }
-
-                if (energy0 > 1.1*energy1) {
-                    speaker = "(speaker 0)";
-                } else if (energy1 > 1.1*energy0) {
-                    speaker = "(speaker 1)";
-                } else {
-                    speaker = "(speaker ?)";
-                }
-
-                //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
-            }
-
-            if (params.print_colors) {
-                printf("[%s --> %s]  ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
-                for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
-                    if (params.print_special == false) {
-                        const whisper_token id = whisper_full_get_token_id(ctx, i, j);
-                        if (id >= whisper_token_eot(ctx)) {
-                            continue;
-                        }
-                    }
-
-                    const char * text = whisper_full_get_token_text(ctx, i, j);
-                    const float  p    = whisper_full_get_token_p   (ctx, i, j);
-
-                    const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
-
-                    printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
-                }
-                printf("\n");
-            } else {
-                const char * text = whisper_full_get_segment_text(ctx, i);
-
-                printf("[%s --> %s]  %s%s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), speaker.c_str(), text);
-            }
+        if (!params.no_timestamps || params.diarize) {
+            t0 = whisper_full_get_segment_t0(ctx, i);
+            t1 = whisper_full_get_segment_t1(ctx, i);
        }
+
+        if (!params.no_timestamps) {
+            printf("[%s --> %s]  ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
+        }
+
+        if (params.diarize && pcmf32s.size() == 2) {
+            const int64_t n_samples = pcmf32s[0].size();
+
+            const int64_t is0 = timestamp_to_sample(t0, n_samples);
+            const int64_t is1 = timestamp_to_sample(t1, n_samples);
+
+            double energy0 = 0.0f;
+            double energy1 = 0.0f;
+
+            for (int64_t j = is0; j < is1; j++) {
+                energy0 += fabs(pcmf32s[0][j]);
+                energy1 += fabs(pcmf32s[1][j]);
+            }
+
+            if (energy0 > 1.1*energy1) {
+                speaker = "(speaker 0)";
+            } else if (energy1 > 1.1*energy0) {
+                speaker = "(speaker 1)";
+            } else {
+                speaker = "(speaker ?)";
+            }
+
+            //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
+        }
+
+        if (params.print_colors) {
+            for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
+                if (params.print_special == false) {
+                    const whisper_token id = whisper_full_get_token_id(ctx, i, j);
+                    if (id >= whisper_token_eot(ctx)) {
+                        continue;
+                    }
+                }
+
+                const char * text = whisper_full_get_token_text(ctx, i, j);
+                const float  p    = whisper_full_get_token_p   (ctx, i, j);
+
+                const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
+
+                printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
+            }
+        } else {
+            const char * text = whisper_full_get_segment_text(ctx, i);
+
+            printf("%s%s", speaker.c_str(), text);
+        }
+
+        // with timestamps or speakers: each segment on new line
+        if (!params.no_timestamps || params.diarize) {
+            printf("\n");
+        }
+
+        fflush(stdout);
    }
 }

@ -325,10 +319,36 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
    return true;
 }

+bool output_csv(struct whisper_context * ctx, const char * fname) {
+    std::ofstream fout(fname);
+    if (!fout.is_open()) {
+        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
+        return false;
+    }
+
+    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
+
+    const int n_segments = whisper_full_n_segments(ctx);
+    for (int i = 0; i < n_segments; ++i) {
+        const char * text = whisper_full_get_segment_text(ctx, i);
+	if (text[0] == ' ')
+	  text = text + sizeof(char); //whisper_full_get_segment_text() returns a string with leading space, point to the next character.
+        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+	//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
+        fout << 10 * t0 << ", " 
+	     << 10 * t1 << ", \"" 
+	     << text    << "\"\n";
+    }
+
+    return true;
+}
+
+
 // karaoke video generation
 // outputs a bash script that uses ffmpeg to generate a video with the subtitles
 // TODO: font parameter adjustments
-bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
+bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) {
    std::ofstream fout(fname);

    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
@ -377,7 +397,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
            txt_ul = "\\ \\ ";

            {
-                int ncnt = 0;
                for (int k = 0; k < n; ++k) {
                    const auto & token2 = tokens[k];

@ -401,8 +420,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
                            txt_ul += "\\ ";
                        }
                    }
-
-                    ncnt += txt.size();
                }

                ::replace_all(txt_bg, "'", "\u2019");
@ -471,7 +488,7 @@ int main(int argc, char ** argv) {
    // initial prompt
    std::vector<whisper_token> prompt_tokens;

-    if (params.prompt.size() > 0) {
+    if (!params.prompt.empty()) {
        prompt_tokens.resize(1024);
        prompt_tokens.resize(whisper_tokenize(ctx, params.prompt.c_str(), prompt_tokens.data(), prompt_tokens.size()));

@ -508,14 +525,14 @@ int main(int argc, char ** argv) {
                    }
                }

-                if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), NULL) == false) {
+                if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
                    fprintf(stderr, "error: failed to open WAV file from stdin\n");
                    return 4;
                }

                fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
            }
-            else if (drwav_init_file(&wav, fname_inp.c_str(), NULL) == false) {
+            else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
                fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
                return 5;
            }
@ -531,7 +548,7 @@ int main(int argc, char ** argv) {
            }

            if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
-                fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
+                fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", argv[0], fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
                return 8;
            }

@ -550,11 +567,11 @@ int main(int argc, char ** argv) {
            // convert to mono, float
            pcmf32.resize(n);
            if (wav.channels == 1) {
-                for (int i = 0; i < n; i++) {
+                for (uint64_t i = 0; i < n; i++) {
                    pcmf32[i] = float(pcm16[i])/32768.0f;
                }
            } else {
-                for (int i = 0; i < n; i++) {
+                for (uint64_t i = 0; i < n; i++) {
                    pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
                }
            }
@ -565,7 +582,7 @@ int main(int argc, char ** argv) {

                pcmf32s[0].resize(n);
                pcmf32s[1].resize(n);
-                for (int i = 0; i < n; i++) {
+                for (uint64_t i = 0; i < n; i++) {
                    pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
                    pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
                }
@ -620,8 +637,8 @@ int main(int argc, char ** argv) {

            wparams.speed_up         = params.speed_up;

-            wparams.prompt_tokens    = prompt_tokens.size() == 0 ? nullptr : prompt_tokens.data();
-            wparams.prompt_n_tokens  = prompt_tokens.size() == 0 ? 0       : prompt_tokens.size();
+            wparams.prompt_tokens    = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
+            wparams.prompt_n_tokens  = prompt_tokens.empty() ? 0       : prompt_tokens.size();

            whisper_print_user_data user_data = { &params, &pcmf32s };

@ -637,7 +654,7 @@ int main(int argc, char ** argv) {
            {
                static bool is_aborted = false; // NOTE: this should be atomic to avoid data race

-                wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
+                wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
                    bool is_aborted = *(bool*)user_data;
                    return !is_aborted;
                };
@ -677,6 +694,13 @@ int main(int argc, char ** argv) {
                const auto fname_wts = fname_inp + ".wts";
                output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
            }
+
+	    // output to CSV file
+            if (params.output_csv) {
+                const auto fname_csv = fname_inp + ".csv";
+                output_csv(ctx, fname_csv.c_str());
+            }
+
        }
    }

--- a/examples/stream.wasm/CMakeLists.txt
+++ b/examples/stream.wasm/CMakeLists.txt
@ -8,6 +8,8 @@ add_executable(${TARGET}
    emscripten.cpp
    )

+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE
    whisper
    )
--- a/examples/stream/CMakeLists.txt
+++ b/examples/stream/CMakeLists.txt
@ -2,6 +2,9 @@ if (WHISPER_SUPPORT_SDL2)
    # stream
    set(TARGET stream)
    add_executable(${TARGET} stream.cpp)
+
+    include(DefaultTargetOptions)
+
    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
 endif ()
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -8,6 +8,7 @@
 #include <SDL.h>
 #include <SDL_audio.h>

+#include <atomic>
 #include <cassert>
 #include <cstdio>
 #include <string>
@ -51,7 +52,7 @@ struct whisper_params {

    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
-    std::string fname_out = "";
+    std::string fname_out;
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -90,7 +91,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    return true;
 }

-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
@ -144,8 +145,8 @@ private:
    int m_len_ms = 0;
    int m_sample_rate = 0;

-    bool       m_running = false;
-    std::mutex m_mutex;
+    std::atomic_bool m_running;
+    std::mutex       m_mutex;

    std::vector<float> m_audio;
    std::vector<float> m_audio_new;
@ -155,6 +156,8 @@ private:

 audio_async::audio_async(int len_ms) {
    m_len_ms = len_ms;
+
+    m_running = false;
 }

 audio_async::~audio_async() {
@ -391,7 +394,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
    float energy_all  = 0.0f;
    float energy_last = 0.0f;

-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
        energy_all += fabsf(pcmf32[i]);

        if (i >= n_samples - n_samples_last) {
@ -427,10 +430,10 @@ int main(int argc, char ** argv) {
    const int n_samples_keep = (params.keep_ms  *1e-3)*WHISPER_SAMPLE_RATE;
    const int n_samples_30s  = (30000           *1e-3)*WHISPER_SAMPLE_RATE;

-    const int n_new_line = params.length_ms / params.step_ms - 1; // number of steps to print new line
-
    const bool use_vad = n_samples_step <= 0; // sliding window mode uses VAD

+    const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line
+
    params.no_timestamps = !use_vad;
    params.no_context    = use_vad;
    params.max_tokens    = 0;
--- a/examples/talk.wasm/CMakeLists.txt
+++ b/examples/talk.wasm/CMakeLists.txt
@ -9,6 +9,8 @@ add_executable(${TARGET}
    gpt-2.cpp
    )

+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE
    whisper
    )
@ -31,8 +33,8 @@ set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
-    -s INITIAL_MEMORY=1600MB \
-    -s TOTAL_MEMORY=1600MB \
+    -s INITIAL_MEMORY=1800MB \
+    -s TOTAL_MEMORY=1800MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
--- a/examples/talk.wasm/README.md
+++ b/examples/talk.wasm/README.md
@ -36,7 +36,7 @@ In order to run this demo efficiently, you need to have the following:
 - Latest Chrome or Firefox browser (Safari is not supported)
 - Run this on a desktop or laptop with modern CPU (a mobile phone will likely not be good enough)
 - Speak phrases that are no longer than 10 seconds - this is the audio context of the AI
- The web-page uses about 1.6GB of RAM
+- The web-page uses about 1.8GB of RAM

 Notice that this demo is using the smallest GPT-2 model, so the generated text responses are not always very good.
 Also, the prompting strategy can likely be improved to achieve better results.
--- a/examples/talk/CMakeLists.txt
+++ b/examples/talk/CMakeLists.txt
@ -8,6 +8,9 @@ if (WHISPER_SUPPORT_SDL2)
    # TODO: this is temporary
    #       need to export ggml symbols for MSVC, but too lazy ..
    add_executable(${TARGET} talk.cpp gpt-2.cpp ../../ggml.c ../../whisper.cpp)
+
+    include(DefaultTargetOptions)
+
    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
    target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
 endif ()
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@ -40,7 +40,7 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
    // find the longest tokens that form the words:
    std::vector<gpt_vocab::id> tokens;
    for (const auto & word : words) {
-        if (word.size() == 0) continue;
+        if (word.empty()) continue;

        int i = 0;
        int n = word.size();
@ -78,7 +78,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
        const float * logits,
        int    top_k,
        double top_p,
-        double temp,
+        double /*temp*/,
        std::mt19937 & rng) {
    int n_logits = vocab.id_to_token.size();

@ -86,7 +86,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
    logits_id.reserve(n_logits);

    for (int i = 0; i < n_logits; i++) {
-        logits_id.push_back(std::make_pair(logits[i], i));
+        logits_id.emplace_back(logits[i], i);
    }

    // find the top K tokens
@ -268,7 +268,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
            fin.read((char *) &len, sizeof(len));

            word.resize(len);
-            fin.read((char *) word.data(), len);
+            fin.read((char *) &word[0], len);

            vocab.token_to_id[word] = i;
            vocab.id_to_token[i] = word;
@ -327,7 +327,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
    {
        struct ggml_init_params params;
        params.mem_size   = ctx_size;
-        params.mem_buffer = NULL;
+        params.mem_buffer = nullptr;

        model.ctx = ggml_init(params);
        if (!model.ctx) {
@ -448,7 +448,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
            std::string name(length, 0);
            fin.read(&name[0], length);

-            if (model.tensors.find(name.data()) == model.tensors.end()) {
+            if (model.tensors.find(name) == model.tensors.end()) {
                fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
                return false;
            }
@ -833,7 +833,7 @@ Me too.
 struct gpt2_context * gpt2_init(const char * path_model) {
    gpt2_context * ctx = new gpt2_context;

-    ctx->rng = std::mt19937(time(NULL));
+    ctx->rng = std::mt19937(time(nullptr));

    // load the model
    {
@ -841,6 +841,7 @@ struct gpt2_context * gpt2_init(const char * path_model) {

        if (!gpt2_model_load(path_model, ctx->model, ctx->vocab)) {
            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
+            delete ctx;
            return nullptr;
        }

@ -884,9 +885,9 @@ std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens)

    std::string result;

-    for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
+    for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) {
        // predict
-        if (embd.size() > 0) {
+        if (!embd.empty()) {
            if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
                printf("gpt-2: failed to generate text\n");
                return "";
--- a/examples/talk/talk.cpp
+++ b/examples/talk/talk.cpp
@ -39,7 +39,7 @@ struct whisper_params {
    std::string model_wsp = "models/ggml-base.en.bin";
    std::string model_gpt = "models/ggml-gpt-2-117M.bin";
    std::string speak     = "./examples/talk/speak.sh";
-    std::string fname_out = "";
+    std::string fname_out;
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -79,7 +79,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    return true;
 }

-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
@ -397,7 +397,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
    float energy_all  = 0.0f;
    float energy_last = 0.0f;

-    for (size_t i = 0; i < n_samples; i++) {
+    for (int i = 0; i < n_samples; i++) {
        energy_all += fabsf(pcmf32[i]);

        if (i >= n_samples - n_samples_last) {
@ -541,7 +541,6 @@ int main(int argc, char ** argv) {
    bool force_speak = false;

    float prob0 = 0.0f;
-    float prob  = 0.0f;

    std::vector<float> pcmf32_cur;
    std::vector<float> pcmf32_prompt;
@ -589,7 +588,7 @@ int main(int argc, char ** argv) {

                audio.get(params.voice_ms, pcmf32_cur);

-                std::string text_heard = "";
+                std::string text_heard;

                if (!force_speak) {
                    text_heard = ::trim(::transcribe(ctx_wsp, params, pcmf32_cur, prob0, t_ms));
@ -611,7 +610,7 @@ int main(int argc, char ** argv) {
                text_heard = std::regex_replace(text_heard, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");

                // take first line
-                text_heard = text_heard.substr(0, text_heard.find_first_of("\n"));
+                text_heard = text_heard.substr(0, text_heard.find_first_of('\n'));

                // remove leading and trailing whitespace
                text_heard = std::regex_replace(text_heard, std::regex("^\\s+"), "");
@ -641,18 +640,18 @@ int main(int argc, char ** argv) {

                    text_to_speak = gpt2_gen_text(ctx_gpt, prompt.c_str(), params.max_tokens);
                    text_to_speak = std::regex_replace(text_to_speak, std::regex("[^a-zA-Z0-9\\.,\\?!\\s\\:\\'\\-]"), "");
-                    text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of("\n"));
+                    text_to_speak = text_to_speak.substr(0, text_to_speak.find_first_of('\n'));

                    // remove first 2 lines of base prompt
                    if (n_iter > 4) {
                        {
-                            const size_t pos = prompt_base.find_first_of("\n");
+                            const size_t pos = prompt_base.find_first_of('\n');
                            if (pos != std::string::npos) {
                                prompt_base = prompt_base.substr(pos + 1);
                            }
                        }
                        {
-                            const size_t pos = prompt_base.find_first_of("\n");
+                            const size_t pos = prompt_base.find_first_of('\n');
                            if (pos != std::string::npos) {
                                prompt_base = prompt_base.substr(pos + 1);
                            }
--- a/examples/whisper.android/.idea/gradle.xml
+++ b/examples/whisper.android/.idea/gradle.xml
@ -1,5 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="GradleMigrationSettings" migrationVersion="1" />
  <component name="GradleSettings">
    <option name="linkedExternalProjectsSettings">
      <GradleProjectSettings>
--- a/examples/whisper.android/app/build.gradle
+++ b/examples/whisper.android/app/build.gradle
@ -14,10 +14,6 @@ android {
        versionCode 1
        versionName "1.0"

-        ndk {
-            abiFilters 'arm64-v8a', 'x86_64'
-        }
-
        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary true
@ -44,7 +40,7 @@ android {
    composeOptions {
        kotlinCompilerExtensionVersion '1.3.1'
    }
-    ndkVersion "25.0.8528842"
+    ndkVersion "25.1.8937393"
    externalNativeBuild {
        ndkBuild {
            path 'src/main/jni/whisper/Android.mk'
--- a/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
+++ b/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt
@ -1,8 +1,13 @@
 package com.whispercppdemo.whisper

+import android.os.Build
+import android.util.Log
 import kotlinx.coroutines.*
+import java.io.File
 import java.util.concurrent.Executors

+private const val LOG_TAG = "LibWhisper"
+
 class WhisperContext private constructor(private var ptr: Long) {
    // Meet Whisper C++ constraint: Don't access from more than one thread at a time.
    private val scope: CoroutineScope = CoroutineScope(
@ -47,7 +52,27 @@ class WhisperContext private constructor(private var ptr: Long) {
 private class WhisperLib {
    companion object {
        init {
-            System.loadLibrary("whisper")
+            Log.d(LOG_TAG, "Primary ABI: ${Build.SUPPORTED_ABIS[0]}")
+            var loadVfpv4 = false
+            if (isArmEabiV7a()) {
+                // armeabi-v7a needs runtime detection support
+                val cpuInfo = cpuInfo()
+                cpuInfo?.let {
+                    Log.d(LOG_TAG, "CPU info: $cpuInfo")
+                    if (cpuInfo.contains("vfpv4")) {
+                        Log.d(LOG_TAG, "CPU supports vfpv4")
+                        loadVfpv4 = true
+                    }
+                }
+            }
+
+            if (loadVfpv4) {
+                Log.d(LOG_TAG, "Loading libwhisper_vfpv4.so")
+                System.loadLibrary("whisper_vfpv4")
+            } else {
+                Log.d(LOG_TAG, "Loading libwhisper.so")
+                System.loadLibrary("whisper")
+            }
        }

        // JNI methods
@ -59,3 +84,17 @@ private class WhisperLib {
    }
 }

+private fun isArmEabiV7a(): Boolean {
+    return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a")
+}
+
+private fun cpuInfo(): String? {
+    return try {
+        File("/proc/cpuinfo").inputStream().bufferedReader().use {
+            it.readText()
+        }
+    } catch (e: Exception) {
+        Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e)
+        null
+    }
+}
--- a/examples/whisper.android/app/src/main/jni/whisper/Android.mk
+++ b/examples/whisper.android/app/src/main/jni/whisper/Android.mk
@ -1,22 +1,15 @@
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
-WHISPER_LIB_DIR := $(LOCAL_PATH)/../../../../../../../
-LOCAL_LDLIBS    := -llog
 LOCAL_MODULE    := libwhisper
+include $(LOCAL_PATH)/Whisper.mk
+include $(BUILD_SHARED_LIBRARY)

-# Make the final output library smaller by only keeping the symbols referenced from the app.
-ifneq ($(APP_OPTIM),debug)
-    LOCAL_CFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
-    LOCAL_CFLAGS += -ffunction-sections -fdata-sections
-    LOCAL_LDFLAGS += -Wl,--gc-sections
-    LOCAL_LDFLAGS += -Wl,--exclude-libs,ALL
-    LOCAL_LDFLAGS += -flto
-endif
-
-LOCAL_CFLAGS    += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
-LOCAL_CPPFLAGS  += -std=c++11
-LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
-                   $(WHISPER_LIB_DIR)/whisper.cpp \
-                   $(LOCAL_PATH)/jni.c
-
-include $(BUILD_SHARED_LIBRARY)
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+	include $(CLEAR_VARS)
+	LOCAL_MODULE    := libwhisper_vfpv4
+	include $(LOCAL_PATH)/Whisper.mk
+	# Allow building NEON FMA code.
+	# https://android.googlesource.com/platform/ndk/+/master/sources/android/cpufeatures/cpu-features.h
+	LOCAL_CFLAGS += -mfpu=neon-vfpv4
+	include $(BUILD_SHARED_LIBRARY)
+endif
--- a/examples/whisper.android/app/src/main/jni/whisper/Whisper.mk
+++ b/examples/whisper.android/app/src/main/jni/whisper/Whisper.mk
@ -0,0 +1,18 @@
+WHISPER_LIB_DIR := $(LOCAL_PATH)/../../../../../../../
+LOCAL_LDLIBS    := -llog
+
+# Make the final output library smaller by only keeping the symbols referenced from the app.
+ifneq ($(APP_OPTIM),debug)
+    LOCAL_CFLAGS += -O3
+    LOCAL_CFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
+    LOCAL_CFLAGS += -ffunction-sections -fdata-sections
+    LOCAL_LDFLAGS += -Wl,--gc-sections
+    LOCAL_LDFLAGS += -Wl,--exclude-libs,ALL
+    LOCAL_LDFLAGS += -flto
+endif
+
+LOCAL_CFLAGS    += -DSTDC_HEADERS -std=c11 -I $(WHISPER_LIB_DIR)
+LOCAL_CPPFLAGS  += -std=c++11
+LOCAL_SRC_FILES := $(WHISPER_LIB_DIR)/ggml.c \
+                   $(WHISPER_LIB_DIR)/whisper.cpp \
+                   $(LOCAL_PATH)/jni.c
--- a/examples/whisper.objc/README.md
+++ b/examples/whisper.objc/README.md
@ -19,3 +19,8 @@ open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/
 Make sure to build the project in `Release`:

 <img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
+
+Also, don't forget to add the `-DGGML_USE_ACCELERATE` compiler flag in Build Phases.
+This can significantly improve the performance of the transcription:
+
+<img width="1072" alt="image" src="https://user-images.githubusercontent.com/1991296/208511239-8d7cdbd1-aa48-41b5-becd-ca288d53cc07.png">
--- a/examples/whisper.swiftui/README.md
+++ b/examples/whisper.swiftui/README.md
@ -0,0 +1,12 @@
+A sample SwiftUI app using [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions.
+See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc).
+
+To use:
+
+1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1]
+2. Add the model to "whisper.swiftui.demo/Resources/models" via Xcode.
+3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)).
+4. Add the model to "whisper.swiftui.demo/Resources/samples" via Xcode.
+5. Select the "release" build configuration under "Run", then deploy and run to your device.
+
+[^1]: I recommend the tiny, base or small models for running on an iOS device.
--- a/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
+++ b/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift
@ -0,0 +1,70 @@
+import Foundation
+
+enum WhisperError: Error {
+    case couldNotInitializeContext
+}
+
+// Meet Whisper C++ constraint: Don't access from more than one thread at a time.
+actor WhisperContext {
+    private var context: OpaquePointer
+    
+    init(context: OpaquePointer) {
+        self.context = context
+    }
+    
+    deinit {
+        whisper_free(context)
+    }
+    
+    func fullTranscribe(samples: [Float]) {
+        // Leave 2 processors free (i.e. the high-efficiency cores).
+        let maxThreads = max(1, min(8, cpuCount() - 2))
+        print("Selecting \(maxThreads) threads")
+        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
+        "en".withCString { en in
+            // Adapted from whisper.objc
+            params.print_realtime = true
+            params.print_progress = false
+            params.print_timestamps = true
+            params.print_special = false
+            params.translate = false
+            params.language = en
+            params.n_threads = Int32(maxThreads)
+            params.offset_ms = 0
+            params.no_context = true
+            params.single_segment = false
+            
+            whisper_reset_timings(context)
+            print("About to run whisper_full")
+            samples.withUnsafeBufferPointer { samples in
+                if (whisper_full(context, params, samples.baseAddress, Int32(samples.count)) != 0) {
+                    print("Failed to run the model")
+                } else {
+                    whisper_print_timings(context)
+                }
+            }
+        }
+    }
+    
+    func getTranscription() -> String {
+        var transcription = ""
+        for i in 0..<whisper_full_n_segments(context) {
+            transcription += String.init(cString: whisper_full_get_segment_text(context, i))
+        }
+        return transcription
+    }
+    
+    static func createContext(path: String) throws -> WhisperContext {
+        let context = whisper_init(path)
+        if let context {
+            return WhisperContext(context: context)
+        } else {
+            print("Couldn't load model at \(path)")
+            throw WhisperError.couldNotInitializeContext
+        }
+    }
+}
+
+fileprivate func cpuCount() -> Int {
+    ProcessInfo.processInfo.processorCount
+}
--- a/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
+++ b/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h
@ -0,0 +1,4 @@
+//
+//  Use this file to import your target's public headers that you would like to expose to Swift.
+//
+#import "whisper.h"
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Models/WhisperState.swift
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Models/WhisperState.swift
@ -0,0 +1,162 @@
+import Foundation
+import SwiftUI
+import AVFoundation
+
+@MainActor
+class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
+    @Published var isModelLoaded = false
+    @Published var messageLog = ""
+    @Published var canTranscribe = false
+    @Published var isRecording = false
+    
+    private var whisperContext: WhisperContext?
+    private let recorder = Recorder()
+    private var recordedFile: URL? = nil
+    private var audioPlayer: AVAudioPlayer?
+    
+    private var modelUrl: URL? {
+        Bundle.main.url(forResource: "ggml-tiny.en", withExtension: "bin", subdirectory: "models")
+    }
+    
+    private var sampleUrl: URL? {
+        Bundle.main.url(forResource: "jfk", withExtension: "wav", subdirectory: "samples")
+    }
+    
+    private enum LoadError: Error {
+        case couldNotLocateModel
+    }
+    
+    override init() {
+        super.init()
+        do {
+            try loadModel()
+            canTranscribe = true
+        } catch {
+            print(error.localizedDescription)
+            messageLog += "\(error.localizedDescription)\n"
+        }
+    }
+    
+    private func loadModel() throws {
+        messageLog += "Loading model...\n"
+        if let modelUrl {
+            whisperContext = try WhisperContext.createContext(path: modelUrl.path())
+            messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
+        } else {
+            messageLog += "Could not locate model\n"
+        }
+    }
+    
+    func transcribeSample() async {
+        if let sampleUrl {
+            await transcribeAudio(sampleUrl)
+        } else {
+            messageLog += "Could not locate sample\n"
+        }
+    }
+    
+    private func transcribeAudio(_ url: URL) async {
+        if (!canTranscribe) {
+            return
+        }
+        guard let whisperContext else {
+            return
+        }
+        
+        do {
+            canTranscribe = false
+            messageLog += "Reading wave samples...\n"
+            let data = try readAudioSamples(url)
+            messageLog += "Transcribing data...\n"
+            await whisperContext.fullTranscribe(samples: data)
+            let text = await whisperContext.getTranscription()
+            messageLog += "Done: \(text)\n"
+        } catch {
+            print(error.localizedDescription)
+            messageLog += "\(error.localizedDescription)\n"
+        }
+        
+        canTranscribe = true
+    }
+    
+    private func readAudioSamples(_ url: URL) throws -> [Float] {
+        stopPlayback()
+        try startPlayback(url)
+        return try decodeWaveFile(url)
+    }
+    
+    func toggleRecord() async {
+        if isRecording {
+            await recorder.stopRecording()
+            isRecording = false
+            if let recordedFile {
+                await transcribeAudio(recordedFile)
+            }
+        } else {
+            requestRecordPermission { granted in
+                if granted {
+                    Task {
+                        do {
+                            self.stopPlayback()
+                            let file = try FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
+                                .appending(path: "output.wav")
+                            try await self.recorder.startRecording(toOutputFile: file, delegate: self)
+                            self.isRecording = true
+                            self.recordedFile = file
+                        } catch {
+                            print(error.localizedDescription)
+                            self.messageLog += "\(error.localizedDescription)\n"
+                            self.isRecording = false
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
+#if os(macOS)
+        response(true)
+#else
+        AVAudioSession.sharedInstance().requestRecordPermission { granted in
+            response(granted)
+        }
+#endif
+    }
+    
+    private func startPlayback(_ url: URL) throws {
+        audioPlayer = try AVAudioPlayer(contentsOf: url)
+        audioPlayer?.play()
+    }
+    
+    private func stopPlayback() {
+        audioPlayer?.stop()
+        audioPlayer = nil
+    }
+    
+    // MARK: AVAudioRecorderDelegate
+    
+    nonisolated func audioRecorderEncodeErrorDidOccur(_ recorder: AVAudioRecorder, error: Error?) {
+        if let error {
+            Task {
+                await handleRecError(error)
+            }
+        }
+    }
+    
+    private func handleRecError(_ error: Error) {
+        print(error.localizedDescription)
+        messageLog += "\(error.localizedDescription)\n"
+        isRecording = false
+    }
+    
+    nonisolated func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
+        Task {
+            await onDidFinishRecording()
+        }
+    }
+    
+    private func onDidFinishRecording() {
+        isRecording = false
+    }
+}
--- a/files/Assets.xcassets/AccentColor.colorset/Contents.json
+++ b/files/Assets.xcassets/AccentColor.colorset/Contents.json
@ -0,0 +1,11 @@
+{
+  "colors" : [
+    {
+      "idiom" : "universal"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
@ -0,0 +1,63 @@
+{
+  "images" : [
+    {
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "16x16"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "16x16"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "32x32"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "32x32"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "128x128"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "128x128"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "256x256"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "256x256"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "512x512"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "512x512"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
@ -0,0 +1,6 @@
+{
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
@ -0,0 +1,6 @@
+{
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Supporting
@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.security.app-sandbox</key>
+	<true/>
+	<key>com.apple.security.device.audio-input</key>
+	<true/>
+	<key>com.apple.security.files.user-selected.read-only</key>
+	<true/>
+</dict>
+</plist>
--- a/examples/whisper.swiftui/whisper.swiftui.demo/UI/ContentView.swift
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/UI/ContentView.swift
@ -0,0 +1,43 @@
+import SwiftUI
+import AVFoundation
+
+struct ContentView: View {
+    @StateObject var whisperState = WhisperState()
+    
+    var body: some View {
+        NavigationStack {
+            VStack {
+                HStack {
+                    Button("Transcribe", action: {
+                        Task {
+                            await whisperState.transcribeSample()
+                        }
+                    })
+                    .buttonStyle(.bordered)
+                    .disabled(!whisperState.canTranscribe)
+                    
+                    Button(whisperState.isRecording ? "Stop recording" : "Start recording", action: {
+                        Task {
+                            await whisperState.toggleRecord()
+                        }
+                    })
+                    .buttonStyle(.bordered)
+                    .disabled(!whisperState.canTranscribe)
+                }
+                
+                ScrollView {
+                    Text(verbatim: whisperState.messageLog)
+                        .frame(maxWidth: .infinity, alignment: .leading)
+                }
+            }
+            .navigationTitle("Whisper SwiftUI Demo")
+            .padding()
+        }
+    }
+}
+
+struct ContentView_Previews: PreviewProvider {
+    static var previews: some View {
+        ContentView()
+    }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Utils/Recorder.swift
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Utils/Recorder.swift
@ -0,0 +1,35 @@
+import Foundation
+import AVFoundation
+
+actor Recorder {
+    private var recorder: AVAudioRecorder?
+    
+    enum RecorderError: Error {
+        case couldNotStartRecording
+    }
+    
+    func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) throws {
+        let recordSettings: [String : Any] = [
+            AVFormatIDKey: Int(kAudioFormatLinearPCM),
+            AVSampleRateKey: 16000.0,
+            AVNumberOfChannelsKey: 1,
+            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
+        ]
+#if !os(macOS)
+        let session = AVAudioSession.sharedInstance()
+        try session.setCategory(.playAndRecord, mode: .default)
+#endif
+        let recorder = try AVAudioRecorder(url: url, settings: recordSettings)
+        recorder.delegate = delegate
+        if recorder.record() == false {
+            print("Could not start recording")
+            throw RecorderError.couldNotStartRecording
+        }
+        self.recorder = recorder
+    }
+    
+    func stopRecording() {
+        recorder?.stop()
+        recorder = nil
+    }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/Utils/RiffWaveUtils.swift
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/Utils/RiffWaveUtils.swift
@ -0,0 +1,12 @@
+import Foundation
+
+func decodeWaveFile(_ url: URL) throws -> [Float] {
+    let data = try Data(contentsOf: url)
+    let floats = stride(from: 44, to: data.count, by: 2).map {
+        return data[$0..<$0 + 2].withUnsafeBytes {
+            let short = Int16(littleEndian: $0.load(as: Int16.self))
+            return max(-1.0, min(Float(short) / 32767.0, 1.0))
+        }
+    }
+    return floats
+}
--- a/examples/whisper.swiftui/whisper.swiftui.demo/WhisperCppDemoApp.swift
+++ b/examples/whisper.swiftui/whisper.swiftui.demo/WhisperCppDemoApp.swift
@ -0,0 +1,10 @@
+import SwiftUI
+
+@main
+struct WhisperCppDemoApp: App {
+    var body: some Scene {
+        WindowGroup {
+            ContentView()
+        }
+    }
+}
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/.gitignore
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/.gitignore
@ -0,0 +1 @@
+xcuserdata/
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj
@ -0,0 +1,467 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 56;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		0A8E49002954B3F100704C1B /* README.md in Resources */ = {isa = PBXBuildFile; fileRef = 0A8E48FF2954B3F100704C1B /* README.md */; };
+		0AA751482953AC2E001EE061 /* samples in Resources */ = {isa = PBXBuildFile; fileRef = 0AA751462953AC2E001EE061 /* samples */; };
+		0AA751492953AC2E001EE061 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 0AA751472953AC2E001EE061 /* models */; };
+		0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AA7514B2953B569001EE061 /* RiffWaveUtils.swift */; };
+		0AA7514E2953D958001EE061 /* Recorder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AA7514D2953D958001EE061 /* Recorder.swift */; };
+		0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */; };
+		0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9C29539CCF003032C3 /* ContentView.swift */; };
+		0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5D9E29539CD0003032C3 /* Assets.xcassets */; };
+		0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */; };
+		0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC729539EB0003032C3 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-Wno-shorten-64-to-32"; }; };
+		0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
+		0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
+		0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		0A8E48FF2954B3F100704C1B /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
+		0AA751462953AC2E001EE061 /* samples */ = {isa = PBXFileReference; lastKnownFileType = folder; path = samples; sourceTree = "<group>"; };
+		0AA751472953AC2E001EE061 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; path = models; sourceTree = "<group>"; };
+		0AA7514B2953B569001EE061 /* RiffWaveUtils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RiffWaveUtils.swift; sourceTree = "<group>"; };
+		0AA7514D2953D958001EE061 /* Recorder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Recorder.swift; sourceTree = "<group>"; };
+		0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperCppDemoApp.swift; sourceTree = "<group>"; };
+		0AAC5D9C29539CCF003032C3 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+		0AAC5D9E29539CD0003032C3 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = WhisperCppDemo.entitlements; sourceTree = "<group>"; };
+		0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
+		0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "WhisperCppDemo-Bridging-Header.h"; sourceTree = "<group>"; };
+		0AAC5DC729539EB0003032C3 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../whisper.cpp; sourceTree = "<group>"; };
+		0AAC5DC829539EB0003032C3 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../whisper.h; sourceTree = "<group>"; };
+		0AAC5DC929539EB0003032C3 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml.c; sourceTree = "<group>"; };
+		0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml.h; sourceTree = "<group>"; };
+		0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
+		0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		0AAC5D9429539CCF003032C3 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		0AA7513F2953AB32001EE061 /* Models */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5DCD2953A05C003032C3 /* WhisperState.swift */,
+			);
+			path = Models;
+			sourceTree = "<group>";
+		};
+		0AA751402953ABA6001EE061 /* Resources */ = {
+			isa = PBXGroup;
+			children = (
+				0AA751472953AC2E001EE061 /* models */,
+				0AA751462953AC2E001EE061 /* samples */,
+			);
+			path = Resources;
+			sourceTree = "<group>";
+		};
+		0AA7514A2953B561001EE061 /* Utils */ = {
+			isa = PBXGroup;
+			children = (
+				0AA7514B2953B569001EE061 /* RiffWaveUtils.swift */,
+				0AA7514D2953D958001EE061 /* Recorder.swift */,
+			);
+			path = Utils;
+			sourceTree = "<group>";
+		};
+		0AAC5D8E29539CCF003032C3 = {
+			isa = PBXGroup;
+			children = (
+				0A8E48FF2954B3F100704C1B /* README.md */,
+				0AAC5DC529539E89003032C3 /* whisper.cpp */,
+				0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */,
+				0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */,
+				0AAC5D9829539CCF003032C3 /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		0AAC5D9829539CCF003032C3 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		0AAC5D9929539CCF003032C3 /* whisper.swiftui.demo */ = {
+			isa = PBXGroup;
+			children = (
+				0AA7514A2953B561001EE061 /* Utils */,
+				0AA751402953ABA6001EE061 /* Resources */,
+				0AA7513F2953AB32001EE061 /* Models */,
+				0AAC5DD32953A9ED003032C3 /* Supporting files */,
+				0AAC5DD22953A9E3003032C3 /* UI */,
+				0AAC5D9A29539CCF003032C3 /* WhisperCppDemoApp.swift */,
+			);
+			path = whisper.swiftui.demo;
+			sourceTree = "<group>";
+		};
+		0AAC5DA129539CD0003032C3 /* Preview Content */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5DA229539CD0003032C3 /* Preview Assets.xcassets */,
+			);
+			name = "Preview Content";
+			path = "../Preview Content";
+			sourceTree = "<group>";
+		};
+		0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5DC929539EB0003032C3 /* ggml.c */,
+				0AAC5DCA29539EB0003032C3 /* ggml.h */,
+				0AAC5DC729539EB0003032C3 /* whisper.cpp */,
+				0AAC5DC829539EB0003032C3 /* whisper.h */,
+			);
+			path = whisper.cpp;
+			sourceTree = "<group>";
+		};
+		0AAC5DCF2953A36C003032C3 /* whisper.cpp.swift */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5DC629539EAF003032C3 /* WhisperCppDemo-Bridging-Header.h */,
+				0AAC5DD02953A394003032C3 /* LibWhisper.swift */,
+			);
+			path = whisper.cpp.swift;
+			sourceTree = "<group>";
+		};
+		0AAC5DD22953A9E3003032C3 /* UI */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5D9C29539CCF003032C3 /* ContentView.swift */,
+			);
+			path = UI;
+			sourceTree = "<group>";
+		};
+		0AAC5DD32953A9ED003032C3 /* Supporting files */ = {
+			isa = PBXGroup;
+			children = (
+				0AAC5D9E29539CD0003032C3 /* Assets.xcassets */,
+				0AAC5DA029539CD0003032C3 /* WhisperCppDemo.entitlements */,
+				0AAC5DA129539CD0003032C3 /* Preview Content */,
+			);
+			path = "Supporting files";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		0AAC5D9629539CCF003032C3 /* whisper.swiftui */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 0AAC5DBC29539CD0003032C3 /* Build configuration list for PBXNativeTarget "whisper.swiftui" */;
+			buildPhases = (
+				0AAC5D9329539CCF003032C3 /* Sources */,
+				0AAC5D9429539CCF003032C3 /* Frameworks */,
+				0AAC5D9529539CCF003032C3 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = whisper.swiftui;
+			productName = WhisperCppDemo;
+			productReference = 0AAC5D9729539CCF003032C3 /* whisper.swiftui.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		0AAC5D8F29539CCF003032C3 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				BuildIndependentTargetsInParallel = 1;
+				LastSwiftUpdateCheck = 1410;
+				LastUpgradeCheck = 1410;
+				TargetAttributes = {
+					0AAC5D9629539CCF003032C3 = {
+						CreatedOnToolsVersion = 14.1;
+						LastSwiftMigration = 1410;
+					};
+				};
+			};
+			buildConfigurationList = 0AAC5D9229539CCF003032C3 /* Build configuration list for PBXProject "whisper.swiftui" */;
+			compatibilityVersion = "Xcode 14.0";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 0AAC5D8E29539CCF003032C3;
+			productRefGroup = 0AAC5D9829539CCF003032C3 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				0AAC5D9629539CCF003032C3 /* whisper.swiftui */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		0AAC5D9529539CCF003032C3 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0AA751482953AC2E001EE061 /* samples in Resources */,
+				0AAC5DA329539CD0003032C3 /* Preview Assets.xcassets in Resources */,
+				0A8E49002954B3F100704C1B /* README.md in Resources */,
+				0AA751492953AC2E001EE061 /* models in Resources */,
+				0AAC5D9F29539CD0003032C3 /* Assets.xcassets in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		0AAC5D9329539CCF003032C3 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				0AAC5D9D29539CCF003032C3 /* ContentView.swift in Sources */,
+				0AAC5D9B29539CCF003032C3 /* WhisperCppDemoApp.swift in Sources */,
+				0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */,
+				0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */,
+				0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */,
+				0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
+				0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
+				0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		0AAC5DBA29539CD0003032C3 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Needed to transcribe audio";
+				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+				MTL_FAST_MATH = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		0AAC5DBB29539CD0003032C3 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Needed to transcribe audio";
+				MTL_ENABLE_DEBUG_INFO = NO;
+				MTL_FAST_MATH = YES;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+			};
+			name = Release;
+		};
+		0AAC5DBD29539CD0003032C3 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CLANG_ENABLE_MODULES = YES;
+				CODE_SIGN_ENTITLEMENTS = "whisper.swiftui.demo/Supporting files/WhisperCppDemo.entitlements";
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
+				DEVELOPMENT_TEAM = 3TZ9BM962G;
+				ENABLE_HARDENED_RUNTIME = YES;
+				ENABLE_PREVIEWS = YES;
+				GENERATE_INFOPLIST_FILE = YES;
+				"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UILaunchScreen_Generation[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UILaunchScreen_Generation[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphoneos*]" = UIStatusBarStyleDefault;
+				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				IPHONEOS_DEPLOYMENT_TARGET = 16.1;
+				LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
+				"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
+				MACOSX_DEPLOYMENT_TARGET = 13.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = auto;
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
+				SWIFT_EMIT_LOC_STRINGS = YES;
+				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				SWIFT_VERSION = 5.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		0AAC5DBE29539CD0003032C3 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CLANG_ENABLE_MODULES = YES;
+				CODE_SIGN_ENTITLEMENTS = "whisper.swiftui.demo/Supporting files/WhisperCppDemo.entitlements";
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
+				DEVELOPMENT_TEAM = 3TZ9BM962G;
+				ENABLE_HARDENED_RUNTIME = YES;
+				ENABLE_PREVIEWS = YES;
+				GENERATE_INFOPLIST_FILE = YES;
+				"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UILaunchScreen_Generation[sdk=iphoneos*]" = YES;
+				"INFOPLIST_KEY_UILaunchScreen_Generation[sdk=iphonesimulator*]" = YES;
+				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphoneos*]" = UIStatusBarStyleDefault;
+				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				IPHONEOS_DEPLOYMENT_TARGET = 16.1;
+				LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
+				"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
+				LLVM_LTO = YES;
+				MACOSX_DEPLOYMENT_TARGET = 13.0;
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = com.whispercppdemo.WhisperCppDemo;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = auto;
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx";
+				SWIFT_EMIT_LOC_STRINGS = YES;
+				SWIFT_OBJC_BRIDGING_HEADER = "whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h";
+				SWIFT_VERSION = 5.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		0AAC5D9229539CCF003032C3 /* Build configuration list for PBXProject "whisper.swiftui" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				0AAC5DBA29539CD0003032C3 /* Debug */,
+				0AAC5DBB29539CD0003032C3 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		0AAC5DBC29539CD0003032C3 /* Build configuration list for PBXNativeTarget "whisper.swiftui" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				0AAC5DBD29539CD0003032C3 /* Debug */,
+				0AAC5DBE29539CD0003032C3 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 0AAC5D8F29539CCF003032C3 /* Project object */;
+}
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/.gitignore
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/.gitignore
@ -0,0 +1 @@
+contents.xcworkspacedata
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
--- a/examples/whisper.swiftui/whisper.swiftui.xcodeproj/xcshareddata/xcschemes/WhisperCppDemo.xcscheme
+++ b/examples/whisper.swiftui/whisper.swiftui.xcodeproj/xcshareddata/xcschemes/WhisperCppDemo.xcscheme
@ -0,0 +1,100 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "1410"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "0AAC5D9629539CCF003032C3"
+               BuildableName = "whisper.swiftui.app"
+               BlueprintName = "whisper.swiftui"
+               ReferencedContainer = "container:whisper.swiftui.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+         <TestableReference
+            skipped = "NO"
+            parallelizable = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "0AAC5DA729539CD0003032C3"
+               BuildableName = "whisper.swiftuiTests.xctest"
+               BlueprintName = "whisper.swiftuiTests"
+               ReferencedContainer = "container:whisper.swiftui.xcodeproj">
+            </BuildableReference>
+         </TestableReference>
+         <TestableReference
+            skipped = "NO"
+            parallelizable = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "0AAC5DB129539CD0003032C3"
+               BuildableName = "whisper.swiftuiUITests.xctest"
+               BlueprintName = "whisper.swiftuiUITests"
+               ReferencedContainer = "container:whisper.swiftui.xcodeproj">
+            </BuildableReference>
+         </TestableReference>
+      </Testables>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Release"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "0AAC5D9629539CCF003032C3"
+            BuildableName = "whisper.swiftui.app"
+            BlueprintName = "whisper.swiftui"
+            ReferencedContainer = "container:whisper.swiftui.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "0AAC5D9629539CCF003032C3"
+            BuildableName = "whisper.swiftui.app"
+            BlueprintName = "whisper.swiftui"
+            ReferencedContainer = "container:whisper.swiftui.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -8,6 +8,8 @@ add_executable(${TARGET}
    emscripten.cpp
    )

+include(DefaultTargetOptions)
+
 target_link_libraries(${TARGET} PRIVATE
    whisper
    )
--- a/ggml.c
+++ b/ggml.c
--- a/ggml.h
+++ b/ggml.h
@ -724,11 +724,15 @@ enum ggml_opt_result ggml_opt(
 int ggml_cpu_has_avx(void);
 int ggml_cpu_has_avx2(void);
 int ggml_cpu_has_avx512(void);
+int ggml_cpu_has_fma(void);
 int ggml_cpu_has_neon(void);
+int ggml_cpu_has_arm_fma(void);
 int ggml_cpu_has_f16c(void);
 int ggml_cpu_has_fp16_va(void);
 int ggml_cpu_has_wasm_simd(void);
 int ggml_cpu_has_blas(void);
+int ggml_cpu_has_sse3(void);
+int ggml_cpu_has_vsx(void);

 #ifdef  __cplusplus
 }
--- a/models/convert-h5-to-ggml.py
+++ b/models/convert-h5-to-ggml.py
@ -56,7 +56,7 @@ def bytes_to_unicode():
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-    This is a signficant percentage of your normal, say, 32K bpe vocab.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    And avoids mapping to whitespace/control characters the bpe code barfs on.
    """
--- a/models/download-ggml-model.cmd
+++ b/models/download-ggml-model.cmd
@ -40,7 +40,7 @@ if exist "ggml-%model%.bin" (
  goto :eof
 )

-PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-WebRequest -Uri https://huggingface.co/datasets/ggerganov/whisper.cpp/raw/main/ggml-%model%.bin -OutFile ggml-%model%.bin"
+PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Invoke-WebRequest -Uri https://huggingface.co/datasets/ggerganov/whisper.cpp/resolve/main/ggml-%model%.bin -OutFile ggml-%model%.bin"

 if %ERRORLEVEL% neq 0 (
  echo Failed to download ggml model %model%
--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@ -19,7 +19,7 @@ function get_script_path() {
    fi
 }

-models_path=$(get_script_path)
+models_path="$(get_script_path)"

 # Whisper models
 models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
--- a/whisper.cpp
+++ b/whisper.cpp
@ -204,6 +204,10 @@ struct whisper_vocab {
    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;

+    // used to avoid memory allocations during sampling
+    // TODO: move to whisper_context in the future
+    std::vector<std::pair<double, whisper_vocab::id>> probs_id;
+
    id token_eot  = 50256;
    id token_sot  = 50257;
    id token_prev = 50360;
@ -408,6 +412,8 @@ struct whisper_context {
    std::vector<uint8_t>   buf_compute;
    std::vector<uint8_t>   buf_compute_layer;

+    ggml_type wtype; // weight type (FP32 or FP16)
+
    whisper_model model;
    whisper_vocab vocab;

@ -431,9 +437,8 @@ struct whisper_context {
 };

 template<typename T>
-static void read_safe(std::ifstream& fin, T& dest)
-{
-  fin.read((char*)& dest, sizeof(T));
+static void read_safe(std::ifstream& fin, T& dest) {
+    fin.read((char*)& dest, sizeof(T));
 }

 // load the model from a ggml file
@ -551,6 +556,9 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx

        std::string word;
        std::vector<char> tmp;
+
+        tmp.reserve(128);
+
        for (int i = 0; i < n_vocab; i++) {
            uint32_t len;
            read_safe(fin, len);
@ -603,6 +611,11 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
                vocab.id_to_token[i] = word;
            }
        }
+
+        wctx.logits.reserve(vocab.n_vocab*model.hparams.n_text_ctx);
+        wctx.probs.reserve(vocab.n_vocab*model.hparams.n_text_ctx);
+
+        vocab.probs_id.reserve(n_vocab);
    }

    {
@ -618,10 +631,11 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx

    // for the big tensors, we have the option to store the data in 16-bit floats
    // in order to save memory and also to speed up the computation
-    const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
+    wctx.wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
+
+    const ggml_type wtype = wctx.wtype;

    size_t ctx_size = 0;
-    size_t ctx_mem_size = 0;

    {
        const auto & hparams = model.hparams;
@ -640,7 +654,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx

        // encoder
        {
-            // TODO: F16 .. maybe not?
            ctx_size += n_audio_ctx*n_audio_state*ggml_type_size(GGML_TYPE_F32); // e_pe;

            ctx_size += 3*n_mels*n_audio_state*ggml_type_size(wtype);         // e_conv_1_w
@ -655,7 +668,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx

        // decoder
        {
-            // TODO: F16 .. maybe not?
            ctx_size += n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F32); // d_pe;

            ctx_size += n_vocab*n_text_state*ggml_type_size(wtype); // d_te;
@ -730,12 +742,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
            ctx_size += n_text_layer*(             n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
        }

-        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
-        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
-
-        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
-        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
-
        ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead

        fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
@ -978,8 +984,8 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
            const int n_mem      = n_text_layer*n_text_ctx;
            const int n_elements = n_text_state*n_mem;

-            model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
-            model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
+            model.memory_k = ggml_new_tensor_1d(ctx, wtype, n_elements);
+            model.memory_v = ggml_new_tensor_1d(ctx, wtype, n_elements);
        }

        // key/value memory for the cross-attention layer
@ -989,8 +995,8 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
            const int n_mem      = n_text_layer*n_audio_ctx;
            const int n_elements = n_text_state*n_mem;

-            model.memory_cross_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
-            model.memory_cross_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
+            model.memory_cross_k = ggml_new_tensor_1d(ctx, wtype, n_elements);
+            model.memory_cross_v = ggml_new_tensor_1d(ctx, wtype, n_elements);
        }

        const size_t memory_size =
@ -1028,10 +1034,10 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx

            std::string name;
            std::vector<char> tmp(length); // create a buffer
-            fin.read( &tmp[0], tmp.size() ); // read to buffer
+            fin.read(&tmp[0], tmp.size()); // read to buffer
            name.assign(&tmp[0], tmp.size());

-            if (model.tensors.find(name.data()) == model.tensors.end()) {
+            if (model.tensors.find(name) == model.tensors.end()) {
                fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
                return false;
            }
@ -1236,14 +1242,14 @@ static bool whisper_encode(
                ggml_permute(ctxL,
                        ggml_cpy(ctxL,
                            Qcur,
-                            ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
+                            ggml_new_tensor_3d(ctxL, wctx.wtype, n_state/n_head, n_head, n_ctx)),
                        0, 2, 1, 3);

            struct ggml_tensor * K =
                ggml_permute(ctxL,
                        ggml_cpy(ctxL,
                            Kcur,
-                            ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
+                            ggml_new_tensor_3d(ctxL, wctx.wtype, n_state/n_head, n_head, n_ctx)),
                        0, 2, 1, 3);

            struct ggml_tensor * V =
@ -1253,7 +1259,7 @@ static bool whisper_encode(
                                Vcur,
                                n_state/n_head, n_head, n_ctx),
                            1, 2, 0, 3),
-                        ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_ctx, n_state/n_head, n_head)
+                        ggml_new_tensor_3d(ctxL, wctx.wtype, n_ctx, n_state/n_head, n_head)
                        );

            struct ggml_tensor * KQV = ggml_flash_attn(ctxL, Q, K, V, false);
@ -1269,7 +1275,7 @@ static bool whisper_encode(
                ggml_permute(ctxL,
                        ggml_cpy(ctxL,
                            Kcur,
-                            ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
+                            ggml_new_tensor_3d(ctxL, wctx.wtype, n_state/n_head, n_head, n_ctx)),
                        0, 2, 1, 3);

            // K * Q
@ -1287,7 +1293,7 @@ static bool whisper_encode(
            //    ggml_permute(ctxL,
            //            ggml_cpy(ctxL,
            //                Vcur,
-            //                ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, n_ctx)),
+            //                ggml_new_tensor_3d(ctxL, wctx.wtype, n_state/n_head, n_head, n_ctx)),
            //            1, 2, 0, 3);

            //struct ggml_tensor * KQV = ggml_mul_mat(ctxL, V_trans, KQ_soft_max);
@ -1299,7 +1305,7 @@ static bool whisper_encode(
                                Vcur,
                                n_state/n_head, n_head, n_ctx),
                            0, 2, 1, 3),
-                        ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_ctx, n_head)
+                        ggml_new_tensor_3d(ctxL, wctx.wtype, n_state/n_head, n_ctx, n_head)
                        );

            struct ggml_tensor * KQV = ggml_mul_mat(ctxL, ggml_transpose(ctxL, V), KQ_soft_max);
@ -1344,7 +1350,7 @@ static bool whisper_encode(

 #ifdef USE_FLASH_FF
            cur = ggml_flash_ff(ctxL,
-                    ggml_cpy(ctxL, cur, ggml_new_tensor_2d(ctxL, GGML_TYPE_F16, n_state, N)),
+                    ggml_cpy(ctxL, cur, ggml_new_tensor_2d(ctxL, wctx.wtype, n_state, N)),
                    layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
 #else
            // fully connected
@ -1387,8 +1393,8 @@ static bool whisper_encode(
        // input for next layer (inpO -> inpL)
        memcpy(inpL->data, inpO->data, ggml_nbytes(inpL));
        inpL->op = GGML_OP_NONE;
-        inpL->src0 = NULL;
-        inpL->src1 = NULL;
+        inpL->src0 = nullptr;
+        inpL->src1 = nullptr;

        //printf("%s: - used_mem(%d) = %f MB\n", __func__, il, ggml_used_mem(ctxL)/1024.0/1024.0);

@ -1441,8 +1447,8 @@ static bool whisper_encode(

        // TODO: hack to disconnect the encoded features from the previous graph
        cur->op = GGML_OP_NONE;
-        cur->src0 = NULL;
-        cur->src1 = NULL;
+        cur->src0 = nullptr;
+        cur->src1 = nullptr;

        for (int il = 0; il < model.hparams.n_text_layer; ++il) {
            auto & layer = model.layers_decoder[il];
@ -1451,7 +1457,7 @@ static bool whisper_encode(
                    layer.cross_attn_k_w,
                    cur);

-            Kcross = ggml_scale(ctx0, Kcross, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
+            //Kcross = ggml_scale(ctx0, Kcross, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));

            struct ggml_tensor * Vcross = ggml_mul_mat(ctx0,
                    layer.cross_attn_v_w,
@ -1573,14 +1579,14 @@ static bool whisper_decode(
                        Qcur),
                    Qcur);

-            Qcur = ggml_scale(ctxL, Qcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));
+            //Qcur = ggml_scale(ctxL, Qcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));

            // note: no bias for Key
            struct ggml_tensor * Kcur = ggml_mul_mat(ctxL,
                    layer.attn_k_w,
                    cur);

-            Kcur = ggml_scale(ctxL, Kcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));
+            //Kcur = ggml_scale(ctxL, Kcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));

            struct ggml_tensor * Vcur = ggml_mul_mat(ctxL,
                    layer.attn_v_w,
@ -1603,6 +1609,33 @@ static bool whisper_decode(

            // ------

+#ifdef USE_FLASH_ATTN
+            struct ggml_tensor * Q =
+                ggml_permute(ctxL,
+                        ggml_cpy(ctxL,
+                            Qcur,
+                            ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, N)),
+                        0, 2, 1, 3);
+
+            struct ggml_tensor * K =
+                ggml_permute(ctxL,
+                        ggml_reshape_3d(ctxL,
+                            ggml_view_1d(ctxL, model.memory_k, (n_past + N)*n_state, il*n_ctx*ggml_element_size(model.memory_k)*n_state),
+                            n_state/n_head, n_head, n_past + N),
+                        0, 2, 1, 3);
+
+            struct ggml_tensor * V =
+                ggml_cpy(ctxL,
+                        ggml_permute(ctxL,
+                            ggml_reshape_3d(ctxL,
+                                ggml_view_1d(ctxL, model.memory_v, (n_past + N)*n_state, il*n_ctx*ggml_element_size(model.memory_v)*n_state),
+                                n_state/n_head, n_head, n_past + N),
+                            1, 2, 0, 3),
+                        ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_past + N, n_state/n_head, n_head)
+                        );
+
+            struct ggml_tensor * KQV = ggml_flash_attn(ctxL, Q, K, V, true);
+#else
            struct ggml_tensor * Q =
                ggml_permute(ctxL,
                        ggml_cpy(ctxL,
@ -1620,13 +1653,13 @@ static bool whisper_decode(
            // K * Q
            struct ggml_tensor * KQ = ggml_mul_mat(ctxL, K, Q);

-            //struct ggml_tensor * KQ_scaled =
-            //    ggml_scale(ctxL,
-            //            KQ,
-            //            ggml_new_f32(ctxL, 1.0f/sqrt(float(n_state)/n_head))
-            //            );
+            struct ggml_tensor * KQ_scaled =
+                ggml_scale(ctxL,
+                        KQ,
+                        ggml_new_f32(ctxL, 1.0f/sqrt(float(n_state)/n_head))
+                        );

-            struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctxL, KQ, n_past);
+            struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctxL, KQ_scaled, n_past);

            struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctxL, KQ_masked);

@ -1638,6 +1671,7 @@ static bool whisper_decode(
                        1, 2, 0, 3);

            struct ggml_tensor * KQV = ggml_mul_mat(ctxL, V_trans, KQ_soft_max);
+#endif

            struct ggml_tensor * KQV_merged = ggml_permute(ctxL, KQV, 0, 2, 1, 3);

@ -1683,7 +1717,7 @@ static bool whisper_decode(
                        Qcur),
                    Qcur);

-            Qcur = ggml_scale(ctxL, Qcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));
+            //Qcur = ggml_scale(ctxL, Qcur, ggml_new_f32(ctxL, pow(float(n_state)/n_head, -0.25)));

            // Kcross is already scaled
            struct ggml_tensor * Kcross =
@ -1698,6 +1732,24 @@ static bool whisper_decode(

            // ------

+#ifdef USE_FLASH_ATTN
+            struct ggml_tensor * Q =
+                ggml_permute(ctxL,
+                        ggml_cpy(ctxL,
+                            Qcur,
+                            ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, n_state/n_head, n_head, N)),
+                        0, 2, 1, 3);
+
+            struct ggml_tensor * K = ggml_permute(ctxL, Kcross, 0, 2, 1, 3);
+
+            struct ggml_tensor * V =
+                ggml_cpy(ctxL,
+                        ggml_permute(ctxL, Vcross, 1, 2, 0, 3),
+                        ggml_new_tensor_3d(ctxL, GGML_TYPE_F16, M, n_state/n_head, n_head)
+                        );
+
+            struct ggml_tensor * KQV = ggml_flash_attn(ctxL, Q, K, V, false);
+#else
            struct ggml_tensor * Q =
                ggml_permute(ctxL,
                        ggml_cpy(ctxL,
@ -1724,6 +1776,7 @@ static bool whisper_decode(
            struct ggml_tensor * V_trans = ggml_permute(ctxL, Vcross, 1, 2, 0, 3);

            struct ggml_tensor * KQV = ggml_mul_mat(ctxL, V_trans, KQ_soft_max);
+#endif

            struct ggml_tensor * KQV_merged = ggml_permute(ctxL, KQV, 0, 2, 1, 3);

@ -1799,8 +1852,8 @@ static bool whisper_decode(
        // input for next layer (inpO -> inpL)
        memcpy(inpL->data, inpO->data, ggml_nbytes(inpL));
        inpL->op = GGML_OP_NONE;
-        inpL->src0 = NULL;
-        inpL->src1 = NULL;
+        inpL->src0 = nullptr;
+        inpL->src1 = nullptr;

        if (N > 1) {
            //printf("%s: - used_mem(%d) = %f MB\n", __func__, il, ggml_used_mem(ctxL)/1024.0/1024.0);
@ -1856,7 +1909,7 @@ static bool whisper_decode(

 // the most basic sampling scheme - select the top token
 static whisper_token_data whisper_sample_best(
-        const whisper_vocab & vocab,
+              whisper_vocab & vocab,
        const float * probs,
              bool force_timestamp,
              bool is_initial) {
@ -1864,13 +1917,13 @@ static whisper_token_data whisper_sample_best(
        0, 0, 0.0f, 0.0f, 0.0f, -1, -1, 0.0f,
    };

-    int n_logits = vocab.id_to_token.size();
+    const int n_logits = vocab.n_vocab;

-    std::vector<std::pair<double, whisper_vocab::id>> probs_id;
-    probs_id.reserve(n_logits);
+    auto & probs_id = vocab.probs_id;

+    probs_id.clear();
    for (int i = 0; i < n_logits; i++) {
-        probs_id.push_back(std::make_pair(probs[i], i));
+        probs_id.emplace_back(probs[i], i);
    }

    {
@ -2008,6 +2061,9 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) {
    std::vector<float> even;
    std::vector<float> odd;

+    even.reserve(N/2);
+    odd.reserve(N/2);
+
    for (int i = 0; i < N; i++) {
        if (i % 2 == 0) {
            even.push_back(in[i]);
@ -2043,7 +2099,7 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) {
 static bool log_mel_spectrogram(
    const float * samples,
    const int n_samples,
-    const int sample_rate,
+    const int /*sample_rate*/,
    const int fft_size,
    const int fft_step,
    const int n_mel,
@ -2194,7 +2250,7 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
    // find the longest tokens that form the words:
    std::vector<whisper_vocab::id> tokens;
    for (const auto & word : words) {
-        if (word.size() == 0) continue;
+        if (word.empty()) continue;

        int i = 0;
        int n = word.size();
@ -2242,7 +2298,8 @@ struct whisper_context * whisper_init(const char * path_model) {

    if (!whisper_model_load(path_model, *ctx)) {
        fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, path_model);
-        return NULL;
+        delete ctx;
+        return nullptr;
    }

    ctx->t_load_us = ggml_time_us() - t_start_us;
@ -2360,12 +2417,12 @@ struct whisper_token_data whisper_sample_timestamp(struct whisper_context * ctx,
 int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_token * tokens, int n_max_tokens) {
    const auto res = tokenize(ctx->vocab, text);

-    if (res.size() > n_max_tokens) {
+    if (n_max_tokens < (int) res.size()) {
        fprintf(stderr, "%s: too many resulting tokens: %d (max %d)\n", __func__, (int) res.size(), n_max_tokens);
        return -1;
    }

-    for (int i = 0; i < res.size(); i++) {
+    for (int i = 0; i < (int) res.size(); i++) {
        tokens[i] = res[i];
    }

@ -2404,7 +2461,7 @@ const char * whisper_lang_str(int id) {
    }

    fprintf(stderr, "%s: unknown language id %d\n", __func__, id);
-    return NULL;
+    return nullptr;
 }

 int whisper_lang_auto_detect(
@ -2438,9 +2495,9 @@ int whisper_lang_auto_detect(
    }

    std::vector<std::pair<float, int>> probs_id;
-    for (const auto kv : g_lang) {
+    for (const auto & kv : g_lang) {
        const auto token_lang = whisper_token_lang(ctx, kv.second.first);
-        probs_id.push_back({ ctx->probs[token_lang], kv.second.first });
+        probs_id.emplace_back(ctx->probs[token_lang], kv.second.first);
    }

    // sort descending
@ -2464,12 +2521,12 @@ int whisper_lang_auto_detect(
    }

    {
-        for (int i = 0; i < probs_id.size(); i++) {
+        for (const auto & prob : probs_id) {
            if (lang_probs) {
-                lang_probs[probs_id[i].second] = probs_id[i].first;
+                lang_probs[prob.second] = prob.first;
            }

-            //printf("%s: lang %2d (%3s): %f\n", __func__, probs_id[i].second, whisper_lang_str(probs_id[i].second), probs_id[i].first);
+            //printf("%s: lang %2d (%3s): %f\n", __func__, prob.second, whisper_lang_str(prob.second), prob.first);
        }
    }

@ -2488,6 +2545,10 @@ int whisper_n_text_ctx(struct whisper_context * ctx) {
    return ctx->model.hparams.n_text_ctx;
 }

+int whisper_n_audio_ctx(struct whisper_context * ctx) {
+    return ctx->model.hparams.n_audio_ctx;
+}
+
 int whisper_is_multilingual(struct whisper_context * ctx) {
    return ctx->vocab.is_multilingual() ? 1 : 0;
 }
@ -2561,11 +2622,15 @@ const char * whisper_print_system_info(void) {
    s += "AVX = "       + std::to_string(ggml_cpu_has_avx())       + " | ";
    s += "AVX2 = "      + std::to_string(ggml_cpu_has_avx2())      + " | ";
    s += "AVX512 = "    + std::to_string(ggml_cpu_has_avx512())    + " | ";
+    s += "FMA = "       + std::to_string(ggml_cpu_has_fma())       + " | ";
    s += "NEON = "      + std::to_string(ggml_cpu_has_neon())      + " | ";
+    s += "ARM_FMA = "   + std::to_string(ggml_cpu_has_arm_fma())   + " | ";
    s += "F16C = "      + std::to_string(ggml_cpu_has_f16c())      + " | ";
    s += "FP16_VA = "   + std::to_string(ggml_cpu_has_fp16_va())   + " | ";
    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
+    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
+    s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";

    return s.c_str();
 }
@ -2811,7 +2876,11 @@ int whisper_full(
        std::rotate(prompt_past.begin(), prompt_past.end() - params.prompt_n_tokens, prompt_past.end());
    }

-    // overwrite audio_ctx
+    // overwrite audio_ctx, max allowed is hparams.n_audio_ctx
+    if (params.audio_ctx > whisper_n_audio_ctx(ctx)) {
+        fprintf(stderr, "%s: audio_ctx is larger than the maximum allowed (%d > %d)\n", __func__, params.audio_ctx, whisper_n_audio_ctx(ctx));
+        return -4;
+    }
    ctx->exp_n_audio_ctx = params.audio_ctx;

    // these tokens determine the task that will be performed
@ -2874,7 +2943,7 @@ int whisper_full(
        prompt.clear();

        // if we have already generated some text, use it as a prompt to condition the next generation
-        if (prompt_past.size() > 0) {
+        if (!prompt_past.empty()) {
            int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

            prompt = { whisper_token_prev(ctx) };
@ -2985,7 +3054,7 @@ int whisper_full(
        if (failed) {
            // when we fail to sample timestamp token, retry by clearing the past prompt
            // if it fails again, then we advance the window by 1 second
-            if (prompt_past.size() > 0) {
+            if (!prompt_past.empty()) {
                prompt_past.clear();
            } else {
                fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
@ -3002,11 +3071,11 @@ int whisper_full(
        }

        // store the text from this iteration
-        if (tokens_cur.size() > 0) {
+        if (!tokens_cur.empty()) {
            int  i0 = 0;
            auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx));

-            std::string text = "";
+            std::string text;

            for (int i = 0; i < (int) tokens_cur.size(); i++) {
                //printf("%s: %18s %6.3f %18s %6.3f\n", __func__,
@ -3138,7 +3207,7 @@ int whisper_full_parallel(

        // separate key + value memory for each processor
        {
-            auto & ctx = model.ctx_mem;
+            auto & mctx = model.ctx_mem;

            const auto & hparams = model.hparams;

@ -3151,8 +3220,8 @@ int whisper_full_parallel(
                const int n_mem      = n_text_layer*n_text_ctx;
                const int n_elements = n_text_state*n_mem;

-                model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
-                model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
+                model.memory_k = ggml_new_tensor_1d(mctx, ctx->wtype, n_elements);
+                model.memory_v = ggml_new_tensor_1d(mctx, ctx->wtype, n_elements);
            }

            // key/value memory for the cross-attention layer
@ -3162,8 +3231,8 @@ int whisper_full_parallel(
                const int n_mem      = n_text_layer*n_audio_ctx;
                const int n_elements = n_text_state*n_mem;

-                model.memory_cross_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
-                model.memory_cross_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements);
+                model.memory_cross_k = ggml_new_tensor_1d(mctx, ctx->wtype, n_elements);
+                model.memory_cross_v = ggml_new_tensor_1d(mctx, ctx->wtype, n_elements);
            }
        }
    }
@ -3207,17 +3276,17 @@ int whisper_full_parallel(
    for (int i = 0; i < n_processors - 1; ++i) {
        auto & results_i = ctxs[i].result_all;

-        for (int j = 0; j < (int) results_i.size(); ++j) {
+        for (auto & result : results_i) {
            // correct the segment timestamp taking into account the offset
-            results_i[j].t0 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
-            results_i[j].t1 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
+            result.t0 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
+            result.t1 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;

            // make sure that segments are not overlapping
-            if (ctx->result_all.size() > 0) {
-                results_i[j].t0 = std::max(results_i[j].t0, ctx->result_all.back().t1);
+            if (!ctx->result_all.empty()) {
+                result.t0 = std::max(result.t0, ctx->result_all.back().t1);
            }

-            ctx->result_all.push_back(std::move(results_i[j]));
+            ctx->result_all.push_back(std::move(result));

            // call the new_segment_callback for each segment
            if (params.new_segment_callback) {
@ -3312,18 +3381,18 @@ static int64_t sample_to_timestamp(int i_sample) {
 static float voice_length(const std::string & text) {
    float res = 0.0f;

-    for (size_t i = 0; i < text.size(); ++i) {
-        if (text[i] == ' ') {
+    for (char c : text) {
+        if (c == ' ') {
            res += 0.01f;
-        } else if (text[i] == ',') {
+        } else if (c == ',') {
            res += 2.00f;
-        } else if (text[i] == '.') {
+        } else if (c == '.') {
            res += 3.00f;
-        } else if (text[i] == '!') {
+        } else if (c == '!') {
            res += 3.00f;
-        } else if (text[i] == '?') {
+        } else if (c == '?') {
            res += 3.00f;
-        } else if (text[i] >= '0' && text[i] <= '9') {
+        } else if (c >= '0' && c <= '9') {
            res += 3.00f;
        } else {
            res += 1.00f;
--- a/whisper.h
+++ b/whisper.h
@ -177,6 +177,7 @@ extern "C" {
    WHISPER_API int whisper_n_len          (struct whisper_context * ctx); // mel length
    WHISPER_API int whisper_n_vocab        (struct whisper_context * ctx);
    WHISPER_API int whisper_n_text_ctx     (struct whisper_context * ctx);
+    WHISPER_API int whisper_n_audio_ctx    (struct whisper_context * ctx);
    WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);

    // The probabilities for the next token
Author	SHA1	Message	Date
Georgi Gerganov	e2aa556a99	whisper : experiments with Flash Attention in the decoder	2023-01-07 21:00:51 +02:00
Georgi Gerganov	f30b5d322c	ggml : fix bug in new soft max computation	2023-01-07 21:00:07 +02:00
Georgi Gerganov	44efbf7ff1	cmake : add -Wno-unused-function + update whisper.js	2023-01-07 20:18:34 +02:00
Georgi Gerganov	d347a59a5f	ggml : when using BLAS start only 1 CPU thread	2023-01-07 19:48:56 +02:00
Georgi Gerganov	6394c906af	ggml : fix running tasks with variable number of threads	2023-01-07 19:20:18 +02:00
Georgi Gerganov	74ffa14e1d	ggml : unroll ggml_vec_dot_f16 in ggml_compute_forward_flash_attn_f16	2023-01-07 19:19:40 +02:00
Georgi Gerganov	65fdcbbbbb	whisper : revert accidental MB change	2023-01-07 16:18:21 +02:00
Georgi Gerganov	d61d55cd4b	ggml : speed-up soft max via Accelerate + unroll	2023-01-07 16:16:42 +02:00
Georgi Gerganov	d51fc3ee0a	ggml : use vDSP_sve and vDSP_maxv from Accelerate	2023-01-07 16:10:16 +02:00
Georgi Gerganov	f82a7dd019	ggml : make gcc happy (minor)	2023-01-07 09:34:39 +02:00
Georgi Gerganov	87dd4a3081	talk.wasm : bump memory usage + update whisper.js	2023-01-06 21:13:44 +02:00
m.bell	41e05c6b1b	cmake : support AVX2 in Windows better (#381 )	2023-01-06 19:36:33 +02:00
Georgi Gerganov	fa379cb22a	Revert "tmp" This reverts commit `1652965529`.	2023-01-06 19:33:09 +02:00
David Thorpe	322f4e6c4e	go : bindings updated so they can be used in third party packages. (#379 ) * Updated bindings so they can be used in third pary packages. * Updated makefiles to set FMA flag on optionally, for xeon E5 on Darwin	2023-01-06 19:32:28 +02:00
Georgi Gerganov	1652965529	tmp	2023-01-06 19:32:12 +02:00
Georgi Gerganov	6042c7a3be	cmake : change min required version to 3.0 (#351 ) We increase the min version only when want to use particular functionality that is available in the newer version	2023-01-06 19:25:28 +02:00
Georgi Gerganov	6b351bb669	command : add "guided-mode" video demo in the README.md	2023-01-06 18:59:26 +02:00
Abitofevrything	a62170c656	ggml : add SSE3 and fp16 conversion lookup table (#368 ) * Improves WASM performance: On MacBook M1 Pro, I observe 25% faster using Firefox and 35% faster using Chrome * Add support for SSE3 SIMD * Add SSE3 to system information * Add Imath support for fp16-fp32 conversions * Add Imath to system information * Wrap Imath calls to avoid static function warnings * Drop Imath; Add lookup table for f16 -> f32 conversions * Remove TODO comments * Update SSE3 to new macro arguments * Correct updated macro definitions * Prefer static inline where possible * ggml : static inlines + add public f16 <-> f32 conversions Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>	2023-01-06 18:45:59 +02:00
Thomas Fitzsimmons	1944e7c33e	whisper : document POWER VSX support	2023-01-05 23:53:00 +02:00
Thomas Fitzsimmons	49a8dd6732	ggml : reorganize POWER9 ppc64le SIMD code	2023-01-05 23:53:00 +02:00
Thomas Fitzsimmons	8c7f642286	ggml : change f16 load and store macro arguments	2023-01-05 23:53:00 +02:00
Georgi Gerganov	ad2a4ffa03	whisper : do not use F16 tensors when in F32 mode (#369 )	2023-01-05 22:56:25 +02:00
Georgi Gerganov	b3c865083e	ci : add emscripten build	2023-01-05 22:10:20 +02:00
Georgi Gerganov	a0d4f8e65c	main : make whisper_print_segment_callback() more readable (close #371 )	2023-01-05 21:45:05 +02:00
Georgi Gerganov	4a214d2f07	cmake : add CMAKE_RUNTIME_OUTPUT_DIRECTORY Currently needed by the wasm examples	2023-01-05 21:40:59 +02:00
Georgi Gerganov	0a0cfa7985	ggml : add void to argument-less functions	2023-01-05 21:40:38 +02:00
Georgi Gerganov	196d738974	minor : close #370 + Makefile build info print change	2023-01-05 21:35:45 +02:00
Andy Maloney	84c6b42e65	cmake : update to 3.19 (#351 ) - update from 3.0 (from 2014) to 3.19 (from 2020) - move some global setting onto the targets (through a cmake include)	2023-01-05 21:22:48 +02:00
Andy Maloney	dd6d582977	whisper : use ranged-based for loops for readability	2023-01-05 21:20:44 +02:00
Georgi Gerganov	d51c5eb906	ggml : define MIN / MAX only if not defined (minor)	2023-01-05 21:16:52 +02:00
Georgi Gerganov	0be6a1afd9	make : print build information	2023-01-02 13:35:26 +02:00
Georgi Gerganov	a466c3404d	stream : fix data race on bool + avoid division-by-zero	2023-01-02 10:20:50 +02:00
Georgi Gerganov	d629c034a4	models : fix HF model URL (close #356 )	2023-01-02 09:54:43 +02:00
Andy Maloney	f00509d57c	command : refactor to split command list & general transcription modes (#331 ) This makes it easier to understand if you're looking for only one of the capabilities.	2022-12-31 14:08:57 +02:00
Thomas Fitzsimmons	424c410c42	ggml : improve f16 acceleration for POWER9 ppc64le	2022-12-31 10:02:19 +02:00
Georgi Gerganov	d97e6005e9	whisper : add whisper_n_audio_ctx and check for invalid audio_ctx closes #344	2022-12-31 09:57:19 +02:00
Ikko Ashimine	3467230a77	models : fix typo in convert-h5-to-ggml.py signficant -> significant	2022-12-31 09:49:01 +02:00
Avik Sengupta	a091581eb3	cmake : add runtime destination install (#345 ) needed for mingw32 build to successfully install the dlls in the correct location	2022-12-31 09:48:00 +02:00
Georgi Gerganov	68daf6e487	whisper : avoid some memory allocations	2022-12-30 13:43:48 +02:00
Niels Mayer	a593b932e4	main : add -ocsv, aka --output-csv to output a CSV file Adds -ocsv, aka --output-csv feature to examples/main, which outputs a CSV file containing lines formatted as follows <startTime-in-integer-milliseconds>, <endTime-in-integer-milliseconds>, "<transcript-line-including-commas>".	2022-12-29 14:04:00 +02:00
Georgi Gerganov	9a8ad3db69	make : add i686 arch (close #329 )	2022-12-29 13:58:55 +02:00
Georgi Gerganov	4e0b2069e7	ggml : barrier refactor + static functions	2022-12-28 19:00:53 +02:00
Georgi Gerganov	ac521a566e	ggml : simplify the SIMD code (#324 ) * ggml : simplify the SIMD code * ggml : generic reduce for all register sizes + comments	2022-12-24 10:22:28 +02:00
Andy Maloney	331c0bbddc	examples : fix memory leak on failure to load gpt2 model (#323 )	2022-12-23 20:19:07 +02:00
Andy Maloney	dc90efd504	examples : small code cleanups (#322 ) - remove unnecessary initialization of string to "" - use empty() instead of checking size() - use emplace_back instead of push_back - use nullptr instead of NULL - remove unnecessary call to .data() on string - use character overload of find_first_of() instead of passing a string	2022-12-23 20:18:51 +02:00
Georgi Gerganov	7282e2109e	ggml : use vaddvq_f32 for slightly more efficient reduce	2022-12-23 13:48:19 +02:00
Thomas Fitzsimmons	466ceebb78	ggml : add f16 acceleration for POWER9 ppc64le	2022-12-23 13:23:58 +02:00
Georgi Gerganov	77226aa89d	models : fix support for spaces in path (close #315 )	2022-12-23 11:11:38 +02:00
Andy Maloney	543bd5627e	whisper : use emplace_back in place of push_back (#319 ) This avoids potential construction of temporaries.	2022-12-23 11:07:19 +02:00
Andy Maloney	62fee9a9cc	whisper : fix mem leak on failure to load model (#318 )	2022-12-23 11:06:17 +02:00
Andy Maloney	493d94130d	ggml : make consts static (#317 ) These shouldn't be able to be referenced outside the compilation unit.	2022-12-23 11:05:27 +02:00
Georgi Gerganov	1480a5f1af	Update README.md Add SwiftUI example links	2022-12-23 11:02:46 +02:00
Digipom	0f4227d9ee	examples : add whisper.swiftui demo app (#308 ) * Add SwiftUI demo project. * Add -DGGML_USE_ACCELERATE	2022-12-23 10:56:18 +02:00
Georgi Gerganov	4c1fe0c813	Update README.md Add bindings links / discussions	2022-12-22 18:22:58 +02:00
Andy Maloney	fa463313ad	minor : small code cleanups (#302 ) * Small code cleanups - fix indentation - remove extra semicolons - remove extra break after returns in case statements - remove unnecessary call to .data() on string - use empty() instead of checking size() - no need to check for nullptr before free - remove unnecessary initialization of string to "" * minor : switch case always break Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>	2022-12-22 17:06:19 +02:00
Georgi Gerganov	501a6b455c	minor : flag "ARM FMA" -> "ARM_FMA"	2022-12-22 16:47:54 +02:00
Kevin Brothaler	91fc08c641	Build a vfpv4 library for armeabi-v7a and do runtime detection to select the right library	2022-12-22 16:47:54 +02:00
Kevin Brothaler	e1432dd91a	Check for both __ARM_NEON and __ARM_FEATURE_FMA so that the project can be compiled for armv7a. Android armeabi-v7a's NEON support doesn't support FMA unless configured with `-mfpu=neon-fp-armv8`, which would need runtime checks. * Also removed ABI filter from Android project.	2022-12-22 16:47:54 +02:00
Kevin Brothaler	22193cbfe8	Bump NDK version	2022-12-22 16:47:54 +02:00
Andy Maloney	42c6730732	whisper : use nullptr (C++11) instead of NULL macro (#299 )	2022-12-22 16:35:18 +02:00
Andy Maloney	76b6211f9b	cmake : add headers to target (#298 ) This will show the header files in IDEs.	2022-12-22 16:34:47 +02:00
Mohit Agarwal	86a277f78d	go : run `go mod tidy` before building examples + fix permissions (#296 ) * run `go mod tidy` before building examples Running `make examples` after cloning the repository gives the following error: ``` ... [100%] Built target whisper gmake[3]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' gmake[2]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' gmake[1]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' Build example go-model-download Build example go-whisper examples/go-whisper/process.go:11:2: missing go.sum entry for module providing package github.com/go-audio/wav (imported by github.com/ggerganov/whisper.cpp/bindings/go/examples/go-whisper); to add: go get github.com/ggerganov/whisper.cpp/bindings/go/examples/go-whisper make: *** [Makefile:26: examples/go-whisper] Error 1 ``` * remove executable bit from various files	2022-12-22 16:34:20 +02:00
David Thorpe	231bebca7d	bindings : initial import of golang bindings (#287 ) * Initial import of golang bindings * Updated makefile rules * Updated bindings * Makefile update to add in more tests	2022-12-20 08:54:33 +02:00
Georgi Gerganov	90564f85f9	Update README.md	2022-12-19 22:09:21 +02:00
Georgi Gerganov	99da1e5cc8	cmake : enable and fix -Wall -Wextra -Wpedantic C++ warnings	2022-12-19 20:45:08 +02:00
Matheus de Sousa	8e3f129b4d	minor : resolves some of warnings when compiling with clang/clang++ (#294 ) * Resolves some of warnings when compiling with clang/clang++ Mostly nit stuff that clang catches when compiling with -Wall -Wextra -pedantic. - Fix comparison between sign/unsigned integers. - Passes a constant reference (const&) instead of copying each time. * minor : normalize coding style * minor : fix warning Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>	2022-12-19 20:19:01 +02:00