From 858fce41e474a7d7f10cc9c8ed3643cd4222e8a9 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Tue, 1 Apr 2025 12:29:27 +0200
Subject: [PATCH] coreml: fix audio shape in whisper decoder conversion [no ci]

This commit fixes the audio shape in the whisper decoder conversion
script.

The motivation for this is that the audio shape was incorrect and
was causing the conversion to fail. The dummy token input is also
sampled from hparams.n_vocab instead of the hard-coded value 50257.
---
 models/convert-whisper-to-coreml.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py
index 74575052..3876a287 100644
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@@ -269,10 +269,11 @@ def convert_decoder(hparams, model, quantize=False):
     model.eval()
 
     tokens_shape = (1, 1)
-    audio_shape = (1, hparams.n_audio_state, 1, 1500)
+    audio_shape = (1, hparams.n_audio_ctx, hparams.n_audio_state)
 
     audio_data = torch.randn(audio_shape)
-    token_data = torch.randint(50257, tokens_shape).long()
+    token_data = torch.randint(hparams.n_vocab, tokens_shape).long()
+
     traced_model = torch.jit.trace(model, (token_data, audio_data))
 
     model = ct.convert(