From 858fce41e474a7d7f10cc9c8ed3643cd4222e8a9 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Tue, 1 Apr 2025 12:29:27 +0200
Subject: [PATCH] coreml: fix audio shape in whisper decoder conversion [no ci]

This commit fixes the audio shape in the whisper decoder conversion
script.

The motivation for this is that the audio shape was incorrect and was
causing the conversion to fail.
---
 models/convert-whisper-to-coreml.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py
index 74575052..3876a287 100644
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@@ -269,10 +269,11 @@ def convert_decoder(hparams, model, quantize=False):
     model.eval()
 
     tokens_shape = (1, 1)
-    audio_shape = (1, hparams.n_audio_state, 1, 1500)
+    audio_shape = (1, hparams.n_audio_ctx, hparams.n_audio_state)
 
     audio_data = torch.randn(audio_shape)
-    token_data = torch.randint(50257, tokens_shape).long()
+    token_data = torch.randint(hparams.n_vocab, tokens_shape).long()
+
     traced_model = torch.jit.trace(model, (token_data, audio_data))
 
     model = ct.convert(