From 06a1da9daff94c1bf1b1d38950628264fe443f76 Mon Sep 17 00:00:00 2001
From: CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
Date: Mon, 14 Oct 2024 09:46:33 +0200
Subject: [PATCH] convert : handle max_target_positions (#2477)

as needed eg for
https://huggingface.co/primeline/whisper-large-v3-turbo-german/blob/main/config.json
---
 models/convert-h5-to-ggml.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/models/convert-h5-to-ggml.py b/models/convert-h5-to-ggml.py
index 50836a21..5474d586 100644
--- a/models/convert-h5-to-ggml.py
+++ b/models/convert-h5-to-ggml.py
@@ -82,7 +82,11 @@ dir_out = Path(sys.argv[3])
 encoder = json.load((dir_model / "vocab.json").open("r", encoding="utf8"))
 encoder_added = json.load((dir_model / "added_tokens.json").open( "r", encoding="utf8"))
-hparams = json.load((dir_model / "config.json").open("r", encoding="utf8") )
+hparams = json.load((dir_model / "config.json").open("r", encoding="utf8"))
+
+# Add this block to handle missing 'max_length'
+if "max_length" not in hparams:
+    hparams["max_length"] = hparams.get("max_target_positions", 448)
 
 model = WhisperForConditionalGeneration.from_pretrained(dir_model)
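
For context, the added block backfills "max_length" from "max_target_positions" (Whisper's decoder context length, typically 448) when a model's config.json omits it. Below is a minimal sketch of that fallback, run against a made-up hparams dict standing in for the result of json.load on a config.json; the field values are illustrative and not taken from the linked model.

# Made-up stand-in for json.load((dir_model / "config.json").open(...)):
# it has "max_target_positions" but, like the linked config, no "max_length".
hparams = {"max_target_positions": 448}

# Same logic as the added block: keep an explicit "max_length" if present,
# otherwise fall back to "max_target_positions", and finally to 448.
if "max_length" not in hparams:
    hparams["max_length"] = hparams.get("max_target_positions", 448)

print(hparams["max_length"])  # -> 448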