From c310272fa088d3c91980c5b34a5efad441998fdb Mon Sep 17 00:00:00 2001
From: fxzjshm <11426482+fxzjshm@users.noreply.github.com>
Date: Wed, 5 Feb 2025 02:18:38 +0800
Subject: [PATCH] HIP: force max threads per block to be 1024 (llama/11621)

Some old or vendor-forked versions of LLVM still use 256 as the maximum number of threads per block. Explicitly set it to 1024 to align with upstream LLVM.

Signed-off-by: fxzjshm <fxzjshm@163.com>
---
 ggml/src/ggml-hip/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt
index eb03e10f..f4a46836 100644
--- a/ggml/src/ggml-hip/CMakeLists.txt
+++ b/ggml/src/ggml-hip/CMakeLists.txt
@@ -46,6 +46,9 @@ endif()
 
 message(STATUS "HIP and hipBLAS found")
 
+# Work around old/vendor-forked LLVM builds that still use 256 max threads per block; force 1024.
+string(APPEND CMAKE_HIP_FLAGS " --gpu-max-threads-per-block=1024")
+
 file(GLOB   GGML_HEADERS_ROCM "../ggml-cuda/*.cuh")
 list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h")