ggml : always define ggml_fp16_t as uint16_t (llama/5666)

* ggml : always define ggml_fp16_t as uint16_t ggml-ci
* ggml : cont ggml-ci
* ggml : cont
* ggml : cont ggml-ci
* ggml : cont ggml-ci
* cuda : no longer ggml headers last ggml-ci
* ggml : fix q6_K FP16 -> FP32 conversion ggml-ci
* ggml : more FP16 -> FP32 conversion fixes ggml-ci
2025-06-19 07:18:07 +00:00 · 2024-02-22 23:21:39 +02:00
parent 31891db2e3
commit e5d06cfc0f
5 changed files with 42 additions and 36 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -1,3 +1,7 @@
+#include "ggml-cuda.h"
+#include "ggml.h"
+#include "ggml-backend-impl.h"
+
 #include <algorithm>
 #include <assert.h>
 #include <atomic>
@@ -121,11 +125,6 @@

 #endif // defined(GGML_USE_HIPBLAS)

-// ggml-cuda need half type so keep ggml headers include at last
-#include "ggml-cuda.h"
-#include "ggml.h"
-#include "ggml-backend-impl.h"
-
 #define CUDART_HMAX     11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed)

 #define CC_PASCAL     600