CUDA: fix MMQ for non-contiguous src0, add tests (llama/10021)

* CUDA: fix MMQ for non-contiguous src0, add tests
* revise test code
2025-06-16 05:48:09 +00:00 · 2024-10-24 11:09:36 +02:00
parent 10eb603a3c
commit ab0385f43b
3 changed files with 13 additions and 11 deletions
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3464,7 +3464,7 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {

 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
    size_t nbytes;
-    size_t blck_size = ggml_blck_size(tensor->type);
+    const size_t blck_size = ggml_blck_size(tensor->type);
    if (blck_size == 1) {
        nbytes = ggml_type_size(tensor->type);
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {