From fd102343632f06b3c782275dc92d753ebe1b4ec7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 17 Jan 2024 18:46:30 +0200 Subject: [PATCH] imatrix : offload to GPU support (llama/4957) * backend : add eval callback ggml-ci * backend : group nodes in a single compute when the user doesn't need them * backend : clean-up the implementation ggml-ci * simple : do not perform tensor data copy if not needed * simple : fix * imatrix : offload to GPU support * imatrix : fix ggml_mul_mat_id handling ggml-ci * ci : add imatrix test ggml-ci * ci : rearrange output ggml-ci --- ggml.c | 14 -------------- ggml.h | 6 ------ 2 files changed, 20 deletions(-) diff --git a/ggml.c b/ggml.c index d7e01b81..35fd29a9 100644 --- a/ggml.c +++ b/ggml.c @@ -394,12 +394,6 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float); static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y); static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y); -ggml_collect_imatrix_t g_imatrix_collect = NULL; - -void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect) { - g_imatrix_collect = imatrix_collect; -} - static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { [GGML_TYPE_I8] = { .type_name = "i8", @@ -9790,10 +9784,6 @@ static void ggml_compute_forward_mul_mat( const int ith = params->ith; const int nth = params->nth; - if (ith == 1 && g_imatrix_collect) { - g_imatrix_collect(src0, src1); - } - const enum ggml_type type = src0->type; const bool src1_cont = ggml_is_contiguous(src1); @@ -10097,10 +10087,6 @@ static void ggml_compute_forward_mul_mat_id( const struct ggml_tensor * src0_cur = dst->src[cur_a + 2]; - if (ith == 1 && g_imatrix_collect) { - g_imatrix_collect(src0_cur, src1); - } - const void * wdata = (src1->type == vec_dot_type) ? 
src1->data : params->wdata; const size_t row_size = ggml_row_size(vec_dot_type, ne10); diff --git a/ggml.h b/ggml.h index 837c52e6..27daf6fd 100644 --- a/ggml.h +++ b/ggml.h @@ -2085,12 +2085,6 @@ extern "C" { GGML_API void ggml_init_iq2_quantization(enum ggml_type type); GGML_API void ggml_deinit_iq2_quantization(enum ggml_type type); - // - // Importance matrix - // - typedef void(*ggml_collect_imatrix_t)(const struct ggml_tensor * src0, const struct ggml_tensor * src1); - GGML_API void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect); - // // gguf //