diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c index d0c407bd..8d5e3e20 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-quants.c +++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c @@ -5265,6 +5265,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r #if defined(__ARM_FEATURE_SVE) + uint32_t aux[3]; uint32_t utmp[4]; const int8_t m32 = 32; @@ -5276,7 +5277,6 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r const svuint8_t m1_sv = svlsl_n_u8_x(svptrue_b8(), m0_sv, 1); const svuint8_t m2_sv = svlsl_n_u8_x(svptrue_b8(), m0_sv, 2); const svuint8_t m3_sv = svlsl_n_u8_x(svptrue_b8(), m0_sv, 3); - svbool_t pred_s32 = svnot_b_z (svptrue_b32(), svptrue_pat_b32(SV_VL4)); float sum = 0; @@ -5289,7 +5289,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r const int8_t * restrict q8_sv = y[i].qs; // Set up scales - uint32_t * aux = &x[i].scales; + memcpy(aux, x[i].scales, 12); utmp[3] = ((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4); utmp[2] = ((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4); utmp[1] = (aux[1] & kmask2) | (((aux[2] >> 2) & kmask1) << 4);