From 90dd5fca9c00a89b57f175a9c4491886aac538dc Mon Sep 17 00:00:00 2001 From: leo-pony Date: Thu, 28 Nov 2024 15:25:24 +0800 Subject: [PATCH] CANN: Fix SOC_TYPE compile bug (llama/10519) * CANN: Fix the bug build fail on Ascend310P under two cases: 1) Manual specify SOC_TYPE 2) Under some unusual compile environment * Update the cann backend News content: Support F16 and F32 data type model for Ascend 310P NPU. * fix CANN compile fail bug: the assert in ascend kernel function doesn't supportted on some CANN version --- ggml/src/ggml-cann/CMakeLists.txt | 7 ++++--- ggml/src/ggml-cann/kernels/CMakeLists.txt | 2 +- ggml/src/ggml-cann/kernels/dup.cpp | 1 - ggml/src/ggml-cann/kernels/get_row_q4_0.cpp | 16 ++++++++++++---- .../src/ggml-cann/kernels/quantize_f16_q8_0.cpp | 10 ++++++++++ .../src/ggml-cann/kernels/quantize_f32_q8_0.cpp | 10 ++++++++++ .../kernels/quantize_float_to_q4_0.cpp | 17 +++++++++++++++++ 7 files changed, 54 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-cann/CMakeLists.txt b/ggml/src/ggml-cann/CMakeLists.txt index 90132718..05cf06bf 100644 --- a/ggml/src/ggml-cann/CMakeLists.txt +++ b/ggml/src/ggml-cann/CMakeLists.txt @@ -22,13 +22,14 @@ if(NOT SOC_TYPE) detect_ascend_soc_type(SOC_VERSION) set(SOC_TYPE "${SOC_VERSION}") message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}") -else() - string(TOLOWER ${SOC_TYPE} SOC_VERSION) endif() -# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND310P. +string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower + +# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P. string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}") set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}") +string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION) if (CANN_INSTALL_DIR) # Only Support Linux. diff --git a/ggml/src/ggml-cann/kernels/CMakeLists.txt b/ggml/src/ggml-cann/kernels/CMakeLists.txt index 6a4e17cc..d687220c 100644 --- a/ggml/src/ggml-cann/kernels/CMakeLists.txt +++ b/ggml/src/ggml-cann/kernels/CMakeLists.txt @@ -25,6 +25,6 @@ ascendc_library(ascendc_kernels STATIC ${SRC_FILES} ) -message(STATUS "CANN: compile ascend kernels witch SOC_VERSION:${SOC_VERSION}.") +message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.") ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}") # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP) diff --git a/ggml/src/ggml-cann/kernels/dup.cpp b/ggml/src/ggml-cann/kernels/dup.cpp index 99f03e05..c7ba38d1 100644 --- a/ggml/src/ggml-cann/kernels/dup.cpp +++ b/ggml/src/ggml-cann/kernels/dup.cpp @@ -20,7 +20,6 @@ class DupByRows { // Input has four dims. int64_t op_block_num = GetBlockNum(); int64_t op_block_idx = GetBlockIdx(); - assert(op_block_idx < SUPPORTED_MAX_DIM && op_block_idx >= 0, "Invalid block index:%d, max is:%d\n", op_block_idx, SUPPORTED_MAX_DIM); // param num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3]; diff --git a/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp b/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp index 37721109..4fbe7220 100644 --- a/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +++ b/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp @@ -2,6 +2,15 @@ // optimize me. Use template to avoid copy code. using namespace AscendC; +#ifdef ASCEND_310P // 310P not support 4bit get row + extern "C" __global__ __aicore__ void ascendc_get_row_q4_0( + GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm, + GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm, + GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) { + // let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed. + printf("Ascend310P not support 4bit get row.\n"); + } +#else #define BUFFER_NUM 2 @@ -110,12 +119,9 @@ class GET_ROW_Q4_0 { LocalTensor output_local = output_queue.AllocTensor(); // TODO: cast more data to speed up. -#ifdef ASCEND_310P - // TODO: 310P support quantification -#else Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0); Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0); -#endif + // Only mul need compile by group. half scale = scale_gm.GetValue(scale_offset); @@ -194,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0( indices_nb_ub, output_ne_ub, output_nb_ub); op.calculate(); } + +#endif // #ifdef ASCEND_310P diff --git a/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp b/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp index 8423b3f0..504b43af 100644 --- a/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +++ b/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp @@ -1,6 +1,14 @@ #include "kernel_operator.h" using namespace AscendC; +#ifdef ASCEND_310P + extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0( + GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, + GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { + // let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed. + printf("Ascend310P not support f16->8bit quantization.\n"); + } +#else #define BUFFER_NUM 2 #define QK8_0 32 @@ -206,3 +214,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0( op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.calculate(); } + +#endif // #ifdef ASCEND_310P diff --git a/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp b/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp index b7c57509..05b0bc1d 100644 --- a/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +++ b/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp @@ -1,6 +1,14 @@ #include "kernel_operator.h" using namespace AscendC; +#ifdef ASCEND_310P // 310P not support f32->8bit quantization + extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0( + GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, + GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { + // let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed. + printf("Ascend310P not support f32->8bit quantization.\n"); + } +#else #define BUFFER_NUM 2 #define QK8_0 32 @@ -204,3 +212,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0( op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.calculate(); } + +#endif // #ifdef ASCEND_310P diff --git a/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp b/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp index 9c8c86b6..1188937b 100644 --- a/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +++ b/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp @@ -1,6 +1,21 @@ #include "kernel_operator.h" using namespace AscendC; +#ifdef ASCEND_310P // 310P not support float->4bit quantization + extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0( + GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, + GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { + // let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed. + printf("Ascend310P not support f32->4bit quantization.\n"); + } + + extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0( + GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm, + GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) { + // let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed. + printf("Ascend310P not support f16->4bit quantization.\n"); + } +#else #define BUFFER_NUM 2 #define Group_Size 32 @@ -276,3 +291,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0( op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.calculate(); } + +#endif // #ifdef ASCEND_310P