mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-19 04:37:51 +00:00
Add Ascend NPU backend (llama/6035)
* [CANN] Add Ascend NPU backend Ascend is a full-stack AI computing infrastructure for industry applications and services based on Huawei Ascend processors and software. CANN (Compute Architecture of Neural Networks), developped by Huawei, is a heterogeneous computing architecture for AI. Co-authored-by: wangshuai09 <391746016@qq.com> * delete trailing whitespaces * Modify the code based on review comment * Rename LLAMA_CANN to GGML_CANN * Make ggml-common.h private * add ggml_cann prefix for acl funcs * Add logging for CANN backend * Delete Trailing whitespace --------- Co-authored-by: wangshuai09 <391746016@qq.com>
This commit is contained in:
parent
fcba6aa352
commit
8923bb4292
@ -753,6 +753,8 @@ extern "C" {
|
||||
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||
|
||||
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||
|
||||
// use this to compute the memory overhead of a tensor
|
||||
GGML_API size_t ggml_tensor_overhead(void);
|
||||
|
||||
@ -2397,6 +2399,7 @@ extern "C" {
|
||||
GGML_API int ggml_cpu_has_rpc (void);
|
||||
GGML_API int ggml_cpu_has_vsx (void);
|
||||
GGML_API int ggml_cpu_has_matmul_int8(void);
|
||||
GGML_API int ggml_cpu_has_cann (void);
|
||||
|
||||
//
|
||||
// Internal types and functions exposed for tests and benchmarks
|
||||
|
@ -770,6 +770,74 @@ if (GGML_CPU_HBM)
|
||||
target_link_libraries(ggml PUBLIC memkind)
|
||||
endif()
|
||||
|
||||
if (GGML_CANN)
|
||||
if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
|
||||
set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
|
||||
message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
|
||||
endif()
|
||||
|
||||
if (CANN_INSTALL_DIR)
|
||||
# Only Support Linux.
|
||||
if (GGML_CANN)
|
||||
if (NOT UNIX)
|
||||
set(GGML_CANN OFF)
|
||||
message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Supported platforms: x86-64, arm64
|
||||
if (GGML_CANN)
|
||||
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
|
||||
else()
|
||||
set(GGML_CANN OFF)
|
||||
message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set header and libs
|
||||
if(GGML_CANN)
|
||||
set(CANN_INCLUDE_DIRS
|
||||
${CANN_INSTALL_DIR}/include
|
||||
${CANN_INSTALL_DIR}/include/aclnn
|
||||
${CANN_INSTALL_DIR}/acllib/include
|
||||
)
|
||||
|
||||
# TODO: find libs
|
||||
link_directories(
|
||||
${CANN_INSTALL_DIR}/lib64
|
||||
)
|
||||
|
||||
add_subdirectory(ggml-cann/kernels)
|
||||
list(APPEND CANN_LIBRARIES
|
||||
ascendcl
|
||||
nnopbase
|
||||
opapi
|
||||
acl_op_compiler
|
||||
ascendc_kernels
|
||||
)
|
||||
|
||||
set(GGML_HEADERS_CANN "../include/ggml-cann.h")
|
||||
file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
|
||||
list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
|
||||
|
||||
message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
|
||||
message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
|
||||
|
||||
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} )
|
||||
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
|
||||
list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
|
||||
endif()
|
||||
else()
|
||||
set(GGML_CANN OFF)
|
||||
message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
|
||||
endif()
|
||||
|
||||
if(NOT GGML_CANN)
|
||||
message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
function(get_flags CCID CCVER)
|
||||
set(C_FLAGS "")
|
||||
set(CXX_FLAGS "")
|
||||
@ -1184,6 +1252,7 @@ add_library(ggml
|
||||
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
||||
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
|
||||
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
|
||||
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
|
||||
ggml-aarch64.c ggml-aarch64.h
|
||||
)
|
||||
|
||||
|
@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
|
||||
extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
|
||||
ggml_backend_kompute_reg_devices();
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CANN
|
||||
extern GGML_CALL int ggml_backend_cann_reg_devices(void);
|
||||
ggml_backend_cann_reg_devices();
|
||||
#endif
|
||||
}
|
||||
|
||||
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
||||
|
@ -3341,7 +3341,7 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
|
||||
}
|
||||
|
||||
// check if t1 can be represented as a repeatition of t0
|
||||
static inline bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return ggml_is_empty(t0) ? ggml_is_empty(t1) :
|
||||
@ -13699,6 +13699,7 @@ static void ggml_compute_forward_soft_max(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ggml_compute_forward_soft_max_back
|
||||
|
||||
static void ggml_compute_forward_soft_max_back_f32(
|
||||
@ -21994,6 +21995,14 @@ int ggml_cpu_has_rpc(void) {
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_has_cann(void) {
|
||||
#if defined(GGML_USE_CANN)
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_has_gpublas(void) {
|
||||
return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user