mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-04-27 14:29:43 +00:00)

commit b8d3e45342 (parent 1901505138)

CANN: Refactor to reduce duplicate code (llama/12731)

* CANN: Refactor to reduce duplicate code
* CANN: fix review comment
File diff suppressed because it is too large.
@@ -31,20 +31,25 @@
  * IN THE SOFTWARE.
  */
 
-#include <aclnnop/aclnn_add.h>
+#include <aclnnop/aclnn_abs.h>
+#include <aclnnop/aclnn_neg.h>
+#include <aclnnop/aclnn_exp.h>
 #include <aclnnop/aclnn_arange.h>
 #include <aclnnop/aclnn_argsort.h>
 #include <aclnnop/aclnn_cat.h>
 #include <aclnnop/aclnn_clamp.h>
-#include <aclnnop/aclnn_div.h>
 #include <aclnnop/aclnn_gelu.h>
+#include <aclnnop/aclnn_gelu_v2.h>
+#include <aclnnop/aclnn_sigmoid.h>
 #include <aclnnop/aclnn_hardsigmoid.h>
 #include <aclnnop/aclnn_hardswish.h>
 #include <aclnnop/aclnn_leaky_relu.h>
-#include <aclnnop/aclnn_mul.h>
 #include <aclnnop/aclnn_relu.h>
 #include <aclnnop/aclnn_silu.h>
 #include <aclnnop/aclnn_tanh.h>
+#include <aclnnop/aclnn_sqrt.h>
+#include <aclnnop/aclnn_sin.h>
+#include <aclnnop/aclnn_cos.h>
 #include "acl_tensor.h"
 #include "common.h"
 
@@ -63,23 +68,6 @@
  */
 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-/**
- * @brief Adds two ggml tensors using the CANN backend.
- *
- * @details This function performs an element-wise addition of two tensors. In
- *          case the tensors do not have the same shape, one or both tensors
- *          will be broadcasted to match the shape of the other before the
- *          addition is performed. The formula for the operation is given by:
- * \f[
- *     \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
- * \f]
- *
- * @param ctx The CANN context used for operations.
- * @param dst The ggml tensor representing the destination; the result of the
- *            addition is stored at dst->data, and dst->op is `GGML_OP_ADD`.
- */
-void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
 /**
  * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
  *        backend.
@@ -131,19 +119,6 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  */
 void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-/**
- * @brief Computes the square of the elements of a ggml tensor using the CANN
- *        backend.
- * @details The function sets the second source tensor of the destination
- *          tensor `dst` to be equal to the first source tensor. This is
- *          effectively squaring the elements since the multiplication becomes
- *          `element * element`.
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the squared values will be stored,
- *            which dst->op is `GGML_OP_SQR`.
- */
-void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
 /**
  * @brief Applies a clamp operation to the elements of a ggml tensor using the
  *        CANN backend.
@@ -275,6 +250,20 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  */
 void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
+/**
+ * @brief Computes the sum of elements in a ggml tensor.
+ *
+ * @details This function performs a reduction sum operation along the last
+ *          dimension of the input tensor `src`. The result of the sum is
+ *          stored in the destination tensor `dst`.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the reduced values will be stored.
+ *
+ */
+
+void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
  *        the CANN backend.
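For reference, the reduction described by the new `ggml_cann_sum` doc comment can be sketched on the host as follows. This is a minimal illustration of the semantics only, not the CANN implementation; the flat `rows`/`cols` view of the tensor and the name `sum_last_dim` are assumptions made for the sketch.

    #include <cstdint>
    #include <vector>

    // Host-side reference of a sum reduction over the last (contiguous)
    // dimension: every run of `cols` elements collapses to one value.
    // Illustrative only; the backend performs this on-device via aclnn.
    static std::vector<float> sum_last_dim(const std::vector<float>& src,
                                           int64_t rows, int64_t cols) {
        std::vector<float> dst(rows, 0.0f);
        for (int64_t r = 0; r < rows; ++r) {
            for (int64_t c = 0; c < cols; ++c) {
                dst[r] += src[r * cols + c];
            }
        }
        return dst;
    }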
@@ -500,128 +489,247 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
 /**
- * @brief Computes the cosine of each element in a ggml tensor using the CANN backend.
+ * @brief Adds two tensors element-wise and stores the result in a destination
+ *        tensor.
  *
- * @details This function applies the cosine function element-wise to the input tensor.
- *          The computed cosine values are stored in the destination tensor `dst`.
- *          The operation is optimized using the CANN backend for improved performance.
+ * This function performs the operation:
+ * \f[
+ *     dst = acl\_src0 + alpha \times acl\_src1
+ * \f]
+ * where alpha is a scalar value and defaults to 1.0f.
  *
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the cosine values will be stored.
- *            dst->op is `GGML_OP_COS`.
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src0 The first source tensor.
+ * @param acl_src1 The second source tensor.
+ * @param acl_dst The destination tensor where the result will be stored.
  */
-void ggml_cann_cos(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
+               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
 
 /**
- * @brief Computes the sine of each element in a ggml tensor using the CANN backend.
+ * @brief Subtracts two tensors element-wise and stores the result in a
+ *        destination tensor.
  *
- * @details This function applies the sine function element-wise to the input tensor.
- *          The computed sine values are stored in the destination tensor `dst`.
- *          The operation is optimized using the CANN backend for improved performance.
+ * This function performs the operation:
+ * \f[
+ *     dst = acl\_src0 - alpha \times acl\_src1
+ * \f]
+ * where alpha is a scalar value and defaults to 1.0f.
  *
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the sine values will be stored.
- *            dst->op is `GGML_OP_SIN`.
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src0 The first source tensor.
+ * @param acl_src1 The second source tensor.
+ * @param acl_dst The destination tensor where the result will be stored.
  */
-void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
+               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
 
-template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
-                                       aclTensor*, uint64_t*, aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
-void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+/**
+ * @brief Performs element-wise multiplication of two tensors and stores the
+ *        result in a destination tensor.
+ *
+ * This function performs element-wise multiplication of the tensors `acl_src`
+ * and `acl_other` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl_dst}_i = \text{acl_src}_i \times \text{acl_other}_i
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The first tensor for element-wise multiplication.
+ * @param acl_other The second tensor for element-wise multiplication.
+ * @param acl_dst The destination tensor where the result will be stored.
+ */
+void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Element-wise division, optionally in-place.
+ *
+ * This function divides each element of the source tensor `acl_src` by the
+ * corresponding element of the tensor `acl_other` and stores the result in
+ * the destination tensor `acl_dst`. The operation is defined as:
+ * \f[
+ *     \text{dst}_i = \frac{\text{acl_src}_i}{\text{acl_other}_i}
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src Numerator tensor.
+ * @param acl_other Denominator tensor.
+ * @param acl_dst The destination tensor where the result will be stored;
+ *                pass nullptr to perform the operation in-place on `acl_src`.
+ */
+void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Applies the element-wise cosine function to the elements of a tensor.
+ *
+ * This function computes the cosine of each element in the source tensor
+ * `acl_src` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl_dst}_i = \cos(\text{acl_src}_i)
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The source tensor on which the cosine function will be
+ *                applied.
+ * @param acl_dst The destination tensor where the cosine results will be
+ *                stored.
+ */
+void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+               aclTensor* acl_dst);
+
+/**
+ * @brief Applies the element-wise sine function to the elements of a tensor.
+ *
+ * This function computes the sine of each element in the source tensor
+ * `acl_src` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl_dst}_i = \sin(\text{acl_src}_i)
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The source tensor on which the sine function will be applied.
+ * @param acl_dst The destination tensor where the sine results will be stored.
+ */
+void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+               aclTensor* acl_dst);
+
+/**
+ * @brief Launches an asynchronous task using the memory allocator.
+ *
+ * This macro submits an asynchronous task on the specified stream.
+ * The task uses memory allocated by the allocator. It is guaranteed
+ * that the memory will not be accessed by other tasks until this task
+ * completes, due to the sequential execution order within the same stream.
+ *
+ * @param OP_NAME aclnn operator name.
+ * @param args Additional arguments required by the task.
+ *
+ * @note
+ * Memory from the allocator will be "freed" immediately and can be
+ * reallocated to other pointers. However, it won't be accessed by any
+ * other task before this asynchronous task ends, because all tasks in the
+ * same stream are executed in queue order.
+ */
+#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...)                                                 \
+    do {                                                                                      \
+        uint64_t workspaceSize = 0;                                                           \
+        aclOpExecutor * executor;                                                             \
+        void * workspaceAddr = nullptr;                                                       \
+                                                                                              \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));  \
+                                                                                              \
+        if (workspaceSize > 0) {                                                              \
+            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);              \
+            workspaceAddr = workspace_allocator.get();                                        \
+        }                                                                                     \
+        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream()));      \
+    } while (0)
+
+/**
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and
+ *        one output tensor.
+ *
+ * This function checks whether broadcasting is needed between `src0` and
+ * `src1`. If broadcasting is required, it calculates the proper shapes and
+ * creates ACL tensors with broadcast parameters. Otherwise, it directly
+ * creates ACL tensors based on the original tensor shapes.
+ *
+ * @param src0 The first input tensor (reference shape).
+ * @param src1 The second input tensor (possibly broadcasted).
+ * @param dst The destination/output tensor.
+ * @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
+ * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
+ * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
+ */
+void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
+                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+
+/**
+ * @brief Applies an element-wise binary operation to two input tensors using
+ *        the CANN backend.
+ *
+ * This templated function takes a binary operator and applies it to the two
+ * source tensors associated with the destination tensor. The function handles
+ * broadcasting as needed.
+ *
+ * @tparam binary_op A callable object (e.g., lambda or function pointer)
+ *                   representing the binary operation to be performed. It must
+ *                   take four arguments:
+ *                   (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
+ *
+ * @param ctx The CANN backend context used to manage execution and resources.
+ * @param dst The destination tensor.
+ */
+template <auto binary_op>
+void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
     ggml_tensor* src1 = dst->src[1];
-    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
     aclTensor* acl_src0;
     aclTensor* acl_src1;
     aclTensor* acl_dst;
 
     // Need bcast
-    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
-        BCAST_SHAPE(src0, src1)
-        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
-        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
-        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
-    } else {
-        acl_src0 = ggml_cann_create_tensor(src0);
-        acl_src1 = ggml_cann_create_tensor(src1);
-        acl_dst = ggml_cann_create_tensor(dst);
-    }
-
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
-                               &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
+    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
+    binary_op(ctx, acl_src0, acl_src1, acl_dst);
 
     ACL_CHECK(aclDestroyTensor(acl_src0));
     ACL_CHECK(aclDestroyTensor(acl_src1));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
-// Activation functions template.
-template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
-                                       aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
-                              const aclrtStream)>
-void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+/**
+ * @brief Applies a unary operation to an input tensor using the CANN backend.
+ *
+ * This templated function applies a unary operator to the source tensor of
+ * `dst` and stores the result in the destination tensor.
+ *
+ * @tparam unary_op A callable with the signature:
+ *                  void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
+ *                  where the first aclTensor is the source and the second is
+ *                  the destination.
+ *
+ * @param ctx The CANN backend context for managing resources and execution.
+ * @param dst The destination tensor. Its src[0] is treated as the input tensor.
+ */
+template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
+void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
 
     aclTensor* acl_src = ggml_cann_create_tensor(src);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
+    unary_op(ctx, acl_src, acl_dst);
 
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
-// Activation functions template for const aclTensors.
-template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
-                                       uint64_t*, aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
-                              const aclrtStream)>
-void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src = dst->src[0];
-
-    aclTensor* acl_src = ggml_cann_create_tensor(src);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
-
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
-
-    ACL_CHECK(aclDestroyTensor(acl_src));
-    ACL_CHECK(aclDestroyTensor(acl_dst));
-}
+/**
+ * @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
+ *
+ * This macro defines an inline lambda wrapping a specific ACL operation name,
+ * and passes it to the templated ggml_cann_unary_op function. It simplifies
+ * calling unary ops by hiding the lambda boilerplate.
+ *
+ * Internally, the lambda will call:
+ * @code
+ * GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst);
+ * @endcode
+ *
+ * @param OP_NAME The name of the ACL unary operator to invoke via
+ *                GGML_CANN_CALL_ACLNN_OP.
+ *
+ * @see ggml_cann_unary_op
+ * @see GGML_CANN_CALL_ACLNN_OP
+ */
+#define GGML_CANN_CALL_UNARY_OP(OP_NAME)                              \
+    do {                                                              \
+        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {      \
+            GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst);       \
+        };                                                            \
+        ggml_cann_unary_op<lambda>(ctx, dst);                         \
+    }                                                                 \
+    while (0)
 
 #endif  // CANN_ACLNN_OPS
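The `GGML_CANN_CALL_ACLNN_OP` macro is the core of this refactor: every aclnn operator follows the same two-phase call protocol (query the workspace size, then launch), and the token pasting `aclnn##OP_NAME` stamps that protocol out per operator. As a sketch, `GGML_CANN_CALL_ACLNN_OP(Cos, acl_src, acl_dst)` expands roughly to the code below; the hand-expansion assumes a surrounding scope that provides `ctx`, `acl_src`, and `acl_dst`, as inside `aclnn_cos`.

    // Approximate hand-expansion of GGML_CANN_CALL_ACLNN_OP(Cos, acl_src, acl_dst):
    do {
        uint64_t workspaceSize = 0;
        aclOpExecutor * executor;
        void * workspaceAddr = nullptr;

        // Phase 1: aclnn##Cos##GetWorkspaceSize pastes to aclnnCosGetWorkspaceSize,
        // which reports how much scratch memory the operator needs.
        ACL_CHECK(aclnnCosGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));

        if (workspaceSize > 0) {
            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
            workspaceAddr = workspace_allocator.get();
        }
        // Phase 2: launch asynchronously on the context's stream; in-stream
        // ordering keeps the pool memory safe until the kernel has consumed it,
        // exactly as the macro's @note explains.
        ACL_CHECK(aclnnCos(workspaceAddr, workspaceSize, executor, ctx.stream()));
    } while (0);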
@@ -1300,47 +1300,59 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_dup(ctx, dst);
             break;
         case GGML_OP_ADD:
-            ggml_cann_add(ctx, dst);
+        case GGML_OP_ADD1:
+            ggml_cann_binary_op<aclnn_add>(ctx, dst);
+            break;
+        case GGML_OP_SUB:
+            ggml_cann_binary_op<aclnn_sub>(ctx, dst);
             break;
         case GGML_OP_ACC:
             ggml_cann_acc(ctx, dst);
             break;
         case GGML_OP_MUL:
-            ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
+            ggml_cann_binary_op<aclnn_mul>(ctx, dst);
             break;
         case GGML_OP_DIV:
-            ggml_cann_mul_div<aclnnDivGetWorkspaceSize, aclnnDiv>(ctx, dst);
+            ggml_cann_binary_op<aclnn_div>(ctx, dst);
             break;
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(dst)) {
+                case GGML_UNARY_OP_ABS:
+                    GGML_CANN_CALL_UNARY_OP(Abs);
+                    break;
+                case GGML_UNARY_OP_NEG:
+                    GGML_CANN_CALL_UNARY_OP(Neg);
+                    break;
                 case GGML_UNARY_OP_GELU:
-                    ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Gelu);
                     break;
                 case GGML_UNARY_OP_SILU:
-                    ggml_cann_activation<aclnnSiluGetWorkspaceSize, aclnnSilu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Silu);
                     break;
-                // TODO: Use faster gelu??
-                case GGML_UNARY_OP_GELU_QUICK:
-                    ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
-                        ctx, dst);
+                case GGML_UNARY_OP_GELU_QUICK: {
+                    auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
+                        GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
+                    };
+                    ggml_cann_unary_op<lambda>(ctx, dst);
+                }
                     break;
                 case GGML_UNARY_OP_TANH:
-                    ggml_cann_activation<aclnnTanhGetWorkspaceSize, aclnnTanh>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Tanh);
                     break;
                 case GGML_UNARY_OP_RELU:
-                    ggml_cann_activation<aclnnReluGetWorkspaceSize, aclnnRelu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Relu);
+                    break;
+                case GGML_UNARY_OP_SIGMOID:
+                    GGML_CANN_CALL_UNARY_OP(Sigmoid);
                     break;
                 case GGML_UNARY_OP_HARDSIGMOID:
-                    ggml_cann_activation<aclnnHardsigmoidGetWorkspaceSize,
-                                         aclnnHardsigmoid>(ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Hardsigmoid);
                     break;
                 case GGML_UNARY_OP_HARDSWISH:
-                    ggml_cann_activation<aclnnHardswishGetWorkspaceSize,
-                                         aclnnHardswish>(ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Hardswish);
+                    break;
+                case GGML_UNARY_OP_EXP:
+                    GGML_CANN_CALL_UNARY_OP(Exp);
                     break;
                 default:
                     return false;
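With the helpers in place, each unary case collapses to a single macro line. As a sketch, `GGML_CANN_CALL_UNARY_OP(Relu)` expands to roughly the following (hand-expansion for illustration; the exact form is given by the macro definitions above):

    do {
        // Capture-free lambda binding the aclnnRelu entry points...
        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
            GGML_CANN_CALL_ACLNN_OP(Relu, acl_src, acl_dst);
        };
        // ...handed to the template, which creates the ACL tensors for
        // dst->src[0] and dst, runs the lambda, and destroys the handles.
        ggml_cann_unary_op<lambda>(ctx, dst);
    } while (0);

`GGML_UNARY_OP_GELU_QUICK` cannot use the one-argument macro because `aclnnGeluV2` takes an extra mode argument (the `0` in the diff), which is why that case writes the lambda out by hand.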
@@ -1382,7 +1394,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_scale(ctx, dst);
             break;
         case GGML_OP_SQR:
-            ggml_cann_sqr(ctx, dst);
+            GGML_ASSERT(dst->src[1] == nullptr);
+            dst->src[1] = dst->src[0];
+            ggml_cann_binary_op<aclnn_mul>(ctx, dst);
+            break;
+        case GGML_OP_SQRT:
+            GGML_CANN_CALL_UNARY_OP(Sqrt);
             break;
         case GGML_OP_CLAMP:
             ggml_cann_clamp(ctx, dst);
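The `GGML_OP_SQR` case above is the one non-obvious rewrite: instead of keeping a dedicated square kernel, the commit aliases the second operand to the first so the generic multiply path computes src0 * src0. A minimal sketch of the aliasing idea, using a hypothetical stand-in struct rather than the real `ggml_tensor`:

    #include <cassert>

    struct node {              // hypothetical stand-in for ggml_tensor
        node* src[2] = {nullptr, nullptr};
    };

    // x^2 as x * x: point src[1] at src[0], then reuse the element-wise
    // multiply dispatch, mirroring the GGML_OP_SQR case in the diff.
    void square_via_mul(node* dst) {
        assert(dst->src[1] == nullptr);  // SQR nodes carry a single operand
        dst->src[1] = dst->src[0];
        // ...now dispatch the binary multiply on (src[0], src[1])...
    }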
@@ -1414,6 +1431,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_OP_POOL_2D:
             ggml_cann_pool2d(ctx, dst);
             break;
+        case GGML_OP_SUM:
+            ggml_cann_sum(ctx, dst);
+            break;
         case GGML_OP_SUM_ROWS:
             ggml_cann_sum_rows(ctx, dst);
             break;
@@ -1424,11 +1444,11 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_argmax(ctx, dst);
             break;
         case GGML_OP_COS:
-            ggml_cann_cos(ctx, dst);
+            ggml_cann_unary_op<aclnn_cos>(ctx, dst);
             break;
         case GGML_OP_SIN:
-            ggml_cann_sin(ctx, dst);
+            ggml_cann_unary_op<aclnn_sin>(ctx, dst);
             break;
         default:
             return false;
     }
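Both dispatch changes rest on the same idea: the operation is a template parameter, so `ggml_cann_unary_op<aclnn_cos>` and `ggml_cann_binary_op<aclnn_mul>` stamp out the shared tensor plumbing once per op with no runtime indirection. A self-contained sketch of the pattern (all names here are hypothetical, not from the commit):

    #include <cstdio>

    struct context {};  // stand-in for ggml_backend_cann_context

    // Two "ops" with an identical signature, mirroring aclnn_cos / aclnn_sin.
    void op_cos(context&, float*, float*) { std::puts("cos kernel"); }
    void op_sin(context&, float*, float*) { std::puts("sin kernel"); }

    // Same shape as ggml_cann_unary_op: the op is a compile-time parameter,
    // so the shared pre/post work (tensor creation/destruction in the real
    // backend) is written once and every call is resolved statically.
    template <void unary_op(context&, float*, float*)>
    void run_unary(context& ctx, float* src, float* dst) {
        // ...create device tensors here in the real backend...
        unary_op(ctx, src, dst);
        // ...destroy device tensors here...
    }

    int main() {
        context ctx;
        float x = 0.0f, y = 0.0f;
        run_unary<op_cos>(ctx, &x, &y);  // mirrors ggml_cann_unary_op<aclnn_cos>
        run_unary<op_sin>(ctx, &x, &y);  // mirrors ggml_cann_unary_op<aclnn_sin>
    }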
@@ -1679,13 +1699,17 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
+                case GGML_UNARY_OP_ABS:
+                case GGML_UNARY_OP_NEG:
                 case GGML_UNARY_OP_GELU:
                 case GGML_UNARY_OP_SILU:
                 case GGML_UNARY_OP_RELU:
+                case GGML_UNARY_OP_SIGMOID:
                 case GGML_UNARY_OP_HARDSIGMOID:
                 case GGML_UNARY_OP_HARDSWISH:
                 case GGML_UNARY_OP_GELU_QUICK:
                 case GGML_UNARY_OP_TANH:
+                case GGML_UNARY_OP_EXP:
                     return true;
                 default:
                     return false;
@@ -1784,6 +1808,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             // value of paddingW should be at most half of kernelW
             return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
         }
+        case GGML_OP_SUM:
         case GGML_OP_DUP:
         case GGML_OP_IM2COL:
         case GGML_OP_CONCAT:
@@ -1795,11 +1820,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_TRANSPOSE:
         case GGML_OP_NORM:
         case GGML_OP_ADD:
+        case GGML_OP_ADD1:
+        case GGML_OP_SUB:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
         case GGML_OP_RMS_NORM:
         case GGML_OP_SCALE:
         case GGML_OP_SQR:
+        case GGML_OP_SQRT:
         case GGML_OP_CLAMP:
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_SOFT_MAX: