mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-29 15:30:03 +00:00
CANN: Add 310P operator support check (llama/12962)
This commit is contained in:
parent
b8755670ca
commit
be42a19eab
@ -625,6 +625,10 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
|
|||||||
bool count_include_pad = true;
|
bool count_include_pad = true;
|
||||||
int64_t divisor_override = 0;
|
int64_t divisor_override = 0;
|
||||||
int8_t cube_math_type = 0;
|
int8_t cube_math_type = 0;
|
||||||
|
#ifdef ASCEND_310P
|
||||||
|
cube_math_type = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
GGML_CANN_CALL_ACLNN_OP(AvgPool2d, acl_src, kernel_size, strides, paddings_avg,
|
GGML_CANN_CALL_ACLNN_OP(AvgPool2d, acl_src, kernel_size, strides, paddings_avg,
|
||||||
ceil_mode, count_include_pad, divisor_override,
|
ceil_mode, count_include_pad, divisor_override,
|
||||||
cube_math_type, acl_dst);
|
cube_math_type, acl_dst);
|
||||||
@ -2590,6 +2594,10 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|||||||
int64_t groups = 1;
|
int64_t groups = 1;
|
||||||
int8_t cubeMathType = 0;
|
int8_t cubeMathType = 0;
|
||||||
|
|
||||||
|
#ifdef ASCEND_310P
|
||||||
|
cubeMathType = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
|
GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
|
||||||
padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
|
padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
|
||||||
|
|
||||||
|
@ -2022,6 +2022,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|||||||
return true;
|
return true;
|
||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
|
#ifdef ASCEND_310P
|
||||||
|
// Q4 && Q8 per group is not supported on 310P device
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
// only support contiguous for quantized types.
|
// only support contiguous for quantized types.
|
||||||
return ggml_is_contiguous(op->src[0]) &&
|
return ggml_is_contiguous(op->src[0]) &&
|
||||||
ggml_is_contiguous(op->src[1]);
|
ggml_is_contiguous(op->src[1]);
|
||||||
@ -2107,6 +2111,12 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|||||||
}
|
}
|
||||||
case GGML_OP_POOL_2D: {
|
case GGML_OP_POOL_2D: {
|
||||||
const int32_t * opts = (const int32_t *) op->op_params;
|
const int32_t * opts = (const int32_t *) op->op_params;
|
||||||
|
#ifdef ASCEND_310P
|
||||||
|
enum ggml_op_pool opt = static_cast<ggml_op_pool>(opts[0]);
|
||||||
|
if(opt == GGML_OP_POOL_MAX){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
const int k0 = opts[1];
|
const int k0 = opts[1];
|
||||||
const int k1 = opts[2];
|
const int k1 = opts[2];
|
||||||
const int p0 = opts[5];
|
const int p0 = opts[5];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user