ggml : Depthwise 2D convolution (ggml/1152)

* ggml-cpu : kernels for faster depthwise 2D convolution

* fix compile: remove static after moving to ops.cpp

* add dilation for depthwise_conv_2d

* review: rename to ggml_conv_2d_dw_direct, remove redundant struct keywords, pass by ref, whitespace

* review: rename depthwise_conv_2d -> conv_2d_dw everywhere
This commit is contained in:
Acly
2025-04-17 14:16:45 +02:00
committed by Georgi Gerganov
parent 915c14ef10
commit d87dfcf7c0
5 changed files with 250 additions and 3 deletions

View File

@ -481,6 +481,7 @@ extern "C" {
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
GGML_OP_IM2COL_BACK,
GGML_OP_CONV_2D_DW,
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
@ -677,6 +678,9 @@ extern "C" {
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@ -1660,7 +1664,7 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
// depthwise
// depthwise (via im2col and mul_mat)
GGML_API struct ggml_tensor * ggml_conv_2d_dw(
struct ggml_context * ctx,
struct ggml_tensor * a, // convolution kernel
@ -1672,6 +1676,22 @@ extern "C" {
int d0, // dilation dimension 0
int d1); // dilation dimension 1
// Depthwise 2D convolution
// may be faster than ggml_conv_2d_dw, but not available in all backends
// a: KW KH 1 C convolution kernel
// b: W H C N input data
// res: W_out H_out C N
GGML_API struct ggml_tensor * ggml_conv_2d_dw_direct(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
int stride0,
int stride1,
int pad0,
int pad1,
int dilation0,
int dilation1);
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
struct ggml_context * ctx,
struct ggml_tensor * a,