openwrt/target/linux/qualcommbe/patches-6.6/103-41-net-ethernet-qualcomm-Add-Tx-Ethernet-DMA-support.patch
Christian Marangi 93173aee96
qualcommbe: ipq95xx: Add initial support for new target
Add initial support for the new target, starting with ethernet support
based on the pending upstream patches for PCS UNIPHY, PPE and EDMA.

Only initramfs images are currently working, as support for the new
SPI/NAND implementation, USB, CPUFreq and other devices is still
unfinished and needs to be evaluated.

Link: https://github.com/openwrt/openwrt/pull/17725
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
2025-01-25 21:24:06 +01:00

From 1c2736afc17435d3bca18a84f9ed2620a5b03830 Mon Sep 17 00:00:00 2001
From: Suruchi Agarwal <quic_suruchia@quicinc.com>
Date: Thu, 21 Mar 2024 16:26:29 -0700
Subject: [PATCH 41/50] net: ethernet: qualcomm: Add Tx Ethernet DMA support
Add Tx queues, rings, descriptor configuration and
DMA support for the EDMA.
Change-Id: Idfb0e1fe5ac494d614097d6c97dd15d63bbce8e6
Co-developed-by: Pavithra R <quic_pavir@quicinc.com>
Signed-off-by: Pavithra R <quic_pavir@quicinc.com>
Signed-off-by: Suruchi Agarwal <quic_suruchia@quicinc.com>
---
drivers/net/ethernet/qualcomm/ppe/Makefile | 2 +-
drivers/net/ethernet/qualcomm/ppe/edma.c | 97 ++-
drivers/net/ethernet/qualcomm/ppe/edma.h | 7 +
.../net/ethernet/qualcomm/ppe/edma_cfg_tx.c | 648 ++++++++++++++
.../net/ethernet/qualcomm/ppe/edma_cfg_tx.h | 28 +
drivers/net/ethernet/qualcomm/ppe/edma_port.c | 136 +++
drivers/net/ethernet/qualcomm/ppe/edma_port.h | 35 +
drivers/net/ethernet/qualcomm/ppe/edma_tx.c | 808 ++++++++++++++++++
drivers/net/ethernet/qualcomm/ppe/edma_tx.h | 302 +++++++
9 files changed, 2055 insertions(+), 8 deletions(-)
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_tx.c
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_tx.h
diff --git a/drivers/net/ethernet/qualcomm/ppe/Makefile b/drivers/net/ethernet/qualcomm/ppe/Makefile
index 3fd607ce42de..b358bfd781fb 100644
--- a/drivers/net/ethernet/qualcomm/ppe/Makefile
+++ b/drivers/net/ethernet/qualcomm/ppe/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_QCOM_PPE) += qcom-ppe.o
qcom-ppe-objs := ppe.o ppe_config.o ppe_api.o ppe_debugfs.o ppe_port.o
#EDMA
-qcom-ppe-objs += edma.o edma_cfg_rx.o edma_port.o edma_rx.o
\ No newline at end of file
+qcom-ppe-objs += edma.o edma_cfg_rx.o edma_cfg_tx.o edma_port.o edma_rx.o edma_tx.o
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma.c b/drivers/net/ethernet/qualcomm/ppe/edma.c
index 134f6b95c294..739fcfbde0f9 100644
--- a/drivers/net/ethernet/qualcomm/ppe/edma.c
+++ b/drivers/net/ethernet/qualcomm/ppe/edma.c
@@ -18,6 +18,7 @@
#include <linux/reset.h>
#include "edma.h"
+#include "edma_cfg_tx.h"
#include "edma_cfg_rx.h"
#include "ppe_regs.h"
@@ -25,6 +26,7 @@
/* Global EDMA context. */
struct edma_context *edma_ctx;
+static char **edma_txcmpl_irq_name;
static char **edma_rxdesc_irq_name;
/* Module params. */
@@ -192,22 +194,59 @@ static int edma_configure_ucast_prio_map_tbl(void)
static int edma_irq_register(void)
{
struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
struct edma_ring_info *rx = hw_info->rx;
int ret;
u32 i;
+ /* Request IRQ for TXCMPL rings. */
+ edma_txcmpl_irq_name = kzalloc((sizeof(char *) * txcmpl->num_rings), GFP_KERNEL);
+ if (!edma_txcmpl_irq_name)
+ return -ENOMEM;
+
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ edma_txcmpl_irq_name[i] = kzalloc((sizeof(char *) * EDMA_IRQ_NAME_SIZE),
+ GFP_KERNEL);
+ if (!edma_txcmpl_irq_name[i]) {
+ ret = -ENOMEM;
+ goto txcmpl_ring_irq_name_alloc_fail;
+ }
+
+ snprintf(edma_txcmpl_irq_name[i], EDMA_IRQ_NAME_SIZE, "edma_txcmpl_%d",
+ txcmpl->ring_start + i);
+
+ irq_set_status_flags(edma_ctx->intr_info.intr_txcmpl[i], IRQ_DISABLE_UNLAZY);
+
+ ret = request_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ edma_tx_handle_irq, IRQF_SHARED,
+ edma_txcmpl_irq_name[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ if (ret) {
+ pr_err("TXCMPL ring IRQ:%d request %d failed\n",
+ edma_ctx->intr_info.intr_txcmpl[i], i);
+ goto txcmpl_ring_intr_req_fail;
+ }
+
+ pr_debug("TXCMPL ring: %d IRQ:%d request success: %s\n",
+ txcmpl->ring_start + i,
+ edma_ctx->intr_info.intr_txcmpl[i],
+ edma_txcmpl_irq_name[i]);
+ }
+
/* Request IRQ for RXDESC rings. */
edma_rxdesc_irq_name = kzalloc((sizeof(char *) * rx->num_rings),
GFP_KERNEL);
- if (!edma_rxdesc_irq_name)
- return -ENOMEM;
+ if (!edma_rxdesc_irq_name) {
+ ret = -ENOMEM;
+ goto rxdesc_irq_name_alloc_fail;
+ }
for (i = 0; i < rx->num_rings; i++) {
edma_rxdesc_irq_name[i] = kzalloc((sizeof(char *) * EDMA_IRQ_NAME_SIZE),
GFP_KERNEL);
if (!edma_rxdesc_irq_name[i]) {
ret = -ENOMEM;
- goto rxdesc_irq_name_alloc_fail;
+ goto rxdesc_ring_irq_name_alloc_fail;
}
snprintf(edma_rxdesc_irq_name[i], 20, "edma_rxdesc_%d",
@@ -236,8 +275,19 @@ static int edma_irq_register(void)
rx_desc_ring_intr_req_fail:
for (i = 0; i < rx->num_rings; i++)
kfree(edma_rxdesc_irq_name[i]);
-rxdesc_irq_name_alloc_fail:
+rxdesc_ring_irq_name_alloc_fail:
kfree(edma_rxdesc_irq_name);
+rxdesc_irq_name_alloc_fail:
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ synchronize_irq(edma_ctx->intr_info.intr_txcmpl[i]);
+ free_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ }
+txcmpl_ring_intr_req_fail:
+ for (i = 0; i < txcmpl->num_rings; i++)
+ kfree(edma_txcmpl_irq_name[i]);
+txcmpl_ring_irq_name_alloc_fail:
+ kfree(edma_txcmpl_irq_name);
return ret;
}
@@ -326,12 +376,22 @@ static int edma_irq_init(void)
static int edma_alloc_rings(void)
{
+ if (edma_cfg_tx_rings_alloc()) {
+ pr_err("Error in allocating Tx rings\n");
+ return -ENOMEM;
+ }
+
if (edma_cfg_rx_rings_alloc()) {
pr_err("Error in allocating Rx rings\n");
- return -ENOMEM;
+ goto rx_rings_alloc_fail;
}
return 0;
+
+rx_rings_alloc_fail:
+ edma_cfg_tx_rings_cleanup();
+
+ return -ENOMEM;
}
static int edma_hw_reset(void)
@@ -389,7 +449,7 @@ static int edma_hw_configure(void)
struct edma_hw_info *hw_info = edma_ctx->hw_info;
struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
struct regmap *regmap = ppe_dev->regmap;
- u32 data, reg;
+ u32 data, reg, i;
int ret;
reg = EDMA_BASE_OFFSET + EDMA_REG_MAS_CTRL_ADDR;
@@ -439,11 +499,17 @@ static int edma_hw_configure(void)
}
/* Disable interrupts. */
+ for (i = 1; i <= hw_info->max_ports; i++)
+ edma_cfg_tx_disable_interrupts(i);
+
edma_cfg_rx_disable_interrupts();
edma_cfg_rx_rings_disable();
edma_cfg_rx_ring_mappings();
+ edma_cfg_tx_ring_mappings();
+
+ edma_cfg_tx_rings();
ret = edma_cfg_rx_rings();
if (ret) {
@@ -520,6 +586,7 @@ static int edma_hw_configure(void)
edma_cfg_rx_napi_delete();
edma_cfg_rx_rings_disable();
edma_cfg_rx_rings_failed:
+ edma_cfg_tx_rings_cleanup();
edma_cfg_rx_rings_cleanup();
edma_alloc_rings_failed:
free_netdev(edma_ctx->dummy_dev);
@@ -538,13 +605,27 @@ static int edma_hw_configure(void)
void edma_destroy(struct ppe_device *ppe_dev)
{
struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
struct edma_ring_info *rx = hw_info->rx;
u32 i;
/* Disable interrupts. */
+ for (i = 1; i <= hw_info->max_ports; i++)
+ edma_cfg_tx_disable_interrupts(i);
+
edma_cfg_rx_disable_interrupts();
- /* Free IRQ for RXDESC rings. */
+ /* Free IRQ for TXCMPL rings. */
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ synchronize_irq(edma_ctx->intr_info.intr_txcmpl[i]);
+
+ free_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ kfree(edma_txcmpl_irq_name[i]);
+ }
+ kfree(edma_txcmpl_irq_name);
+
+ /* Free IRQ for RXDESC rings */
for (i = 0; i < rx->num_rings; i++) {
synchronize_irq(edma_ctx->intr_info.intr_rx[i]);
free_irq(edma_ctx->intr_info.intr_rx[i],
@@ -560,6 +641,7 @@ void edma_destroy(struct ppe_device *ppe_dev)
edma_cfg_rx_napi_delete();
edma_cfg_rx_rings_disable();
edma_cfg_rx_rings_cleanup();
+ edma_cfg_tx_rings_cleanup();
free_netdev(edma_ctx->dummy_dev);
kfree(edma_ctx->netdev_arr);
@@ -585,6 +667,7 @@ int edma_setup(struct ppe_device *ppe_dev)
edma_ctx->hw_info = &ipq9574_hw_info;
edma_ctx->ppe_dev = ppe_dev;
edma_ctx->rx_buf_size = rx_buff_size;
+ edma_ctx->tx_requeue_stop = false;
/* Configure the EDMA common clocks. */
ret = edma_clock_init();
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma.h b/drivers/net/ethernet/qualcomm/ppe/edma.h
index 778df7997d9f..fb8ccbfbaf41 100644
--- a/drivers/net/ethernet/qualcomm/ppe/edma.h
+++ b/drivers/net/ethernet/qualcomm/ppe/edma.h
@@ -7,6 +7,7 @@
#include "ppe_api.h"
#include "edma_rx.h"
+#include "edma_tx.h"
/* One clock cycle = 1/(EDMA clock frequency in Mhz) micro seconds.
*
@@ -94,8 +95,11 @@ struct edma_intr_info {
* @intr_info: EDMA Interrupt info
* @rxfill_rings: Rx fill Rings, SW is producer
* @rx_rings: Rx Desc Rings, SW is consumer
+ * @tx_rings: Tx Descriptor Ring, SW is producer
+ * @txcmpl_rings: Tx complete Ring, SW is consumer
* @rx_page_mode: Page mode enabled or disabled
* @rx_buf_size: Rx buffer size for Jumbo MRU
+ * @tx_requeue_stop: Tx requeue stop enabled or disabled
*/
struct edma_context {
struct net_device **netdev_arr;
@@ -105,8 +109,11 @@ struct edma_context {
struct edma_intr_info intr_info;
struct edma_rxfill_ring *rxfill_rings;
struct edma_rxdesc_ring *rx_rings;
+ struct edma_txdesc_ring *tx_rings;
+ struct edma_txcmpl_ring *txcmpl_rings;
u32 rx_page_mode;
u32 rx_buf_size;
+ bool tx_requeue_stop;
};
/* Global EDMA context */
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c
new file mode 100644
index 000000000000..f704c654b2cd
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+/* Configure rings, buffers and NAPI for the transmit path, and provide
+ * APIs to enable, disable, clean and map the Tx rings.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/regmap.h>
+#include <linux/skbuff.h>
+
+#include "edma.h"
+#include "edma_cfg_tx.h"
+#include "edma_port.h"
+#include "ppe.h"
+#include "ppe_regs.h"
+
+static void edma_cfg_txcmpl_ring_cleanup(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Free any buffers assigned to any descriptors. */
+ edma_tx_complete(EDMA_TX_RING_SIZE - 1, txcmpl_ring);
+
+ /* Free TxCmpl ring descriptors. */
+ dma_free_coherent(dev, sizeof(struct edma_txcmpl_desc)
+ * txcmpl_ring->count, txcmpl_ring->desc,
+ txcmpl_ring->dma);
+ txcmpl_ring->desc = NULL;
+ txcmpl_ring->dma = (dma_addr_t)0;
+}
+
+static int edma_cfg_txcmpl_ring_setup(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Allocate TxCmpl ring descriptors. */
+ txcmpl_ring->desc = dma_alloc_coherent(dev, sizeof(struct edma_txcmpl_desc)
+ * txcmpl_ring->count,
+ &txcmpl_ring->dma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txcmpl_ring->desc))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void edma_cfg_tx_desc_ring_cleanup(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_pri *txdesc = NULL;
+ struct device *dev = ppe_dev->dev;
+ u32 prod_idx, cons_idx, data, reg;
+ struct sk_buff *skb = NULL;
+
+ /* Free any buffers assigned to any descriptors. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ prod_idx = data & EDMA_TXDESC_PROD_IDX_MASK;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CONS_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ cons_idx = data & EDMA_TXDESC_CONS_IDX_MASK;
+
+ /* Walk active list, obtain skb from descriptor and free it. */
+ while (cons_idx != prod_idx) {
+ txdesc = EDMA_TXDESC_PRI_DESC(txdesc_ring, cons_idx);
+ skb = (struct sk_buff *)EDMA_TXDESC_OPAQUE_GET(txdesc);
+ dev_kfree_skb_any(skb);
+
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ }
+
+ /* Free Tx ring descriptors. */
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count),
+ txdesc_ring->pdesc,
+ txdesc_ring->pdma);
+ txdesc_ring->pdesc = NULL;
+ txdesc_ring->pdma = (dma_addr_t)0;
+
+ /* Free secondary Tx ring descriptors. */
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_sec)
+ * txdesc_ring->count),
+ txdesc_ring->sdesc,
+ txdesc_ring->sdma);
+ txdesc_ring->sdesc = NULL;
+ txdesc_ring->sdma = (dma_addr_t)0;
+}
+
+static int edma_cfg_tx_desc_ring_setup(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Allocate primary TxDesc ring descriptors. */
+ txdesc_ring->pdesc = dma_alloc_coherent(dev, sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count,
+ &txdesc_ring->pdma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txdesc_ring->pdesc))
+ return -ENOMEM;
+
+ txdesc_ring->sdesc = dma_alloc_coherent(dev, sizeof(struct edma_txdesc_sec)
+ * txdesc_ring->count,
+ &txdesc_ring->sdma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txdesc_ring->sdesc)) {
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count),
+ txdesc_ring->pdesc,
+ txdesc_ring->pdma);
+ txdesc_ring->pdesc = NULL;
+ txdesc_ring->pdma = (dma_addr_t)0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void edma_cfg_tx_desc_ring_configure(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 data, reg;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_BA(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->pdma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_BA2(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->sdma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_RING_SIZE(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->count & EDMA_TXDESC_RING_SIZE_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)EDMA_TX_INITIAL_PROD_IDX);
+
+ data = FIELD_PREP(EDMA_TXDESC_CTRL_FC_GRP_ID_MASK, txdesc_ring->fc_grp_id);
+
+ /* Configure group ID for flow control for this Tx ring. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_write(regmap, reg, data);
+}
+
+static void edma_cfg_txcmpl_ring_configure(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 data, reg;
+
+ /* Configure TxCmpl ring base address. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_BA(txcmpl_ring->id);
+ regmap_write(regmap, reg, (u32)(txcmpl_ring->dma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_RING_SIZE(txcmpl_ring->id);
+ regmap_write(regmap, reg, (u32)(txcmpl_ring->count & EDMA_TXDESC_RING_SIZE_MASK));
+
+ /* Set TxCmpl ret mode to opaque. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_CTRL(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_TXCMPL_RETMODE_OPAQUE);
+
+ /* Configure the Mitigation timer. */
+ data = EDMA_MICROSEC_TO_TIMER_UNIT(EDMA_TX_MITIGATION_TIMER_DEF,
+ ppe_dev->clk_rate / MHZ);
+ data = ((data & EDMA_TX_MOD_TIMER_INIT_MASK)
+ << EDMA_TX_MOD_TIMER_INIT_SHIFT);
+ pr_debug("EDMA Tx mitigation timer value: %d\n", data);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_MOD_TIMER(txcmpl_ring->id);
+ regmap_write(regmap, reg, data);
+
+ /* Configure the Mitigation packet count. */
+ data = (EDMA_TX_MITIGATION_PKT_CNT_DEF & EDMA_TXCMPL_LOW_THRE_MASK)
+ << EDMA_TXCMPL_LOW_THRE_SHIFT;
+ pr_debug("EDMA Tx mitigation packet count value: %d\n", data);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_UGT_THRE(txcmpl_ring->id);
+ regmap_write(regmap, reg, data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_CTRL(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_TX_NE_INT_EN);
+}
+
+/**
+ * edma_cfg_tx_fill_per_port_tx_map - Fill Tx ring mapping.
+ * @netdev: Netdevice.
+ * @port_id: Port ID.
+ *
+ * Fill per-port Tx ring mapping in net device private area.
+ */
+void edma_cfg_tx_fill_per_port_tx_map(struct net_device *netdev, u32 port_id)
+{
+ u32 i;
+
+ /* Ring to core mapping is done in order starting from 0 for port 1. */
+ for_each_possible_cpu(i) {
+ struct edma_port_priv *port_dev = (struct edma_port_priv *)netdev_priv(netdev);
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 txdesc_ring_id;
+
+ txdesc_ring_id = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[txdesc_ring_id];
+ port_dev->txr_map[i] = txdesc_ring;
+ }
+}
+
+/**
+ * edma_cfg_tx_rings_enable - Enable Tx rings.
+ *
+ * Enable the Tx rings mapped to the given port.
+ */
+void edma_cfg_tx_rings_enable(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[ring_idx];
+ u32 data;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ data |= FIELD_PREP(EDMA_TXDESC_CTRL_TXEN_MASK, EDMA_TXDESC_TX_ENABLE);
+
+ regmap_write(regmap, reg, data);
+ }
+}
+
+/**
+ * edma_cfg_tx_rings_disable - Disable Tx rings.
+ *
+ * Disable the Tx rings mapped to the given port.
+ */
+void edma_cfg_tx_rings_disable(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[ring_idx];
+ u32 data;
+
+ /* Clear the ring's TX enable bit. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ data &= ~EDMA_TXDESC_TX_ENABLE;
+ regmap_write(regmap, reg, data);
+ }
+}
+
+/**
+ * edma_cfg_tx_ring_mappings - Map Tx to Tx complete rings.
+ *
+ * Map Tx to Tx complete rings.
+ */
+void edma_cfg_tx_ring_mappings(void)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 desc_index, i, data, reg;
+
+ /* Clear the TXDESC2CMPL_MAP_xx reg before setting up
+ * the mapping. This register holds the TXDESC to TXCMPL ring
+ * mapping.
+ */
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR, 0);
+ desc_index = txcmpl->ring_start;
+
+ /* 6 registers to hold the completion mapping for total 32
+ * TX desc rings (0-5, 6-11, 12-17, 18-23, 24-29 and rest).
+ * In each entry 5 bits hold the mapping for a particular TX desc ring.
+ */
+ for (i = tx->ring_start; i < tx->ring_start + tx->num_rings; i++) {
+ u32 reg, data;
+
+ if (i >= 0 && i <= 5)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR;
+ else if (i >= 6 && i <= 11)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR;
+ else if (i >= 12 && i <= 17)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR;
+ else if (i >= 18 && i <= 23)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR;
+ else if (i >= 24 && i <= 29)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR;
+ else
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR;
+
+ pr_debug("Configure Tx desc:%u to use TxCmpl:%u\n", i, desc_index);
+
+ /* Set the Tx complete descriptor ring number in the mapping register.
+ * E.g. if desc_index = 31 (TxCmpl ring) and i = 28 (TxDesc ring):
+ * reg = EDMA_REG_TXDESC2CMPL_MAP_4_ADDR
+ * data |= (desc_index & 0x1F) << ((i % 6) * 5);
+ * i.e. data |= (0x1F << 20), which sets 0b11111 at bit 20 of
+ * register EDMA_REG_TXDESC2CMPL_MAP_4_ADDR.
+ */
+ regmap_read(regmap, reg, &data);
+ data |= (desc_index & EDMA_TXDESC2CMPL_MAP_TXDESC_MASK) << ((i % 6) * 5);
+ regmap_write(regmap, reg, data);
+
+ desc_index++;
+ if (desc_index == txcmpl->ring_start + txcmpl->num_rings)
+ desc_index = txcmpl->ring_start;
+ }
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_0_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_1_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_2_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_3_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_4_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_5_ADDR: 0x%x\n", data);
+}
+
+static int edma_cfg_tx_rings_setup(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i, j = 0;
+
+ /* Set Txdesc flow control group id, same as port number. */
+ for (i = 0; i < hw_info->max_ports; i++) {
+ for_each_possible_cpu(j) {
+ struct edma_txdesc_ring *txdesc_ring = NULL;
+ u32 txdesc_idx = (i * num_possible_cpus()) + j;
+
+ txdesc_ring = &edma_ctx->tx_rings[txdesc_idx];
+ txdesc_ring->fc_grp_id = i + 1;
+ }
+ }
+
+ /* Allocate TxDesc ring descriptors. */
+ for (i = 0; i < tx->num_rings; i++) {
+ struct edma_txdesc_ring *txdesc_ring = NULL;
+ int ret;
+
+ txdesc_ring = &edma_ctx->tx_rings[i];
+ txdesc_ring->count = EDMA_TX_RING_SIZE;
+ txdesc_ring->id = tx->ring_start + i;
+
+ ret = edma_cfg_tx_desc_ring_setup(txdesc_ring);
+ if (ret) {
+ pr_err("Error in setting up %d txdesc ring. ret: %d",
+ txdesc_ring->id, ret);
+ while (i-- >= 0)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ return -ENOMEM;
+ }
+ }
+
+ /* Allocate TxCmpl ring descriptors. */
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ struct edma_txcmpl_ring *txcmpl_ring = NULL;
+ int ret;
+
+ txcmpl_ring = &edma_ctx->txcmpl_rings[i];
+ txcmpl_ring->count = EDMA_TX_RING_SIZE;
+ txcmpl_ring->id = txcmpl->ring_start + i;
+
+ ret = edma_cfg_txcmpl_ring_setup(txcmpl_ring);
+ if (ret != 0) {
+ pr_err("Error in setting up %d TxCmpl ring. ret: %d",
+ txcmpl_ring->id, ret);
+ while (i-- >= 0)
+ edma_cfg_txcmpl_ring_cleanup(&edma_ctx->txcmpl_rings[i]);
+
+ goto txcmpl_mem_alloc_fail;
+ }
+ }
+
+ pr_debug("Tx descriptor count for Tx desc and Tx complete rings: %d\n",
+ EDMA_TX_RING_SIZE);
+
+ return 0;
+
+txcmpl_mem_alloc_fail:
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ return -ENOMEM;
+}
+
+/**
+ * edma_cfg_tx_rings_alloc - Allocate EDMA Tx rings.
+ *
+ * Allocate EDMA Tx rings.
+ */
+int edma_cfg_tx_rings_alloc(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+
+ edma_ctx->tx_rings = kzalloc((sizeof(*edma_ctx->tx_rings) * tx->num_rings),
+ GFP_KERNEL);
+ if (!edma_ctx->tx_rings)
+ return -ENOMEM;
+
+ edma_ctx->txcmpl_rings = kzalloc((sizeof(*edma_ctx->txcmpl_rings) * txcmpl->num_rings),
+ GFP_KERNEL);
+ if (!edma_ctx->txcmpl_rings)
+ goto txcmpl_ring_alloc_fail;
+
+ pr_debug("Num rings - TxDesc:%u (%u-%u) TxCmpl:%u (%u-%u)\n",
+ tx->num_rings, tx->ring_start,
+ (tx->ring_start + tx->num_rings - 1),
+ txcmpl->num_rings, txcmpl->ring_start,
+ (txcmpl->ring_start + txcmpl->num_rings - 1));
+
+ if (edma_cfg_tx_rings_setup()) {
+ pr_err("Error in setting up tx rings\n");
+ goto tx_rings_setup_fail;
+ }
+
+ return 0;
+
+tx_rings_setup_fail:
+ kfree(edma_ctx->txcmpl_rings);
+ edma_ctx->txcmpl_rings = NULL;
+
+txcmpl_ring_alloc_fail:
+ kfree(edma_ctx->tx_rings);
+ edma_ctx->tx_rings = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * edma_cfg_tx_rings_cleanup - Cleanup EDMA Tx rings.
+ *
+ * Cleanup EDMA Tx rings.
+ */
+void edma_cfg_tx_rings_cleanup(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i;
+
+ /* Free any buffers assigned to any descriptors. */
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ /* Free Tx completion descriptors. */
+ for (i = 0; i < txcmpl->num_rings; i++)
+ edma_cfg_txcmpl_ring_cleanup(&edma_ctx->txcmpl_rings[i]);
+
+ kfree(edma_ctx->tx_rings);
+ kfree(edma_ctx->txcmpl_rings);
+ edma_ctx->tx_rings = NULL;
+ edma_ctx->txcmpl_rings = NULL;
+}
+
+/**
+ * edma_cfg_tx_rings - Configure EDMA Tx rings.
+ *
+ * Configure EDMA Tx rings.
+ */
+void edma_cfg_tx_rings(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i;
+
+ /* Configure Tx desc ring. */
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_configure(&edma_ctx->tx_rings[i]);
+
+ /* Configure TxCmpl ring. */
+ for (i = 0; i < txcmpl->num_rings; i++)
+ edma_cfg_txcmpl_ring_configure(&edma_ctx->txcmpl_rings[i]);
+}
+
+/**
+ * edma_cfg_tx_disable_interrupts - Disable EDMA Tx interrupts.
+ *
+ * Mask Tx completion interrupts for the given port.
+ */
+void edma_cfg_tx_disable_interrupts(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_MASK_INT_CLEAR);
+ }
+}
+
+/**
+ * edma_cfg_tx_enable_interrupts - Enable EDMA Tx interrupts.
+ *
+ * Unmask Tx completion interrupts for the given port.
+ */
+void edma_cfg_tx_enable_interrupts(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, edma_ctx->intr_info.intr_mask_txcmpl);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_enable - Enable Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Enable Tx NAPI.
+ */
+void edma_cfg_tx_napi_enable(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Enable Tx NAPI for each queue of this interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ napi_enable(&txcmpl_ring->napi);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_disable - Disable Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Disable Tx NAPI.
+ */
+void edma_cfg_tx_napi_disable(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Disable Tx NAPI for each queue of this interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ napi_disable(&txcmpl_ring->napi);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_delete - Delete Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Delete Tx NAPI.
+ */
+void edma_cfg_tx_napi_delete(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Delete Tx NAPI for each queue of this interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ netif_napi_del(&txcmpl_ring->napi);
+ txcmpl_ring->napi_added = false;
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_add - TX NAPI add.
+ * @netdev: Netdevice.
+ * @port_id: Port ID.
+ *
+ * TX NAPI add.
+ */
+void edma_cfg_tx_napi_add(struct net_device *netdev, u32 port_id)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Add Tx NAPI for each queue of this interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ netif_napi_add_weight(netdev, &txcmpl_ring->napi,
+ edma_tx_napi_poll, hw_info->napi_budget_tx);
+ txcmpl_ring->napi_added = true;
+ netdev_dbg(netdev, "Napi added for txcmpl ring: %u\n", txcmpl_ring->id);
+ }
+
+ netdev_dbg(netdev, "Tx NAPI budget: %d\n", hw_info->napi_budget_tx);
+}
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h
new file mode 100644
index 000000000000..4840c601fc86
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __EDMA_CFG_TX__
+#define __EDMA_CFG_TX__
+
+/* Tx mitigation timer's default value. */
+#define EDMA_TX_MITIGATION_TIMER_DEF 250
+
+/* Tx mitigation packet count default value. */
+#define EDMA_TX_MITIGATION_PKT_CNT_DEF 16
+
+void edma_cfg_tx_rings(void);
+int edma_cfg_tx_rings_alloc(void);
+void edma_cfg_tx_rings_cleanup(void);
+void edma_cfg_tx_disable_interrupts(u32 port_id);
+void edma_cfg_tx_enable_interrupts(u32 port_id);
+void edma_cfg_tx_napi_enable(u32 port_id);
+void edma_cfg_tx_napi_disable(u32 port_id);
+void edma_cfg_tx_napi_delete(u32 port_id);
+void edma_cfg_tx_napi_add(struct net_device *netdev, u32 port_id);
+void edma_cfg_tx_ring_mappings(void);
+void edma_cfg_txcmpl_mapping_fill(void);
+void edma_cfg_tx_rings_enable(u32 port_id);
+void edma_cfg_tx_rings_disable(u32 port_id);
+void edma_cfg_tx_fill_per_port_tx_map(struct net_device *netdev, u32 port_id);
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_port.c b/drivers/net/ethernet/qualcomm/ppe/edma_port.c
index bbb5823408fd..afa2b6479822 100644
--- a/drivers/net/ethernet/qualcomm/ppe/edma_port.c
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_port.c
@@ -13,6 +13,7 @@
#include "edma.h"
#include "edma_cfg_rx.h"
+#include "edma_cfg_tx.h"
#include "edma_port.h"
#include "ppe_regs.h"
@@ -35,6 +36,15 @@ static int edma_port_stats_alloc(struct net_device *netdev)
return -ENOMEM;
}
+ port_priv->pcpu_stats.tx_stats =
+ netdev_alloc_pcpu_stats(struct edma_port_tx_stats);
+ if (!port_priv->pcpu_stats.tx_stats) {
+ netdev_err(netdev, "Per-cpu EDMA Tx stats alloc failed for %s\n",
+ netdev->name);
+ free_percpu(port_priv->pcpu_stats.rx_stats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -43,6 +53,28 @@ static void edma_port_stats_free(struct net_device *netdev)
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
free_percpu(port_priv->pcpu_stats.rx_stats);
+ free_percpu(port_priv->pcpu_stats.tx_stats);
+}
+
+static void edma_port_configure(struct net_device *netdev)
+{
+ struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ edma_cfg_tx_fill_per_port_tx_map(netdev, port_id);
+ edma_cfg_tx_rings_enable(port_id);
+ edma_cfg_tx_napi_add(netdev, port_id);
+}
+
+static void edma_port_deconfigure(struct net_device *netdev)
+{
+ struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ edma_cfg_tx_napi_delete(port_id);
+ edma_cfg_tx_rings_disable(port_id);
}
static u16 __maybe_unused edma_port_select_queue(__maybe_unused struct net_device *netdev,
@@ -60,6 +92,7 @@ static int edma_port_open(struct net_device *netdev)
{
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
struct ppe_port *ppe_port;
+ int port_id;
if (!port_priv)
return -EINVAL;
@@ -74,10 +107,14 @@ static int edma_port_open(struct net_device *netdev)
netdev->wanted_features |= EDMA_NETDEV_FEATURES;
ppe_port = port_priv->ppe_port;
+ port_id = ppe_port->port_id;
if (ppe_port->phylink)
phylink_start(ppe_port->phylink);
+ edma_cfg_tx_napi_enable(port_id);
+ edma_cfg_tx_enable_interrupts(port_id);
+
netif_start_queue(netdev);
return 0;
@@ -87,13 +124,21 @@ static int edma_port_close(struct net_device *netdev)
{
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
struct ppe_port *ppe_port;
+ int port_id;
if (!port_priv)
return -EINVAL;
netif_stop_queue(netdev);
+ /* A 20ms delay provides plenty of margin for in-flight packets to complete. */
+ msleep(20);
+
ppe_port = port_priv->ppe_port;
+ port_id = ppe_port->port_id;
+
+ edma_cfg_tx_disable_interrupts(port_id);
+ edma_cfg_tx_napi_disable(port_id);
/* Phylink close. */
if (ppe_port->phylink)
@@ -137,6 +182,92 @@ static netdev_features_t edma_port_feature_check(__maybe_unused struct sk_buff *
return features;
}
+static netdev_tx_t edma_port_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct edma_port_priv *port_priv = NULL;
+ struct edma_port_pcpu_stats *pcpu_stats;
+ struct edma_txdesc_ring *txdesc_ring;
+ struct edma_port_tx_stats *stats;
+ enum edma_tx_gso_status result;
+ struct sk_buff *segs = NULL;
+ u8 cpu_id;
+ u32 skbq;
+ int ret;
+
+ if (!skb || !dev)
+ return NETDEV_TX_OK;
+
+ port_priv = netdev_priv(dev);
+
+ /* Select a TX ring. */
+ skbq = (skb_get_queue_mapping(skb) & (num_possible_cpus() - 1));
+
+ txdesc_ring = (struct edma_txdesc_ring *)port_priv->txr_map[skbq];
+
+ pcpu_stats = &port_priv->pcpu_stats;
+ stats = this_cpu_ptr(pcpu_stats->tx_stats);
+
+ /* HW does not support TSO for packets with 32 or more
+ * segments. Perform SW GSO for such packets.
+ */
+ result = edma_tx_gso_segment(skb, dev, &segs);
+ if (likely(result == EDMA_TX_GSO_NOT_NEEDED)) {
+ /* Transmit the packet. */
+ ret = edma_tx_ring_xmit(dev, skb, txdesc_ring, stats);
+
+ if (unlikely(ret == EDMA_TX_FAIL_NO_DESC)) {
+ if (likely(!edma_ctx->tx_requeue_stop)) {
+ cpu_id = smp_processor_id();
+ netdev_dbg(dev, "Stopping tx queue due to lack oftx descriptors\n");
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_queue_stopped[cpu_id];
+ u64_stats_update_end(&stats->syncp);
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, skbq));
+ return NETDEV_TX_BUSY;
+ }
+ }
+
+ if (unlikely(ret != EDMA_TX_OK)) {
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_drops;
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ return NETDEV_TX_OK;
+ } else if (unlikely(result == EDMA_TX_GSO_FAIL)) {
+ netdev_dbg(dev, "%p: SW GSO failed for segment size: %d\n",
+ skb, skb_shinfo(skb)->gso_segs);
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_gso_drop_pkts;
+ u64_stats_update_end(&stats->syncp);
+ return NETDEV_TX_OK;
+ }
+
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_gso_pkts;
+ u64_stats_update_end(&stats->syncp);
+
+ dev_kfree_skb_any(skb);
+ while (segs) {
+ skb = segs;
+ segs = segs->next;
+
+ /* Transmit the packet. */
+ ret = edma_tx_ring_xmit(dev, skb, txdesc_ring, stats);
+ if (unlikely(ret != EDMA_TX_OK)) {
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_drops;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ return NETDEV_TX_OK;
+}
+
static void edma_port_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -179,6 +310,7 @@ static int edma_port_set_mac_address(struct net_device *netdev, void *macaddr)
static const struct net_device_ops edma_port_netdev_ops = {
.ndo_open = edma_port_open,
.ndo_stop = edma_port_close,
+ .ndo_start_xmit = edma_port_xmit,
.ndo_get_stats64 = edma_port_get_stats64,
.ndo_set_mac_address = edma_port_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
@@ -199,6 +331,7 @@ void edma_port_destroy(struct ppe_port *port)
int port_id = port->port_id;
struct net_device *netdev = edma_ctx->netdev_arr[port_id - 1];
+ edma_port_deconfigure(netdev);
edma_port_stats_free(netdev);
unregister_netdev(netdev);
free_netdev(netdev);
@@ -276,6 +409,8 @@ int edma_port_setup(struct ppe_port *port)
*/
edma_ctx->netdev_arr[port_id - 1] = netdev;
+ edma_port_configure(netdev);
+
/* Setup phylink. */
ret = ppe_port_phylink_setup(port, netdev);
if (ret) {
@@ -298,6 +433,7 @@ int edma_port_setup(struct ppe_port *port)
register_netdev_fail:
ppe_port_phylink_destroy(port);
port_phylink_setup_fail:
+ edma_port_deconfigure(netdev);
edma_ctx->netdev_arr[port_id - 1] = NULL;
edma_port_stats_free(netdev);
stats_alloc_fail:
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_port.h b/drivers/net/ethernet/qualcomm/ppe/edma_port.h
index 75f544a4f324..b67eddabd41c 100644
--- a/drivers/net/ethernet/qualcomm/ppe/edma_port.h
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_port.h
@@ -7,6 +7,8 @@
#include "ppe_port.h"
+#define EDMA_PORT_MAX_CORE 4
+
#define EDMA_NETDEV_FEATURES (NETIF_F_FRAGLIST \
| NETIF_F_SG \
| NETIF_F_RXCSUM \
@@ -34,12 +36,44 @@ struct edma_port_rx_stats {
struct u64_stats_sync syncp;
};
+/**
+ * struct edma_port_tx_stats - EDMA per-CPU Tx stats for the port.
+ * @tx_pkts: Number of Tx packets
+ * @tx_bytes: Number of Tx bytes
+ * @tx_drops: Number of Tx drops
+ * @tx_nr_frag_pkts: Number of Tx nr_frag packets
+ * @tx_fraglist_pkts: Number of Tx fraglist packets
+ * @tx_fraglist_with_nr_frags_pkts: Number of Tx packets with fraglist and nr_frags
+ * @tx_tso_pkts: Number of Tx TSO packets
+ * @tx_tso_drop_pkts: Number of Tx TSO drop packets
+ * @tx_gso_pkts: Number of Tx GSO packets
+ * @tx_gso_drop_pkts: Number of Tx GSO drop packets
+ * @tx_queue_stopped: Number of Tx queue stop events, per core
+ * @syncp: Synchronization pointer
+ */
+struct edma_port_tx_stats {
+ u64 tx_pkts;
+ u64 tx_bytes;
+ u64 tx_drops;
+ u64 tx_nr_frag_pkts;
+ u64 tx_fraglist_pkts;
+ u64 tx_fraglist_with_nr_frags_pkts;
+ u64 tx_tso_pkts;
+ u64 tx_tso_drop_pkts;
+ u64 tx_gso_pkts;
+ u64 tx_gso_drop_pkts;
+ u64 tx_queue_stopped[EDMA_PORT_MAX_CORE];
+ struct u64_stats_sync syncp;
+};
+
/**
* struct edma_port_pcpu_stats - EDMA per cpu stats data structure for the port.
* @rx_stats: Per CPU Rx statistics
+ * @tx_stats: Per CPU Tx statistics
*/
struct edma_port_pcpu_stats {
struct edma_port_rx_stats __percpu *rx_stats;
+ struct edma_port_tx_stats __percpu *tx_stats;
};
/**
@@ -54,6 +88,7 @@ struct edma_port_priv {
struct ppe_port *ppe_port;
struct net_device *netdev;
struct edma_port_pcpu_stats pcpu_stats;
+ struct edma_txdesc_ring *txr_map[EDMA_PORT_MAX_CORE];
unsigned long flags;
};
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_tx.c b/drivers/net/ethernet/qualcomm/ppe/edma_tx.c
new file mode 100644
index 000000000000..47876c142df5
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_tx.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+/* Provide APIs to allocate Tx buffers, fill Tx descriptors, transmit both
+ * scatter-gather and linear packets, and reap Tx completions to free skbs.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <net/gso.h>
+#include <linux/regmap.h>
+
+#include "edma.h"
+#include "edma_cfg_tx.h"
+#include "edma_port.h"
+#include "ppe.h"
+#include "ppe_regs.h"
+
+static u32 edma_tx_num_descs_for_sg(struct sk_buff *skb)
+{
+ u32 nr_frags_first = 0, num_tx_desc_needed = 0;
+
+ /* Check if we have enough Tx descriptors for SG. */
+ if (unlikely(skb_shinfo(skb)->nr_frags)) {
+ nr_frags_first = skb_shinfo(skb)->nr_frags;
+ WARN_ON_ONCE(nr_frags_first > MAX_SKB_FRAGS);
+ num_tx_desc_needed += nr_frags_first;
+ }
+
+ /* Walk through the fraglist skbs, making a note of nr_frags.
+ * One Tx desc is needed per fraglist skb. A fraglist skb may
+ * itself have further nr_frags.
+ */
+ if (unlikely(skb_has_frag_list(skb))) {
+ struct sk_buff *iter_skb;
+
+ skb_walk_frags(skb, iter_skb) {
+ u32 nr_frags = skb_shinfo(iter_skb)->nr_frags;
+
+ WARN_ON_ONCE(nr_frags > MAX_SKB_FRAGS);
+ num_tx_desc_needed += (1 + nr_frags);
+ }
+ }
+
+ return (num_tx_desc_needed + 1);
+}
+
+/**
+ * edma_tx_gso_segment - Perform SW GSO segmentation if needed.
+ * @skb: Socket Buffer.
+ * @netdev: Netdevice.
+ * @segs: SKB segments produced by GSO.
+ *
+ * Segment the skb in software when the HW cannot handle it as-is.
+ *
+ * Return EDMA_TX_GSO_NOT_NEEDED, EDMA_TX_GSO_SUCCEED or EDMA_TX_GSO_FAIL.
+ */
+enum edma_tx_gso_status edma_tx_gso_segment(struct sk_buff *skb,
+ struct net_device *netdev, struct sk_buff **segs)
+{
+ u32 num_tx_desc_needed;
+
+ /* Check if the skb is non-linear before proceeding. */
+ if (likely(!skb_is_nonlinear(skb)))
+ return EDMA_TX_GSO_NOT_NEEDED;
+
+ /* Check if TSO is enabled. If so, the skb doesn't need to be
+ * segmented by Linux as long as it fits within the HW segment limit.
+ */
+ if (netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) {
+ num_tx_desc_needed = edma_tx_num_descs_for_sg(skb);
+ if (likely(num_tx_desc_needed <= EDMA_TX_TSO_SEG_MAX))
+ return EDMA_TX_GSO_NOT_NEEDED;
+ }
+
+ /* GSO segmentation of the skb into multiple segments. */
+ *segs = skb_gso_segment(skb, netdev->features
+ & ~(NETIF_F_TSO | NETIF_F_TSO6));
+
+ /* Check for error in GSO segmentation. */
+ if (IS_ERR_OR_NULL(*segs)) {
+ netdev_info(netdev, "Tx gso fail\n");
+ return EDMA_TX_GSO_FAIL;
+ }
+
+ return EDMA_TX_GSO_SUCCEED;
+}
+
+/**
+ * edma_tx_complete - Reap Tx completion descriptors.
+ * @work_to_do: Work to do.
+ * @txcmpl_ring: Tx Completion ring.
+ *
+ * Reap Tx completion descriptors of the transmitted
+ * packets and free the corresponding SKBs.
+ *
+ * Return the number of descriptors for which Tx completion was processed.
+ */
+u32 edma_tx_complete(u32 work_to_do, struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct edma_txcmpl_stats *txcmpl_stats = &txcmpl_ring->txcmpl_stats;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 cons_idx, end_idx, data, cpu_id;
+ struct device *dev = ppe_dev->dev;
+ u32 avail, count, txcmpl_errors;
+ struct edma_txcmpl_desc *txcmpl;
+ u32 prod_idx = 0, more_bit = 0;
+ struct netdev_queue *nq;
+ struct sk_buff *skb;
+ u32 reg;
+
+ cons_idx = txcmpl_ring->cons_idx;
+
+ if (likely(txcmpl_ring->avail_pkt >= work_to_do)) {
+ avail = work_to_do;
+ } else {
+ /* Get TXCMPL ring producer index. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_PROD_IDX(txcmpl_ring->id);
+ regmap_read(regmap, reg, &data);
+ prod_idx = data & EDMA_TXCMPL_PROD_IDX_MASK;
+
+ avail = EDMA_DESC_AVAIL_COUNT(prod_idx, cons_idx, EDMA_TX_RING_SIZE);
+ txcmpl_ring->avail_pkt = avail;
+
+ if (unlikely(!avail)) {
+ dev_dbg(dev, "No available descriptors are pending for %d txcmpl ring\n",
+ txcmpl_ring->id);
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->no_pending_desc;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ return 0;
+ }
+
+ avail = min(avail, work_to_do);
+ }
+
+ count = avail;
+
+ end_idx = (cons_idx + avail) & EDMA_TX_RING_SIZE_MASK;
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+
+ /* Instead of freeing the skb, it might be better to save and use
+ * for Rxfill.
+ */
+ while (likely(avail--)) {
+ /* The last descriptor holds the SKB pointer for scattered frames.
+ * So skip the descriptors with more bit set.
+ */
+ more_bit = EDMA_TXCMPL_MORE_BIT_GET(txcmpl);
+ if (unlikely(more_bit)) {
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->desc_with_more_bit;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+ continue;
+ }
+
+ /* Find and free the skb for Tx completion. */
+ skb = (struct sk_buff *)EDMA_TXCMPL_OPAQUE_GET(txcmpl);
+ if (unlikely(!skb)) {
+ if (net_ratelimit())
+ dev_warn(dev, "Invalid cons_idx:%u prod_idx:%u word2:%x word3:%x\n",
+ cons_idx, prod_idx, txcmpl->word2, txcmpl->word3);
+
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->invalid_buffer;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ } else {
+ dev_dbg(dev, "TXCMPL: skb:%p, skb->len %d, skb->data_len %d, cons_idx:%d prod_idx:%d word2:0x%x word3:0x%x\n",
+ skb, skb->len, skb->data_len, cons_idx, prod_idx,
+ txcmpl->word2, txcmpl->word3);
+
+ txcmpl_errors = EDMA_TXCOMP_RING_ERROR_GET(txcmpl->word3);
+ if (unlikely(txcmpl_errors)) {
+ if (net_ratelimit())
+ dev_err(dev, "Error 0x%0x observed in tx complete %d ring\n",
+ txcmpl_errors, txcmpl_ring->id);
+
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->errors;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ }
+
+ /* Retrieve pool id for unmapping.
+ * 0 for linear skb and (pool id - 1) represents nr_frag index.
+ */
+ if (!EDMA_TXCOMP_POOL_ID_GET(txcmpl)) {
+ dma_unmap_single(dev, virt_to_phys(skb->data),
+ skb->len, DMA_TO_DEVICE);
+ } else {
+ u8 frag_index = (EDMA_TXCOMP_POOL_ID_GET(txcmpl) - 1);
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_index];
+
+ dma_unmap_page(dev, virt_to_phys(frag),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ }
+
+ dev_kfree_skb(skb);
+ }
+
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+ }
+
+ txcmpl_ring->cons_idx = cons_idx;
+ txcmpl_ring->avail_pkt -= count;
+
+ dev_dbg(dev, "TXCMPL:%u count:%u prod_idx:%u cons_idx:%u\n",
+ txcmpl_ring->id, count, prod_idx, cons_idx);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_CONS_IDX(txcmpl_ring->id);
+ regmap_write(regmap, reg, cons_idx);
+
+ /* If tx_requeue_stop is disabled (tx_requeue_stop = 0),
+ * fetch the Tx queue of the interface and check if it is stopped.
+ * If the queue is stopped and the interface is up, wake up this queue.
+ */
+ if (unlikely(!edma_ctx->tx_requeue_stop)) {
+ cpu_id = smp_processor_id();
+ nq = netdev_get_tx_queue(txcmpl_ring->napi.dev, cpu_id);
+ if (unlikely(netif_tx_queue_stopped(nq)) &&
+ netif_carrier_ok(txcmpl_ring->napi.dev)) {
+ dev_dbg(dev, "Waking queue number %d, for interface %s\n",
+ cpu_id, txcmpl_ring->napi.dev->name);
+ __netif_tx_lock(nq, cpu_id);
+ netif_tx_wake_queue(nq);
+ __netif_tx_unlock(nq);
+ }
+ }
+
+ return count;
+}
+
+/**
+ * edma_tx_napi_poll - EDMA TX NAPI handler.
+ * @napi: NAPI structure.
+ * @budget: Tx NAPI Budget.
+ *
+ * EDMA TX NAPI handler.
+ */
+int edma_tx_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct edma_txcmpl_ring *txcmpl_ring = (struct edma_txcmpl_ring *)napi;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 txcmpl_intr_status;
+ int work_done = 0;
+ u32 data, reg;
+
+ do {
+ work_done += edma_tx_complete(budget - work_done, txcmpl_ring);
+ if (work_done >= budget)
+ return work_done;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_STAT(txcmpl_ring->id);
+ regmap_read(regmap, reg, &data);
+ txcmpl_intr_status = data & EDMA_TXCMPL_RING_INT_STATUS_MASK;
+ } while (txcmpl_intr_status);
+
+ /* No more packets to process. Finish NAPI processing. */
+ napi_complete(napi);
+
+ /* Set TXCMPL ring interrupt mask. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, edma_ctx->intr_info.intr_mask_txcmpl);
+
+ return work_done;
+}
+
+/**
+ * edma_tx_handle_irq - Tx IRQ Handler.
+ * @irq: Interrupt request.
+ * @ctx: Context.
+ *
+ * Process TX IRQ and schedule NAPI.
+ *
+ * Return IRQ handler code.
+ */
+irqreturn_t edma_tx_handle_irq(int irq, void *ctx)
+{
+ struct edma_txcmpl_ring *txcmpl_ring = (struct edma_txcmpl_ring *)ctx;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 reg;
+
+ pr_debug("irq: irq=%d txcmpl_ring_id=%u\n", irq, txcmpl_ring->id);
+ if (likely(napi_schedule_prep(&txcmpl_ring->napi))) {
+ /* Disable TxCmpl intr. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_MASK_INT_DISABLE);
+ __napi_schedule(&txcmpl_ring->napi);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void edma_tx_dma_unmap_frags(struct sk_buff *skb, u32 nr_frags)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+ u32 buf_len = 0;
+ u8 i = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags - nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ /* DMA mapping was not done for zero size segments. */
+ buf_len = skb_frag_size(frag);
+ if (unlikely(buf_len == 0))
+ continue;
+
+ dma_unmap_page(dev, virt_to_phys(frag), PAGE_SIZE,
+ DMA_TO_DEVICE);
+ }
+}
+
+static u32 edma_tx_skb_nr_frags(struct edma_txdesc_ring *txdesc_ring,
+ struct edma_txdesc_pri **txdesc, struct sk_buff *skb,
+ u32 *hw_next_to_use, u32 *invalid_frag)
+{
+ u32 nr_frags = 0, buf_len = 0, num_descs = 0, start_idx = 0, end_idx = 0;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 start_hw_next_to_use = *hw_next_to_use;
+ struct edma_txdesc_pri *txd = *txdesc;
+ struct device *dev = ppe_dev->dev;
+ u8 i = 0;
+
+ /* Hold onto the index mapped to *txdesc.
+ * This will be the index previous to that of current *hw_next_to_use.
+ */
+ start_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK)
+ & EDMA_TX_RING_SIZE_MASK);
+
+ /* Handle if the skb has nr_frags. */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ num_descs = nr_frags;
+ i = 0;
+
+ while (nr_frags--) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ dma_addr_t buff_addr;
+
+ buf_len = skb_frag_size(frag);
+
+ /* A zero size segment can cause the EDMA HW to hang, so we don't
+ * process them. Zero size segments can happen during TSO operation
+ * if there is nothing but the header in the primary segment.
+ */
+ */
+ if (unlikely(buf_len == 0)) {
+ num_descs--;
+ i++;
+ continue;
+ }
+
+ /* Setting the MORE bit on the previous Tx descriptor.
+ * Note: We will flush this descriptor as well later.
+ */
+ EDMA_TXDESC_MORE_BIT_SET(txd, 1);
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+ buff_addr = skb_frag_dma_map(dev, frag, 0, buf_len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma first descriptor for nr_frags tx\n");
+ *hw_next_to_use = start_hw_next_to_use;
+ *invalid_frag = nr_frags;
+ return 0;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ EDMA_TXDESC_POOL_ID_SET(txd, (i + 1));
+
+ *hw_next_to_use = ((*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK);
+ i++;
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ /* This will be the index previous to that of current *hw_next_to_use. */
+ end_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK) & EDMA_TX_RING_SIZE_MASK);
+
+ *txdesc = txd;
+
+ return num_descs;
+}
+
+static void edma_tx_fill_pp_desc(struct edma_port_priv *port_priv,
+ struct edma_txdesc_pri *txd, struct sk_buff *skb,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ /* Offload L3/L4 checksum computation. */
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ EDMA_TXDESC_ADV_OFFLOAD_SET(txd);
+ EDMA_TXDESC_IP_CSUM_SET(txd);
+ EDMA_TXDESC_L4_CSUM_SET(txd);
+ }
+
+ /* Check if the packet needs TSO
+ * This will be mostly true for SG packets.
+ */
+ if (unlikely(skb_is_gso(skb))) {
+ if ((skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) ||
+ (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)) {
+ u32 mss = skb_shinfo(skb)->gso_size;
+
+ /* If MSS<256, HW will do TSO using MSS=256,
+ * if MSS>10K, HW will do TSO using MSS=10K,
+ * else HW will report error 0x200000 in Tx Cmpl.
+ */
+ if (mss < EDMA_TX_TSO_MSS_MIN)
+ mss = EDMA_TX_TSO_MSS_MIN;
+ else if (mss > EDMA_TX_TSO_MSS_MAX)
+ mss = EDMA_TX_TSO_MSS_MAX;
+
+ EDMA_TXDESC_TSO_ENABLE_SET(txd, 1);
+ EDMA_TXDESC_MSS_SET(txd, mss);
+
+ /* Update tso stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_tso_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ /* Set destination information in the descriptor. */
+ EDMA_TXDESC_SERVICE_CODE_SET(txd, PPE_EDMA_SC_BYPASS_ID);
+ EDMA_DST_INFO_SET(txd, port_id);
+}
+
+static struct edma_txdesc_pri *edma_tx_skb_first_desc(struct edma_port_priv *port_priv,
+ struct edma_txdesc_ring *txdesc_ring,
+ struct sk_buff *skb, u32 *hw_next_to_use,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct edma_txdesc_pri *txd = NULL;
+ struct device *dev = ppe_dev->dev;
+ dma_addr_t buff_addr;
+ u32 buf_len = 0;
+
+ /* Get the packet length. */
+ buf_len = skb_headlen(skb);
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+
+ /* Set the data pointer as the buffer address in the descriptor. */
+ buff_addr = dma_map_single(dev, skb->data, buf_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma first descriptor for tx\n");
+ return NULL;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_POOL_ID_SET(txd, 0);
+ edma_tx_fill_pp_desc(port_priv, txd, skb, stats);
+
+ /* Set packet length in the descriptor. */
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ *hw_next_to_use = (*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK;
+
+ return txd;
+}
+
+static void edma_tx_handle_dma_err(struct sk_buff *skb, u32 num_sg_frag_list)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+ struct sk_buff *iter_skb = NULL;
+ u32 cnt_sg_frag_list = 0;
+
+ /* Walk through all fraglist skbs. */
+ skb_walk_frags(skb, iter_skb) {
+ if (skb_headlen(iter_skb)) {
+ dma_unmap_single(dev, virt_to_phys(iter_skb->data),
+ skb_headlen(iter_skb), DMA_TO_DEVICE);
+ cnt_sg_frag_list += 1;
+ }
+
+ if (cnt_sg_frag_list == num_sg_frag_list)
+ return;
+
+ /* skb fraglist skb had nr_frags, unmap that memory. */
+ u32 nr_frags = skb_shinfo(iter_skb)->nr_frags;
+
+ if (nr_frags == 0)
+ continue;
+
+ for (int i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(iter_skb)->frags[i];
+
+ /* DMA mapping was not done for zero size segments. */
+ if (unlikely(skb_frag_size(frag) == 0))
+ continue;
+
+ dma_unmap_page(dev, virt_to_phys(frag),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ cnt_sg_frag_list += 1;
+ if (cnt_sg_frag_list == num_sg_frag_list)
+ return;
+ }
+ }
+}
+
+static u32 edma_tx_skb_sg_fill_desc(struct edma_txdesc_ring *txdesc_ring,
+ struct edma_txdesc_pri **txdesc,
+ struct sk_buff *skb, u32 *hw_next_to_use,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 start_hw_next_to_use = 0, invalid_frag = 0;
+ struct edma_txdesc_pri *txd = *txdesc;
+ struct device *dev = ppe_dev->dev;
+ struct sk_buff *iter_skb = NULL;
+ u32 buf_len = 0, num_descs = 0;
+ u32 num_sg_frag_list = 0;
+
+ /* Head skb processed already. */
+ num_descs++;
+
+ if (unlikely(skb_has_frag_list(skb))) {
+ struct edma_txdesc_pri *start_desc = NULL;
+ u32 start_idx = 0, end_idx = 0;
+
+ /* Hold onto the index mapped to txd.
+ * This will be the index previous to that of current *hw_next_to_use.
+ */
+ start_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK)
+ & EDMA_TX_RING_SIZE_MASK);
+ start_desc = txd;
+ start_hw_next_to_use = *hw_next_to_use;
+
+ /* Walk through all fraglist skbs. */
+ skb_walk_frags(skb, iter_skb) {
+ dma_addr_t buff_addr;
+ u32 num_nr_frag = 0;
+
+ /* This case could happen during the packet decapsulation.
+ * All header content might be removed.
+ */
+ buf_len = skb_headlen(iter_skb);
+ if (unlikely(buf_len == 0))
+ goto skip_primary;
+
+ /* We make sure to flush this descriptor later. */
+ EDMA_TXDESC_MORE_BIT_SET(txd, 1);
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+ buff_addr = dma_map_single(dev, iter_skb->data,
+ buf_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma for fraglist\n");
+ goto dma_err;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ EDMA_TXDESC_POOL_ID_SET(txd, 0);
+
+ *hw_next_to_use = (*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK;
+ num_descs += 1;
+ num_sg_frag_list += 1;
+
+ /* skb fraglist skb can have nr_frags. */
+skip_primary:
+ if (unlikely(skb_shinfo(iter_skb)->nr_frags)) {
+ num_nr_frag = edma_tx_skb_nr_frags(txdesc_ring, &txd,
+ iter_skb, hw_next_to_use,
+ &invalid_frag);
+ if (unlikely(!num_nr_frag)) {
+ dev_dbg(dev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ edma_tx_dma_unmap_frags(iter_skb, invalid_frag);
+ goto dma_err;
+ }
+
+ num_descs += num_nr_frag;
+ num_sg_frag_list += num_nr_frag;
+
+ /* Update fraglist with nr_frag stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_fraglist_with_nr_frags_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ /* This will be the index previous to
+ * that of current *hw_next_to_use.
+ */
+ end_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK) &
+ EDMA_TX_RING_SIZE_MASK);
+
+ /* Update frag_list stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_fraglist_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+		u32 num_nr_frag;
+
+		/* Process skb with nr_frags. */
+		num_nr_frag = edma_tx_skb_nr_frags(txdesc_ring, &txd, skb,
+						   hw_next_to_use, &invalid_frag);
+		if (unlikely(!num_nr_frag)) {
+			dev_dbg(dev, "No descriptor available for ring %d\n", txdesc_ring->id);
+			edma_tx_dma_unmap_frags(skb, invalid_frag);
+			*txdesc = NULL;
+			return 0;
+		}
+
+		num_descs += num_nr_frag;
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_nr_frag_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ dev_dbg(dev, "skb:%p num_descs_filled: %u, nr_frags %u, frag_list fragments %u\n",
+ skb, num_descs, skb_shinfo(skb)->nr_frags, num_sg_frag_list);
+
+ *txdesc = txd;
+
+ return num_descs;
+
+dma_err:
+ if (!num_sg_frag_list)
+ goto reset_state;
+
+ edma_tx_handle_dma_err(skb, num_sg_frag_list);
+
+reset_state:
+ *hw_next_to_use = start_hw_next_to_use;
+ *txdesc = NULL;
+
+ return 0;
+}
+
+static u32 edma_tx_avail_desc(struct edma_txdesc_ring *txdesc_ring,
+ u32 hw_next_to_use)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 data = 0, avail = 0, hw_next_to_clean = 0;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 reg;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CONS_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ hw_next_to_clean = data & EDMA_TXDESC_CONS_IDX_MASK;
+
+ avail = EDMA_DESC_AVAIL_COUNT(hw_next_to_clean - 1,
+ hw_next_to_use, EDMA_TX_RING_SIZE);
+
+ return avail;
+}
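The check above is plain power-of-two ring arithmetic: the hardware consumer index is read back and hw_next_to_clean - 1 is passed as the head, so one slot always stays unused and a full ring is never mistaken for an empty one. Below is a minimal user-space sketch (not part of the patch) that reproduces the EDMA_DESC_AVAIL_COUNT formula with the EDMA_TX_RING_SIZE of 2048; the index values are invented for illustration.

#include <assert.h>
#include <stdio.h>

/* Mirrors EDMA_TX_RING_SIZE and EDMA_DESC_AVAIL_COUNT from edma_tx.h. */
#define RING_SIZE 2048
#define DESC_AVAIL_COUNT(head, tail, max) \
	((((head) - (tail)) + (max)) & ((max) - 1))

int main(void)
{
	/* Producer at 7, hardware consumer at 10: passing head = clean - 1
	 * keeps one guard slot, so only indices 7 and 8 may be filled.
	 */
	unsigned int avail = DESC_AVAIL_COUNT(10 - 1, 7, RING_SIZE);

	printf("avail = %u\n", avail);	/* 2 */
	assert(avail == 2);

	/* Wrap-around: producer near the end of the ring, consumer at 5. */
	avail = DESC_AVAIL_COUNT(5 - 1, 2040, RING_SIZE);
	printf("avail = %u\n", avail);	/* 12 */
	assert(avail == 12);

	return 0;
}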
+
+/**
+ * edma_tx_ring_xmit - Transmit a packet.
+ * @netdev: Netdevice.
+ * @skb: Socket Buffer.
+ * @txdesc_ring: Tx Descriptor ring.
+ * @stats: EDMA Tx Statistics.
+ *
+ * Check for available descriptors, fill the descriptors
+ * and transmit both linear and non-linear packets.
+ *
+ * Return: EDMA_TX_OK on success, EDMA_TX_FAIL_NO_DESC when no descriptor
+ * is available, EDMA_TX_FAIL otherwise.
+ */
+enum edma_tx_status edma_tx_ring_xmit(struct net_device *netdev,
+ struct sk_buff *skb, struct edma_txdesc_ring *txdesc_ring,
+ struct edma_port_tx_stats *stats)
+{
+ struct edma_txdesc_stats *txdesc_stats = &txdesc_ring->txdesc_stats;
+ struct edma_port_priv *port_priv = netdev_priv(netdev);
+ u32 num_tx_desc_needed = 0, num_desc_filled = 0;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct ppe_port *port = port_priv->ppe_port;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_pri *txdesc = NULL;
+ struct device *dev = ppe_dev->dev;
+ int port_id = port->port_id;
+ u32 hw_next_to_use = 0;
+ u32 reg;
+
+ hw_next_to_use = txdesc_ring->prod_idx;
+
+ if (unlikely(!(txdesc_ring->avail_desc))) {
+ txdesc_ring->avail_desc = edma_tx_avail_desc(txdesc_ring,
+ hw_next_to_use);
+ if (unlikely(!txdesc_ring->avail_desc)) {
+			netdev_dbg(netdev, "No descriptors available on ring %d\n",
+ txdesc_ring->id);
+
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+ /* Process head skb for linear skb.
+	 * Process head skb + nr_frags + fraglist for non-linear skb.
+ */
+ if (likely(!skb_is_nonlinear(skb))) {
+ txdesc = edma_tx_skb_first_desc(port_priv, txdesc_ring, skb,
+ &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txdesc);
+ num_desc_filled++;
+ } else {
+ num_tx_desc_needed = edma_tx_num_descs_for_sg(skb);
+
+		/* HW does not support TSO for packets with more than 32 segments;
+		 * it hangs if it sees more than that. The kernel performs GSO for
+		 * such packets because netdev gso_max_segs is set to 32.
+		 */
+ if (unlikely(num_tx_desc_needed > EDMA_TX_TSO_SEG_MAX)) {
+ netdev_dbg(netdev, "Number of segments %u more than %u for %d ring\n",
+ num_tx_desc_needed, EDMA_TX_TSO_SEG_MAX, txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->tso_max_seg_exceed;
+ u64_stats_update_end(&txdesc_stats->syncp);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_tso_drop_pkts++;
+ u64_stats_update_end(&stats->syncp);
+
+ return EDMA_TX_FAIL;
+ }
+
+ if (unlikely(num_tx_desc_needed > txdesc_ring->avail_desc)) {
+ txdesc_ring->avail_desc = edma_tx_avail_desc(txdesc_ring,
+ hw_next_to_use);
+ if (num_tx_desc_needed > txdesc_ring->avail_desc) {
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+				netdev_dbg(netdev, "Not enough descriptors available on ring %d for SG packet, needed %d, available %d\n",
+ txdesc_ring->id, num_tx_desc_needed,
+ txdesc_ring->avail_desc);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+ txdesc = edma_tx_skb_first_desc(port_priv, txdesc_ring, skb,
+ &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No non-linear descriptor available for ring %d\n",
+ txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+
+ num_desc_filled = edma_tx_skb_sg_fill_desc(txdesc_ring,
+ &txdesc, skb, &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ dma_unmap_single(dev, virt_to_phys(skb->data),
+ skb->len, DMA_TO_DEVICE);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+	/* Store the skb pointer in the opaque fields of the last
+	 * descriptor of the packet (or SG packet).
+	 */
+ EDMA_TXDESC_OPAQUE_SET(txdesc, skb);
+
+ /* Update producer index. */
+ txdesc_ring->prod_idx = hw_next_to_use & EDMA_TXDESC_PROD_IDX_MASK;
+ txdesc_ring->avail_desc -= num_desc_filled;
+
+	netdev_dbg(netdev, "%s: skb:%p tx_ring:%u proto:0x%x skb->len:%d port:%u prod_idx:%u ip_summed:0x%x\n",
+ netdev->name, skb, txdesc_ring->id, ntohs(skb->protocol),
+ skb->len, port_id, hw_next_to_use, skb->ip_summed);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_write(regmap, reg, txdesc_ring->prod_idx);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_pkts++;
+ stats->tx_bytes += skb->len;
+ u64_stats_update_end(&stats->syncp);
+
+ return EDMA_TX_OK;
+}
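For orientation, here is a hedged sketch (not from the patch) of how a transmit handler could translate the status codes returned by edma_tx_ring_xmit() into netdev return values. The example_tx() helper and the way a ring and stats block are chosen are assumptions for illustration; the driver's real transmit path added to edma_port.c in this patch may differ.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

#include "edma_tx.h"

/* Illustrative mapping of EDMA Tx status codes to netdev return values.
 * The ring and stats arguments stand in for whatever queue-to-ring and
 * per-CPU stats selection the driver performs; that is not shown here.
 */
static netdev_tx_t example_tx(struct net_device *netdev, struct sk_buff *skb,
			      struct edma_txdesc_ring *ring,
			      struct edma_port_tx_stats *stats)
{
	switch (edma_tx_ring_xmit(netdev, skb, ring, stats)) {
	case EDMA_TX_OK:
		return NETDEV_TX_OK;
	case EDMA_TX_FAIL_NO_DESC:
		/* Ring temporarily full; let the stack requeue and retry. */
		return NETDEV_TX_BUSY;
	case EDMA_TX_FAIL:
	default:
		/* Unrecoverable for this skb (e.g. TSO segment limit hit). */
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}
}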
diff --git a/drivers/net/ethernet/qualcomm/ppe/edma_tx.h b/drivers/net/ethernet/qualcomm/ppe/edma_tx.h
new file mode 100644
index 000000000000..c09a4e0f6a42
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_tx.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __EDMA_TX__
+#define __EDMA_TX__
+
+#include "edma_port.h"
+
+#define EDMA_GET_DESC(R, i, type) (&(((type *)((R)->desc))[(i)]))
+#define EDMA_GET_PDESC(R, i, type) (&(((type *)((R)->pdesc))[(i)]))
+#define EDMA_GET_SDESC(R, i, type) (&(((type *)((R)->sdesc))[(i)]))
+#define EDMA_TXCMPL_DESC(R, i) EDMA_GET_DESC(R, i, \
+ struct edma_txcmpl_desc)
+#define EDMA_TXDESC_PRI_DESC(R, i) EDMA_GET_PDESC(R, i, \
+ struct edma_txdesc_pri)
+#define EDMA_TXDESC_SEC_DESC(R, i) EDMA_GET_SDESC(R, i, \
+ struct edma_txdesc_sec)
+
+#define EDMA_DESC_AVAIL_COUNT(head, tail, _max) ({ \
+ typeof(_max) (max) = (_max); \
+ ((((head) - (tail)) + \
+ (max)) & ((max) - 1)); })
+
+#define EDMA_TX_RING_SIZE 2048
+#define EDMA_TX_RING_SIZE_MASK (EDMA_TX_RING_SIZE - 1)
+
+/* Max segment processing capacity of HW for TSO. */
+#define EDMA_TX_TSO_SEG_MAX 32
+
+/* HW defined low and high MSS size. */
+#define EDMA_TX_TSO_MSS_MIN 256
+#define EDMA_TX_TSO_MSS_MAX 10240
+
+#define EDMA_DST_PORT_TYPE 2
+#define EDMA_DST_PORT_TYPE_SHIFT 28
+#define EDMA_DST_PORT_TYPE_MASK (0xf << EDMA_DST_PORT_TYPE_SHIFT)
+#define EDMA_DST_PORT_ID_SHIFT 16
+#define EDMA_DST_PORT_ID_MASK (0xfff << EDMA_DST_PORT_ID_SHIFT)
+
+#define EDMA_DST_PORT_TYPE_SET(x) (((x) << EDMA_DST_PORT_TYPE_SHIFT) & \
+ EDMA_DST_PORT_TYPE_MASK)
+#define EDMA_DST_PORT_ID_SET(x) (((x) << EDMA_DST_PORT_ID_SHIFT) & \
+ EDMA_DST_PORT_ID_MASK)
+#define EDMA_DST_INFO_SET(desc, x) ((desc)->word4 |= \
+ (EDMA_DST_PORT_TYPE_SET(EDMA_DST_PORT_TYPE) | EDMA_DST_PORT_ID_SET(x)))
+
+#define EDMA_TXDESC_TSO_ENABLE_MASK BIT(24)
+#define EDMA_TXDESC_TSO_ENABLE_SET(desc, x) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_TSO_ENABLE_MASK, x))
+#define EDMA_TXDESC_MSS_MASK GENMASK(31, 16)
+#define EDMA_TXDESC_MSS_SET(desc, x) ((desc)->word6 |= \
+ FIELD_PREP(EDMA_TXDESC_MSS_MASK, x))
+#define EDMA_TXDESC_MORE_BIT_MASK BIT(30)
+#define EDMA_TXDESC_MORE_BIT_SET(desc, x) ((desc)->word1 |= \
+ FIELD_PREP(EDMA_TXDESC_MORE_BIT_MASK, x))
+
+#define EDMA_TXDESC_ADV_OFFSET_BIT BIT(31)
+#define EDMA_TXDESC_ADV_OFFLOAD_SET(desc) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_ADV_OFFSET_BIT, 1))
+#define EDMA_TXDESC_IP_CSUM_BIT BIT(25)
+#define EDMA_TXDESC_IP_CSUM_SET(desc) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_IP_CSUM_BIT, 1))
+
+#define EDMA_TXDESC_L4_CSUM_SET_MASK GENMASK(27, 26)
+#define EDMA_TXDESC_L4_CSUM_SET(desc) ((desc)->word5 |= \
+ (FIELD_PREP(EDMA_TXDESC_L4_CSUM_SET_MASK, 1)))
+
+#define EDMA_TXDESC_POOL_ID_SET_MASK GENMASK(24, 18)
+#define EDMA_TXDESC_POOL_ID_SET(desc, x) ((desc)->word5 |= \
+ (FIELD_PREP(EDMA_TXDESC_POOL_ID_SET_MASK, x)))
+
+#define EDMA_TXDESC_DATA_LEN_SET(desc, x) ((desc)->word5 |= ((x) & 0x1ffff))
+#define EDMA_TXDESC_SERVICE_CODE_MASK GENMASK(24, 16)
+#define EDMA_TXDESC_SERVICE_CODE_SET(desc, x) ((desc)->word1 |= \
+ (FIELD_PREP(EDMA_TXDESC_SERVICE_CODE_MASK, x)))
+#define EDMA_TXDESC_BUFFER_ADDR_SET(desc, addr) (((desc)->word0) = (addr))
+
+#ifdef __LP64__
+#define EDMA_TXDESC_OPAQUE_GET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ (((u64)(desc)->word3 << 32) | (desc)->word2); })
+
+#define EDMA_TXCMPL_OPAQUE_GET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ (((u64)(desc)->word1 << 32) | \
+ (desc)->word0); })
+
+#define EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr) ((desc)->word2 = \
+ (u32)(uintptr_t)(ptr))
+
+#define EDMA_TXDESC_OPAQUE_HI_SET(desc, ptr) ((desc)->word3 = \
+ (u32)((u64)(ptr) >> 32))
+
+#define EDMA_TXDESC_OPAQUE_SET(_desc, _ptr) do { \
+ typeof(_desc) (desc) = (_desc); \
+ typeof(_ptr) (ptr) = (_ptr); \
+ EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr); \
+ EDMA_TXDESC_OPAQUE_HI_SET(desc, ptr); \
+} while (0)
+#else
+#define EDMA_TXCMPL_OPAQUE_GET(desc) ((desc)->word0)
+#define EDMA_TXDESC_OPAQUE_GET(desc) ((desc)->word2)
+#define EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr) ((desc)->word2 = (u32)(uintptr_t)ptr)
+
+#define EDMA_TXDESC_OPAQUE_SET(desc, ptr) \
+ EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr)
+#endif
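On 64-bit builds the opaque macros above split the skb pointer across two 32-bit descriptor words, and the completion path later recombines word0/word1 of the completion descriptor to recover it. A small stand-alone sketch of that split/recombine technique, using stand-in structs rather than the real descriptor layout:

#include <assert.h>
#include <stdint.h>

struct fake_txdesc { uint32_t word2, word3; };	/* opaque lo/hi */
struct fake_txcmpl { uint32_t word0, word1; };	/* opaque lo/hi */

int main(void)
{
	int buffer = 42;
	void *skb = &buffer;	/* stand-in for the skb pointer */
	struct fake_txdesc txd = { 0, 0 };
	struct fake_txcmpl cmpl;
	void *recovered;

	/* EDMA_TXDESC_OPAQUE_SET(): low and high halves into word2/word3. */
	txd.word2 = (uint32_t)(uintptr_t)skb;
	txd.word3 = (uint32_t)((uint64_t)(uintptr_t)skb >> 32);

	/* The opaque value comes back in the completion descriptor. */
	cmpl.word0 = txd.word2;
	cmpl.word1 = txd.word3;

	/* EDMA_TXCMPL_OPAQUE_GET(): recombine into the original pointer. */
	recovered = (void *)(uintptr_t)(((uint64_t)cmpl.word1 << 32) | cmpl.word0);
	assert(recovered == skb);

	return 0;
}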
+#define EDMA_TXCMPL_MORE_BIT_MASK BIT(30)
+
+#define EDMA_TXCMPL_MORE_BIT_GET(desc) ((le32_to_cpu((__force __le32)((desc)->word2))) & \
+ EDMA_TXCMPL_MORE_BIT_MASK)
+
+#define EDMA_TXCOMP_RING_ERROR_MASK GENMASK(22, 0)
+
+#define EDMA_TXCOMP_RING_ERROR_GET(x) ((le32_to_cpu((__force __le32)x)) & \
+ EDMA_TXCOMP_RING_ERROR_MASK)
+
+#define EDMA_TXCOMP_POOL_ID_MASK GENMASK(5, 0)
+
+#define EDMA_TXCOMP_POOL_ID_GET(desc) ((le32_to_cpu((__force __le32)((desc)->word2))) & \
+ EDMA_TXCOMP_POOL_ID_MASK)
+
+/* Opaque values are set in word2 and word3,
+ * they are not accessed by the EDMA HW,
+ * so endianness conversion is not needed.
+ */
+#define EDMA_TXDESC_ENDIAN_SET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ cpu_to_le32s(&((desc)->word0)); \
+ cpu_to_le32s(&((desc)->word1)); \
+ cpu_to_le32s(&((desc)->word4)); \
+ cpu_to_le32s(&((desc)->word5)); \
+ cpu_to_le32s(&((desc)->word6)); \
+ cpu_to_le32s(&((desc)->word7)); \
+})
+
+/* EDMA Tx status */
+enum edma_tx_status {
+ EDMA_TX_OK = 0, /* Tx success. */
+ EDMA_TX_FAIL_NO_DESC = 1, /* Not enough descriptors. */
+ EDMA_TX_FAIL = 2, /* Tx failure. */
+};
+
+/* EDMA TX GSO status */
+enum edma_tx_gso_status {
+	/* GSO not needed, packet is within EDMA_TX_TSO_SEG_MAX segments. */
+	EDMA_TX_GSO_NOT_NEEDED = 0,
+	/* GSO succeeded. */
+	EDMA_TX_GSO_SUCCEED = 1,
+	/* GSO failed, drop the packet. */
+	EDMA_TX_GSO_FAIL = 2,
+};
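These codes describe the usual fallback for hardware with a TSO segment cap: when the segment count exceeds EDMA_TX_TSO_SEG_MAX, the packet is segmented in software before transmission. The sketch below illustrates that pattern with the generic skb_gso_segment() kernel helper; it is an assumption-laden example, not the body of edma_tx_gso_segment() from this patch.

#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

#include "edma_tx.h"

/* Illustration only: software GSO fallback for skbs whose segment count
 * exceeds what the EDMA hardware can handle in a single TSO operation.
 */
static enum edma_tx_gso_status example_gso_segment(struct sk_buff *skb,
						   struct net_device *netdev,
						   struct sk_buff **segs)
{
	struct sk_buff *segments;

	/* Within the hardware limit: hardware TSO can take it as-is. */
	if (likely(skb_shinfo(skb)->gso_segs <= EDMA_TX_TSO_SEG_MAX))
		return EDMA_TX_GSO_NOT_NEEDED;

	/* Segment in software; each resulting skb fits the HW limit. */
	segments = skb_gso_segment(skb, netdev->features);
	if (IS_ERR_OR_NULL(segments))
		return EDMA_TX_GSO_FAIL;

	/* The original skb is no longer needed once segmented. */
	consume_skb(skb);
	*segs = segments;

	return EDMA_TX_GSO_SUCCEED;
}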
+
+/**
+ * struct edma_txcmpl_stats - EDMA TX complete ring statistics.
+ * @invalid_buffer: Invalid buffer address received.
+ * @errors: Other Tx complete descriptor errors indicated by the hardware.
+ * @desc_with_more_bit: Number of completion descriptors with the more bit set.
+ * @no_pending_desc: Number of times no descriptor was pending for processing.
+ * @syncp: Synchronization pointer.
+ */
+struct edma_txcmpl_stats {
+ u64 invalid_buffer;
+ u64 errors;
+ u64 desc_with_more_bit;
+ u64 no_pending_desc;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct edma_txdesc_stats - EDMA Tx descriptor ring statistics.
+ * @no_desc_avail: No descriptor available to transmit.
+ * @tso_max_seg_exceed: Packets exceeding EDMA_TX_TSO_SEG_MAX segments.
+ * @syncp: Synchronization pointer.
+ */
+struct edma_txdesc_stats {
+ u64 no_desc_avail;
+ u64 tso_max_seg_exceed;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct edma_txdesc_pri - EDMA primary TX descriptor.
+ * @word0: Low 32-bit of buffer address.
+ * @word1: Buffer recycling, PTP tag flag, PRI valid flag.
+ * @word2: Low 32-bit of opaque value.
+ * @word3: High 32-bit of opaque value.
+ * @word4: Source/Destination port info.
+ * @word5: VLAN offload, csum mode, ip_csum_en, tso_en, data len.
+ * @word6: MSS/hash_value/PTP tag, data offset.
+ * @word7: L4/L3 offset, PROT type, L2 type, CVLAN/SVLAN tag, service code.
+ */
+struct edma_txdesc_pri {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+ u32 word4;
+ u32 word5;
+ u32 word6;
+ u32 word7;
+};
+
+/**
+ * struct edma_txdesc_sec - EDMA secondary TX descriptor.
+ * @word0: Reserved.
+ * @word1: Custom csum offset, payload offset, TTL/NAT action.
+ * @word2: NAPT translated port, DSCP value, TTL value.
+ * @word3: Flow index value and valid flag.
+ * @word4: Reserved.
+ * @word5: Reserved.
+ * @word6: CVLAN/SVLAN command.
+ * @word7: CVLAN/SVLAN tag value.
+ */
+struct edma_txdesc_sec {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+ u32 word4;
+ u32 word5;
+ u32 word6;
+ u32 word7;
+};
+
+/**
+ * struct edma_txcmpl_desc - EDMA TX complete descriptor.
+ * @word0: Low 32-bit opaque value.
+ * @word1: High 32-bit opaque value.
+ * @word2: More fragment, transmit ring id, pool id.
+ * @word3: Error indications.
+ */
+struct edma_txcmpl_desc {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+};
+
+/**
+ * struct edma_txdesc_ring - EDMA TX descriptor ring.
+ * @prod_idx: Producer index.
+ * @id: Tx ring number.
+ * @avail_desc: Number of available descriptors to process.
+ * @pdesc: Primary descriptor ring virtual address.
+ * @pdma: Primary descriptor ring physical address.
+ * @sdesc: Secondary descriptor ring virtual address.
+ * @txdesc_stats: Tx descriptor ring statistics.
+ * @sdma: Secondary descriptor ring physical address.
+ * @count: Number of descriptors.
+ * @fc_grp_id: Flow control group ID.
+ */
+struct edma_txdesc_ring {
+ u32 prod_idx;
+ u32 id;
+ u32 avail_desc;
+ struct edma_txdesc_pri *pdesc;
+ dma_addr_t pdma;
+ struct edma_txdesc_sec *sdesc;
+ struct edma_txdesc_stats txdesc_stats;
+ dma_addr_t sdma;
+ u32 count;
+ u8 fc_grp_id;
+};
+
+/**
+ * struct edma_txcmpl_ring - EDMA TX complete ring.
+ * @napi: NAPI structure.
+ * @cons_idx: Consumer index.
+ * @avail_pkt: Number of available packets to process.
+ * @desc: Descriptor ring virtual address.
+ * @id: Txcmpl ring number.
+ * @txcmpl_stats: Tx complete ring statistics.
+ * @dma: Descriptor ring physical address.
+ * @count: Number of descriptors in the ring.
+ * @napi_added: Flag to indicate NAPI add status.
+ */
+struct edma_txcmpl_ring {
+ struct napi_struct napi;
+ u32 cons_idx;
+ u32 avail_pkt;
+ struct edma_txcmpl_desc *desc;
+ u32 id;
+ struct edma_txcmpl_stats txcmpl_stats;
+ dma_addr_t dma;
+ u32 count;
+ bool napi_added;
+};
+
+enum edma_tx_status edma_tx_ring_xmit(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct edma_txdesc_ring *txdesc_ring,
+ struct edma_port_tx_stats *stats);
+u32 edma_tx_complete(u32 work_to_do,
+ struct edma_txcmpl_ring *txcmpl_ring);
+irqreturn_t edma_tx_handle_irq(int irq, void *ctx);
+int edma_tx_napi_poll(struct napi_struct *napi, int budget);
+enum edma_tx_gso_status edma_tx_gso_segment(struct sk_buff *skb,
+ struct net_device *netdev, struct sk_buff **segs);
+
+#endif
--
2.45.2