mirror of
https://github.com/openwrt/openwrt.git
synced 2024-12-24 15:56:49 +00:00
396 lines
13 KiB
Diff
396 lines
13 KiB
Diff
|
From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
|
||
|
From: Sricharan R <sricharan@codeaurora.org>
|
||
|
Date: Mon, 28 Aug 2017 20:30:24 +0530
|
||
|
Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
|
||
|
|
||
|
The bam dmaengine has a circular FIFO to which we
|
||
|
add hw descriptors that describes the transaction.
|
||
|
The FIFO has space for about 4096 hw descriptors.
|
||
|
|
||
|
Currently we add one descriptor and wait for it to
|
||
|
complete with interrupt and then add the next pending
|
||
|
descriptor. In this way, the FIFO is underutilized
|
||
|
since only one descriptor is processed at a time, although
|
||
|
there is space in FIFO for the BAM to process more.
|
||
|
|
||
|
Instead keep adding descriptors to FIFO till its full,
|
||
|
that allows BAM to continue to work on the next descriptor
|
||
|
immediately after signalling completion interrupt for the
|
||
|
previous descriptor.
|
||
|
|
||
|
Also when the client has not set the DMA_PREP_INTERRUPT for
|
||
|
a descriptor, then do not configure BAM to trigger a interrupt
|
||
|
upon completion of that descriptor. This way we get a interrupt
|
||
|
only for the descriptor for which DMA_PREP_INTERRUPT was
|
||
|
requested and there signal completion of all the previous completed
|
||
|
descriptors. So we still do callbacks for all requested descriptors,
|
||
|
but just that the number of interrupts are reduced.
|
||
|
|
||
|
CURRENT:
|
||
|
|
||
|
------ ------- ---------------
|
||
|
|DES 0| |DESC 1| |DESC 2 + INT |
|
||
|
------ ------- ---------------
|
||
|
| | |
|
||
|
| | |
|
||
|
INTERRUPT: (INT) (INT) (INT)
|
||
|
CALLBACK: (CB) (CB) (CB)
|
||
|
|
||
|
MTD_SPEEDTEST READ PAGE: 3560 KiB/s
|
||
|
MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
|
||
|
IOZONE READ: 2456 KB/s
|
||
|
IOZONE WRITE: 1230 KB/s
|
||
|
|
||
|
bam dma interrupts (after tests): 96508
|
||
|
|
||
|
CHANGE:
|
||
|
|
||
|
------ ------- -------------
|
||
|
|DES 0| |DESC 1 |DESC 2 + INT |
|
||
|
------ ------- --------------
|
||
|
|
|
||
|
|
|
||
|
(INT)
|
||
|
(CB for 0, 1, 2)
|
||
|
|
||
|
MTD_SPEEDTEST READ PAGE: 3860 KiB/s
|
||
|
MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
|
||
|
IOZONE READ: 2677 KB/s
|
||
|
IOZONE WRITE: 1308 KB/s
|
||
|
|
||
|
bam dma interrupts (after tests): 58806
|
||
|
|
||
|
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
|
||
|
Reviewed-by: Andy Gross <andy.gross@linaro.org>
|
||
|
Tested-by: Abhishek Sahu <absahu@codeaurora.org>
|
||
|
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
|
||
|
---
|
||
|
drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
|
||
|
1 file changed, 109 insertions(+), 60 deletions(-)
|
||
|
|
||
|
--- a/drivers/dma/qcom/bam_dma.c
|
||
|
+++ b/drivers/dma/qcom/bam_dma.c
|
||
|
@@ -46,6 +46,7 @@
|
||
|
#include <linux/of_address.h>
|
||
|
#include <linux/of_irq.h>
|
||
|
#include <linux/of_dma.h>
|
||
|
+#include <linux/circ_buf.h>
|
||
|
#include <linux/clk.h>
|
||
|
#include <linux/dmaengine.h>
|
||
|
#include <linux/pm_runtime.h>
|
||
|
@@ -78,6 +79,8 @@ struct bam_async_desc {
|
||
|
|
||
|
struct bam_desc_hw *curr_desc;
|
||
|
|
||
|
+ /* list node for the desc in the bam_chan list of descriptors */
|
||
|
+ struct list_head desc_node;
|
||
|
enum dma_transfer_direction dir;
|
||
|
size_t length;
|
||
|
struct bam_desc_hw desc[0];
|
||
|
@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
|
||
|
#define BAM_DESC_FIFO_SIZE SZ_32K
|
||
|
#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
|
||
|
#define BAM_FIFO_SIZE (SZ_32K - 8)
|
||
|
+#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\
|
||
|
+ MAX_DESCRIPTORS + 1) == 0)
|
||
|
|
||
|
struct bam_chan {
|
||
|
struct virt_dma_chan vc;
|
||
|
@@ -356,8 +361,6 @@ struct bam_chan {
|
||
|
/* configuration from device tree */
|
||
|
u32 id;
|
||
|
|
||
|
- struct bam_async_desc *curr_txd; /* current running dma */
|
||
|
-
|
||
|
/* runtime configuration */
|
||
|
struct dma_slave_config slave;
|
||
|
|
||
|
@@ -372,6 +375,8 @@ struct bam_chan {
|
||
|
unsigned int initialized; /* is the channel hw initialized? */
|
||
|
unsigned int paused; /* is the channel paused? */
|
||
|
unsigned int reconfigure; /* new slave config? */
|
||
|
+ /* list of descriptors currently processed */
|
||
|
+ struct list_head desc_list;
|
||
|
|
||
|
struct list_head node;
|
||
|
};
|
||
|
@@ -539,7 +544,7 @@ static void bam_free_chan(struct dma_cha
|
||
|
|
||
|
vchan_free_chan_resources(to_virt_chan(chan));
|
||
|
|
||
|
- if (bchan->curr_txd) {
|
||
|
+ if (!list_empty(&bchan->desc_list)) {
|
||
|
dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
|
||
|
goto err;
|
||
|
}
|
||
|
@@ -632,8 +637,6 @@ static struct dma_async_tx_descriptor *b
|
||
|
|
||
|
if (flags & DMA_PREP_INTERRUPT)
|
||
|
async_desc->flags |= DESC_FLAG_EOT;
|
||
|
- else
|
||
|
- async_desc->flags |= DESC_FLAG_INT;
|
||
|
|
||
|
async_desc->num_desc = num_alloc;
|
||
|
async_desc->curr_desc = async_desc->desc;
|
||
|
@@ -684,14 +687,16 @@ err_out:
|
||
|
static int bam_dma_terminate_all(struct dma_chan *chan)
|
||
|
{
|
||
|
struct bam_chan *bchan = to_bam_chan(chan);
|
||
|
+ struct bam_async_desc *async_desc, *tmp;
|
||
|
unsigned long flag;
|
||
|
LIST_HEAD(head);
|
||
|
|
||
|
/* remove all transactions, including active transaction */
|
||
|
spin_lock_irqsave(&bchan->vc.lock, flag);
|
||
|
- if (bchan->curr_txd) {
|
||
|
- list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
|
||
|
- bchan->curr_txd = NULL;
|
||
|
+ list_for_each_entry_safe(async_desc, tmp,
|
||
|
+ &bchan->desc_list, desc_node) {
|
||
|
+ list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
|
||
|
+ list_del(&async_desc->desc_node);
|
||
|
}
|
||
|
|
||
|
vchan_get_all_descriptors(&bchan->vc, &head);
|
||
|
@@ -763,9 +768,9 @@ static int bam_resume(struct dma_chan *c
|
||
|
*/
|
||
|
static u32 process_channel_irqs(struct bam_device *bdev)
|
||
|
{
|
||
|
- u32 i, srcs, pipe_stts;
|
||
|
+ u32 i, srcs, pipe_stts, offset, avail;
|
||
|
unsigned long flags;
|
||
|
- struct bam_async_desc *async_desc;
|
||
|
+ struct bam_async_desc *async_desc, *tmp;
|
||
|
|
||
|
srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
|
||
|
|
||
|
@@ -785,27 +790,40 @@ static u32 process_channel_irqs(struct b
|
||
|
writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
|
||
|
|
||
|
spin_lock_irqsave(&bchan->vc.lock, flags);
|
||
|
- async_desc = bchan->curr_txd;
|
||
|
|
||
|
- if (async_desc) {
|
||
|
- async_desc->num_desc -= async_desc->xfer_len;
|
||
|
- async_desc->curr_desc += async_desc->xfer_len;
|
||
|
- bchan->curr_txd = NULL;
|
||
|
+ offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
|
||
|
+ P_SW_OFSTS_MASK;
|
||
|
+ offset /= sizeof(struct bam_desc_hw);
|
||
|
+
|
||
|
+ /* Number of bytes available to read */
|
||
|
+ avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
|
||
|
+
|
||
|
+ list_for_each_entry_safe(async_desc, tmp,
|
||
|
+ &bchan->desc_list, desc_node) {
|
||
|
+ /* Not enough data to read */
|
||
|
+ if (avail < async_desc->xfer_len)
|
||
|
+ break;
|
||
|
|
||
|
/* manage FIFO */
|
||
|
bchan->head += async_desc->xfer_len;
|
||
|
bchan->head %= MAX_DESCRIPTORS;
|
||
|
|
||
|
+ async_desc->num_desc -= async_desc->xfer_len;
|
||
|
+ async_desc->curr_desc += async_desc->xfer_len;
|
||
|
+ avail -= async_desc->xfer_len;
|
||
|
+
|
||
|
/*
|
||
|
- * if complete, process cookie. Otherwise
|
||
|
+ * if complete, process cookie. Otherwise
|
||
|
* push back to front of desc_issued so that
|
||
|
* it gets restarted by the tasklet
|
||
|
*/
|
||
|
- if (!async_desc->num_desc)
|
||
|
+ if (!async_desc->num_desc) {
|
||
|
vchan_cookie_complete(&async_desc->vd);
|
||
|
- else
|
||
|
+ } else {
|
||
|
list_add(&async_desc->vd.node,
|
||
|
- &bchan->vc.desc_issued);
|
||
|
+ &bchan->vc.desc_issued);
|
||
|
+ }
|
||
|
+ list_del(&async_desc->desc_node);
|
||
|
}
|
||
|
|
||
|
spin_unlock_irqrestore(&bchan->vc.lock, flags);
|
||
|
@@ -867,6 +885,7 @@ static enum dma_status bam_tx_status(str
|
||
|
struct dma_tx_state *txstate)
|
||
|
{
|
||
|
struct bam_chan *bchan = to_bam_chan(chan);
|
||
|
+ struct bam_async_desc *async_desc;
|
||
|
struct virt_dma_desc *vd;
|
||
|
int ret;
|
||
|
size_t residue = 0;
|
||
|
@@ -882,11 +901,17 @@ static enum dma_status bam_tx_status(str
|
||
|
|
||
|
spin_lock_irqsave(&bchan->vc.lock, flags);
|
||
|
vd = vchan_find_desc(&bchan->vc, cookie);
|
||
|
- if (vd)
|
||
|
+ if (vd) {
|
||
|
residue = container_of(vd, struct bam_async_desc, vd)->length;
|
||
|
- else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
|
||
|
- for (i = 0; i < bchan->curr_txd->num_desc; i++)
|
||
|
- residue += bchan->curr_txd->curr_desc[i].size;
|
||
|
+ } else {
|
||
|
+ list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
|
||
|
+ if (async_desc->vd.tx.cookie != cookie)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ for (i = 0; i < async_desc->num_desc; i++)
|
||
|
+ residue += async_desc->curr_desc[i].size;
|
||
|
+ }
|
||
|
+ }
|
||
|
|
||
|
spin_unlock_irqrestore(&bchan->vc.lock, flags);
|
||
|
|
||
|
@@ -927,63 +952,86 @@ static void bam_start_dma(struct bam_cha
|
||
|
{
|
||
|
struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
|
||
|
struct bam_device *bdev = bchan->bdev;
|
||
|
- struct bam_async_desc *async_desc;
|
||
|
+ struct bam_async_desc *async_desc = NULL;
|
||
|
struct bam_desc_hw *desc;
|
||
|
struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
|
||
|
sizeof(struct bam_desc_hw));
|
||
|
int ret;
|
||
|
+ unsigned int avail;
|
||
|
+ struct dmaengine_desc_callback cb;
|
||
|
|
||
|
lockdep_assert_held(&bchan->vc.lock);
|
||
|
|
||
|
if (!vd)
|
||
|
return;
|
||
|
|
||
|
- list_del(&vd->node);
|
||
|
-
|
||
|
- async_desc = container_of(vd, struct bam_async_desc, vd);
|
||
|
- bchan->curr_txd = async_desc;
|
||
|
-
|
||
|
ret = pm_runtime_get_sync(bdev->dev);
|
||
|
if (ret < 0)
|
||
|
return;
|
||
|
|
||
|
- /* on first use, initialize the channel hardware */
|
||
|
- if (!bchan->initialized)
|
||
|
- bam_chan_init_hw(bchan, async_desc->dir);
|
||
|
-
|
||
|
- /* apply new slave config changes, if necessary */
|
||
|
- if (bchan->reconfigure)
|
||
|
- bam_apply_new_config(bchan, async_desc->dir);
|
||
|
+ while (vd && !IS_BUSY(bchan)) {
|
||
|
+ list_del(&vd->node);
|
||
|
|
||
|
- desc = bchan->curr_txd->curr_desc;
|
||
|
+ async_desc = container_of(vd, struct bam_async_desc, vd);
|
||
|
|
||
|
- if (async_desc->num_desc > MAX_DESCRIPTORS)
|
||
|
- async_desc->xfer_len = MAX_DESCRIPTORS;
|
||
|
- else
|
||
|
- async_desc->xfer_len = async_desc->num_desc;
|
||
|
+ /* on first use, initialize the channel hardware */
|
||
|
+ if (!bchan->initialized)
|
||
|
+ bam_chan_init_hw(bchan, async_desc->dir);
|
||
|
|
||
|
- /* set any special flags on the last descriptor */
|
||
|
- if (async_desc->num_desc == async_desc->xfer_len)
|
||
|
- desc[async_desc->xfer_len - 1].flags |=
|
||
|
- cpu_to_le16(async_desc->flags);
|
||
|
- else
|
||
|
- desc[async_desc->xfer_len - 1].flags |=
|
||
|
- cpu_to_le16(DESC_FLAG_INT);
|
||
|
+ /* apply new slave config changes, if necessary */
|
||
|
+ if (bchan->reconfigure)
|
||
|
+ bam_apply_new_config(bchan, async_desc->dir);
|
||
|
+
|
||
|
+ desc = async_desc->curr_desc;
|
||
|
+ avail = CIRC_SPACE(bchan->tail, bchan->head,
|
||
|
+ MAX_DESCRIPTORS + 1);
|
||
|
+
|
||
|
+ if (async_desc->num_desc > avail)
|
||
|
+ async_desc->xfer_len = avail;
|
||
|
+ else
|
||
|
+ async_desc->xfer_len = async_desc->num_desc;
|
||
|
+
|
||
|
+ /* set any special flags on the last descriptor */
|
||
|
+ if (async_desc->num_desc == async_desc->xfer_len)
|
||
|
+ desc[async_desc->xfer_len - 1].flags |=
|
||
|
+ cpu_to_le16(async_desc->flags);
|
||
|
|
||
|
- if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
|
||
|
- u32 partial = MAX_DESCRIPTORS - bchan->tail;
|
||
|
+ vd = vchan_next_desc(&bchan->vc);
|
||
|
|
||
|
- memcpy(&fifo[bchan->tail], desc,
|
||
|
- partial * sizeof(struct bam_desc_hw));
|
||
|
- memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
|
||
|
+ dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
|
||
|
+
|
||
|
+ /*
|
||
|
+ * An interrupt is generated at this desc, if
|
||
|
+ * - FIFO is FULL.
|
||
|
+ * - No more descriptors to add.
|
||
|
+ * - If a callback completion was requested for this DESC,
|
||
|
+ * In this case, BAM will deliver the completion callback
|
||
|
+ * for this desc and continue processing the next desc.
|
||
|
+ */
|
||
|
+ if (((avail <= async_desc->xfer_len) || !vd ||
|
||
|
+ dmaengine_desc_callback_valid(&cb)) &&
|
||
|
+ !(async_desc->flags & DESC_FLAG_EOT))
|
||
|
+ desc[async_desc->xfer_len - 1].flags |=
|
||
|
+ cpu_to_le16(DESC_FLAG_INT);
|
||
|
+
|
||
|
+ if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
|
||
|
+ u32 partial = MAX_DESCRIPTORS - bchan->tail;
|
||
|
+
|
||
|
+ memcpy(&fifo[bchan->tail], desc,
|
||
|
+ partial * sizeof(struct bam_desc_hw));
|
||
|
+ memcpy(fifo, &desc[partial],
|
||
|
+ (async_desc->xfer_len - partial) *
|
||
|
sizeof(struct bam_desc_hw));
|
||
|
- } else {
|
||
|
- memcpy(&fifo[bchan->tail], desc,
|
||
|
- async_desc->xfer_len * sizeof(struct bam_desc_hw));
|
||
|
- }
|
||
|
+ } else {
|
||
|
+ memcpy(&fifo[bchan->tail], desc,
|
||
|
+ async_desc->xfer_len *
|
||
|
+ sizeof(struct bam_desc_hw));
|
||
|
+ }
|
||
|
|
||
|
- bchan->tail += async_desc->xfer_len;
|
||
|
- bchan->tail %= MAX_DESCRIPTORS;
|
||
|
+ bchan->tail += async_desc->xfer_len;
|
||
|
+ bchan->tail %= MAX_DESCRIPTORS;
|
||
|
+ list_add_tail(&async_desc->desc_node, &bchan->desc_list);
|
||
|
+ }
|
||
|
|
||
|
/* ensure descriptor writes and dma start not reordered */
|
||
|
wmb();
|
||
|
@@ -1012,7 +1060,7 @@ static void dma_tasklet(unsigned long da
|
||
|
bchan = &bdev->channels[i];
|
||
|
spin_lock_irqsave(&bchan->vc.lock, flags);
|
||
|
|
||
|
- if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
|
||
|
+ if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
|
||
|
bam_start_dma(bchan);
|
||
|
spin_unlock_irqrestore(&bchan->vc.lock, flags);
|
||
|
}
|
||
|
@@ -1033,7 +1081,7 @@ static void bam_issue_pending(struct dma
|
||
|
spin_lock_irqsave(&bchan->vc.lock, flags);
|
||
|
|
||
|
/* if work pending and idle, start a transaction */
|
||
|
- if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
|
||
|
+ if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
|
||
|
bam_start_dma(bchan);
|
||
|
|
||
|
spin_unlock_irqrestore(&bchan->vc.lock, flags);
|
||
|
@@ -1133,6 +1181,7 @@ static void bam_channel_init(struct bam_
|
||
|
|
||
|
vchan_init(&bchan->vc, &bdev->common);
|
||
|
bchan->vc.desc_free = bam_dma_free_desc;
|
||
|
+ INIT_LIST_HEAD(&bchan->desc_list);
|
||
|
}
|
||
|
|
||
|
static const struct of_device_id bam_of_match[] = {
|