- From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
- From: Sricharan R <[email protected]>
- Date: Mon, 28 Aug 2017 20:30:24 +0530
- Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
- The BAM dmaengine has a circular FIFO to which we
- add hw descriptors that describe the transactions.
- The FIFO has space for about 4096 hw descriptors.
- Currently we add one descriptor, wait for it to
- complete with an interrupt, and only then add the next
- pending descriptor. In this way the FIFO is underutilized,
- since only one descriptor is processed at a time even though
- there is space in the FIFO for the BAM to process more.
- Instead, keep adding descriptors to the FIFO until it is full;
- this allows the BAM to continue working on the next descriptor
- immediately after signalling the completion interrupt for the
- previous descriptor.
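- (Illustration only, not part of the patch: the FIFO occupancy
- bookkeeping relies on the circ_buf helpers that the patch starts
- including. The helper names below are invented for the example;
- tail, head and MAX_DESCRIPTORS mirror the driver's own fields, and
- MAX_DESCRIPTORS + 1 is 4096, a power of two as the circ_buf macros
- require.)
-
-     #include <linux/circ_buf.h>
-
-     /*
-      * Free slots left for new hw descriptors.  tail is the producer
-      * index the driver advances as it writes descriptors into the
-      * FIFO, head the consumer index it advances on completions.
-      */
-     static unsigned int fifo_free_slots(unsigned int tail, unsigned int head)
-     {
-             return CIRC_SPACE(tail, head, MAX_DESCRIPTORS + 1);
-     }
-
-     /*
-      * Descriptors the hardware has finished, derived from the offset
-      * it reports in the BAM_P_SW_OFSTS register.
-      */
-     static unsigned int fifo_completed(unsigned int hw_offset, unsigned int head)
-     {
-             return CIRC_CNT(hw_offset, head, MAX_DESCRIPTORS + 1);
-     }
-
- The IS_BUSY() macro added below is exactly the "no free slots left"
- case of this accounting.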
- Also, when the client has not set DMA_PREP_INTERRUPT for
- a descriptor, do not configure the BAM to trigger an interrupt
- upon completion of that descriptor. This way we get an interrupt
- only for the descriptor for which DMA_PREP_INTERRUPT was
- requested, and there we signal completion of all the previously
- completed descriptors. So we still do callbacks for all requested
- descriptors, just with fewer interrupts.
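- (Again an illustration only, condensed from the bam_prep_slave_sg()
- and bam_start_dma() hunks below; avail is the free-slot count and vd
- the next issued descriptor, using the same names as the patch.)
-
-     /* at prepare time: remember that the client wants a completion IRQ */
-     if (flags & DMA_PREP_INTERRUPT)
-             async_desc->flags |= DESC_FLAG_EOT;
-
-     /* at submit time: put the requested flags on the last hw descriptor */
-     if (async_desc->num_desc == async_desc->xfer_len)
-             desc[async_desc->xfer_len - 1].flags |=
-                     cpu_to_le16(async_desc->flags);
-
-     /*
-      * Force an interrupt only when queueing has to stop anyway: the
-      * FIFO will be full after this batch, there is no further
-      * descriptor to push, or a callback was requested; skip it when
-      * EOT will already raise one.
-      */
-     if ((avail <= async_desc->xfer_len || !vd ||
-          dmaengine_desc_callback_valid(&cb)) &&
-         !(async_desc->flags & DESC_FLAG_EOT))
-             desc[async_desc->xfer_len - 1].flags |=
-                     cpu_to_le16(DESC_FLAG_INT);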
- CURRENT:
-
-               --------     --------     --------------
-              | DESC 0 |   | DESC 1 |   | DESC 2 + INT |
-               --------     --------     --------------
-                  |            |               |
-                  |            |               |
- INTERRUPT:     (INT)        (INT)           (INT)
- CALLBACK:      (CB)         (CB)            (CB)
- MTD_SPEEDTEST READ PAGE: 3560 KiB/s
- MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
- IOZONE READ: 2456 KB/s
- IOZONE WRITE: 1230 KB/s
- bam dma interrupts (after tests): 96508
- CHANGE:
-
-               --------     --------     --------------
-              | DESC 0 |   | DESC 1 |   | DESC 2 + INT |
-               --------     --------     --------------
-                                               |
-                                               |
-                                             (INT)
-                                      (CB for 0, 1, 2)
- MTD_SPEEDTEST READ PAGE: 3860 KiB/s
- MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
- IOZONE READ: 2677 KB/s
- IOZONE WRITE: 1308 KB/s
- bam dma interrupts (after tests): 58806
- Signed-off-by: Sricharan R <[email protected]>
- Reviewed-by: Andy Gross <[email protected]>
- Tested-by: Abhishek Sahu <[email protected]>
- Signed-off-by: Vinod Koul <[email protected]>
- ---
- drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
- 1 file changed, 109 insertions(+), 60 deletions(-)
- --- a/drivers/dma/qcom/bam_dma.c
- +++ b/drivers/dma/qcom/bam_dma.c
- @@ -46,6 +46,7 @@
- #include <linux/of_address.h>
- #include <linux/of_irq.h>
- #include <linux/of_dma.h>
- +#include <linux/circ_buf.h>
- #include <linux/clk.h>
- #include <linux/dmaengine.h>
- #include <linux/pm_runtime.h>
- @@ -78,6 +79,8 @@ struct bam_async_desc {
-
- struct bam_desc_hw *curr_desc;
-
- + /* list node for the desc in the bam_chan list of descriptors */
- + struct list_head desc_node;
- enum dma_transfer_direction dir;
- size_t length;
- struct bam_desc_hw desc[0];
- @@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
- #define BAM_DESC_FIFO_SIZE SZ_32K
- #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
- #define BAM_FIFO_SIZE (SZ_32K - 8)
- +#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\
- + MAX_DESCRIPTORS + 1) == 0)
-
- struct bam_chan {
- struct virt_dma_chan vc;
- @@ -356,8 +361,6 @@ struct bam_chan {
- /* configuration from device tree */
- u32 id;
-
- - struct bam_async_desc *curr_txd; /* current running dma */
- -
- /* runtime configuration */
- struct dma_slave_config slave;
-
- @@ -372,6 +375,8 @@ struct bam_chan {
- unsigned int initialized; /* is the channel hw initialized? */
- unsigned int paused; /* is the channel paused? */
- unsigned int reconfigure; /* new slave config? */
- + /* list of descriptors currently processed */
- + struct list_head desc_list;
-
- struct list_head node;
- };
- @@ -540,7 +545,7 @@ static void bam_free_chan(struct dma_cha
-
- vchan_free_chan_resources(to_virt_chan(chan));
-
- - if (bchan->curr_txd) {
- + if (!list_empty(&bchan->desc_list)) {
- dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
- goto err;
- }
- @@ -633,8 +638,6 @@ static struct dma_async_tx_descriptor *b
-
- if (flags & DMA_PREP_INTERRUPT)
- async_desc->flags |= DESC_FLAG_EOT;
- - else
- - async_desc->flags |= DESC_FLAG_INT;
-
- async_desc->num_desc = num_alloc;
- async_desc->curr_desc = async_desc->desc;
- @@ -685,14 +688,16 @@ err_out:
- static int bam_dma_terminate_all(struct dma_chan *chan)
- {
- struct bam_chan *bchan = to_bam_chan(chan);
- + struct bam_async_desc *async_desc, *tmp;
- unsigned long flag;
- LIST_HEAD(head);
-
- /* remove all transactions, including active transaction */
- spin_lock_irqsave(&bchan->vc.lock, flag);
- - if (bchan->curr_txd) {
- - list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
- - bchan->curr_txd = NULL;
- + list_for_each_entry_safe(async_desc, tmp,
- + &bchan->desc_list, desc_node) {
- + list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
- + list_del(&async_desc->desc_node);
- }
-
- vchan_get_all_descriptors(&bchan->vc, &head);
- @@ -764,9 +769,9 @@ static int bam_resume(struct dma_chan *c
- */
- static u32 process_channel_irqs(struct bam_device *bdev)
- {
- - u32 i, srcs, pipe_stts;
- + u32 i, srcs, pipe_stts, offset, avail;
- unsigned long flags;
- - struct bam_async_desc *async_desc;
- + struct bam_async_desc *async_desc, *tmp;
-
- srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
-
- @@ -786,27 +791,40 @@ static u32 process_channel_irqs(struct b
- writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
-
- spin_lock_irqsave(&bchan->vc.lock, flags);
- - async_desc = bchan->curr_txd;
-
- - if (async_desc) {
- - async_desc->num_desc -= async_desc->xfer_len;
- - async_desc->curr_desc += async_desc->xfer_len;
- - bchan->curr_txd = NULL;
- + offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
- + P_SW_OFSTS_MASK;
- + offset /= sizeof(struct bam_desc_hw);
- +
- + /* Number of descriptors available to read */
- + avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
- +
- + list_for_each_entry_safe(async_desc, tmp,
- + &bchan->desc_list, desc_node) {
- + /* Not enough data to read */
- + if (avail < async_desc->xfer_len)
- + break;
-
- /* manage FIFO */
- bchan->head += async_desc->xfer_len;
- bchan->head %= MAX_DESCRIPTORS;
-
- + async_desc->num_desc -= async_desc->xfer_len;
- + async_desc->curr_desc += async_desc->xfer_len;
- + avail -= async_desc->xfer_len;
- +
- /*
- - * if complete, process cookie. Otherwise
- + * if complete, process cookie. Otherwise
- * push back to front of desc_issued so that
- * it gets restarted by the tasklet
- */
- - if (!async_desc->num_desc)
- + if (!async_desc->num_desc) {
- vchan_cookie_complete(&async_desc->vd);
- - else
- + } else {
- list_add(&async_desc->vd.node,
- - &bchan->vc.desc_issued);
- + &bchan->vc.desc_issued);
- + }
- + list_del(&async_desc->desc_node);
- }
-
- spin_unlock_irqrestore(&bchan->vc.lock, flags);
- @@ -868,6 +886,7 @@ static enum dma_status bam_tx_status(str
- struct dma_tx_state *txstate)
- {
- struct bam_chan *bchan = to_bam_chan(chan);
- + struct bam_async_desc *async_desc;
- struct virt_dma_desc *vd;
- int ret;
- size_t residue = 0;
- @@ -883,11 +902,17 @@ static enum dma_status bam_tx_status(str
-
- spin_lock_irqsave(&bchan->vc.lock, flags);
- vd = vchan_find_desc(&bchan->vc, cookie);
- - if (vd)
- + if (vd) {
- residue = container_of(vd, struct bam_async_desc, vd)->length;
- - else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
- - for (i = 0; i < bchan->curr_txd->num_desc; i++)
- - residue += bchan->curr_txd->curr_desc[i].size;
- + } else {
- + list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
- + if (async_desc->vd.tx.cookie != cookie)
- + continue;
- +
- + for (i = 0; i < async_desc->num_desc; i++)
- + residue += async_desc->curr_desc[i].size;
- + }
- + }
-
- spin_unlock_irqrestore(&bchan->vc.lock, flags);
-
- @@ -928,63 +953,86 @@ static void bam_start_dma(struct bam_cha
- {
- struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
- struct bam_device *bdev = bchan->bdev;
- - struct bam_async_desc *async_desc;
- + struct bam_async_desc *async_desc = NULL;
- struct bam_desc_hw *desc;
- struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
- sizeof(struct bam_desc_hw));
- int ret;
- + unsigned int avail;
- + struct dmaengine_desc_callback cb;
-
- lockdep_assert_held(&bchan->vc.lock);
-
- if (!vd)
- return;
-
- - list_del(&vd->node);
- -
- - async_desc = container_of(vd, struct bam_async_desc, vd);
- - bchan->curr_txd = async_desc;
- -
- ret = pm_runtime_get_sync(bdev->dev);
- if (ret < 0)
- return;
-
- - /* on first use, initialize the channel hardware */
- - if (!bchan->initialized)
- - bam_chan_init_hw(bchan, async_desc->dir);
- -
- - /* apply new slave config changes, if necessary */
- - if (bchan->reconfigure)
- - bam_apply_new_config(bchan, async_desc->dir);
- + while (vd && !IS_BUSY(bchan)) {
- + list_del(&vd->node);
-
- - desc = bchan->curr_txd->curr_desc;
- + async_desc = container_of(vd, struct bam_async_desc, vd);
-
- - if (async_desc->num_desc > MAX_DESCRIPTORS)
- - async_desc->xfer_len = MAX_DESCRIPTORS;
- - else
- - async_desc->xfer_len = async_desc->num_desc;
- + /* on first use, initialize the channel hardware */
- + if (!bchan->initialized)
- + bam_chan_init_hw(bchan, async_desc->dir);
-
- - /* set any special flags on the last descriptor */
- - if (async_desc->num_desc == async_desc->xfer_len)
- - desc[async_desc->xfer_len - 1].flags |=
- - cpu_to_le16(async_desc->flags);
- - else
- - desc[async_desc->xfer_len - 1].flags |=
- - cpu_to_le16(DESC_FLAG_INT);
- + /* apply new slave config changes, if necessary */
- + if (bchan->reconfigure)
- + bam_apply_new_config(bchan, async_desc->dir);
- +
- + desc = async_desc->curr_desc;
- + avail = CIRC_SPACE(bchan->tail, bchan->head,
- + MAX_DESCRIPTORS + 1);
- +
- + if (async_desc->num_desc > avail)
- + async_desc->xfer_len = avail;
- + else
- + async_desc->xfer_len = async_desc->num_desc;
- +
- + /* set any special flags on the last descriptor */
- + if (async_desc->num_desc == async_desc->xfer_len)
- + desc[async_desc->xfer_len - 1].flags |=
- + cpu_to_le16(async_desc->flags);
-
- - if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
- - u32 partial = MAX_DESCRIPTORS - bchan->tail;
- + vd = vchan_next_desc(&bchan->vc);
-
- - memcpy(&fifo[bchan->tail], desc,
- - partial * sizeof(struct bam_desc_hw));
- - memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
- + dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
- +
- + /*
- + * An interrupt is generated at this desc, if
- + * - FIFO is FULL.
- + * - No more descriptors to add.
- + * - If a callback completion was requested for this DESC,
- + * In this case, BAM will deliver the completion callback
- + * for this desc and continue processing the next desc.
- + */
- + if (((avail <= async_desc->xfer_len) || !vd ||
- + dmaengine_desc_callback_valid(&cb)) &&
- + !(async_desc->flags & DESC_FLAG_EOT))
- + desc[async_desc->xfer_len - 1].flags |=
- + cpu_to_le16(DESC_FLAG_INT);
- +
- + if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
- + u32 partial = MAX_DESCRIPTORS - bchan->tail;
- +
- + memcpy(&fifo[bchan->tail], desc,
- + partial * sizeof(struct bam_desc_hw));
- + memcpy(fifo, &desc[partial],
- + (async_desc->xfer_len - partial) *
- sizeof(struct bam_desc_hw));
- - } else {
- - memcpy(&fifo[bchan->tail], desc,
- - async_desc->xfer_len * sizeof(struct bam_desc_hw));
- - }
- + } else {
- + memcpy(&fifo[bchan->tail], desc,
- + async_desc->xfer_len *
- + sizeof(struct bam_desc_hw));
- + }
-
- - bchan->tail += async_desc->xfer_len;
- - bchan->tail %= MAX_DESCRIPTORS;
- + bchan->tail += async_desc->xfer_len;
- + bchan->tail %= MAX_DESCRIPTORS;
- + list_add_tail(&async_desc->desc_node, &bchan->desc_list);
- + }
-
- /* ensure descriptor writes and dma start not reordered */
- wmb();
- @@ -1013,7 +1061,7 @@ static void dma_tasklet(unsigned long da
- bchan = &bdev->channels[i];
- spin_lock_irqsave(&bchan->vc.lock, flags);
-
- - if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
- + if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
- bam_start_dma(bchan);
- spin_unlock_irqrestore(&bchan->vc.lock, flags);
- }
- @@ -1034,7 +1082,7 @@ static void bam_issue_pending(struct dma
- spin_lock_irqsave(&bchan->vc.lock, flags);
-
- /* if work pending and idle, start a transaction */
- - if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
- + if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
- bam_start_dma(bchan);
-
- spin_unlock_irqrestore(&bchan->vc.lock, flags);
- @@ -1138,6 +1186,7 @@ static void bam_channel_init(struct bam_
-
- vchan_init(&bchan->vc, &bdev->common);
- bchan->vc.desc_free = bam_dma_free_desc;
- + INIT_LIST_HEAD(&bchan->desc_list);
- }
-
- static const struct of_device_id bam_of_match[] = {
|