040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch

From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
From: Sricharan R <[email protected]>
Date: Mon, 28 Aug 2017 20:30:24 +0530
Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors

The bam dmaengine has a circular FIFO to which we add hw descriptors
that describe the transaction. The FIFO has space for about 4096 hw
descriptors.

Currently we add one descriptor, wait for it to complete with an
interrupt, and only then add the next pending descriptor. This
underutilizes the FIFO, since only one descriptor is processed at a
time even though there is space for the BAM to take on more.

Instead, keep adding descriptors to the FIFO until it is full, which
allows the BAM to continue working on the next descriptor immediately
after signalling the completion interrupt for the previous one.
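
As an aside, a minimal sketch of the circular bookkeeping this relies on,
using the CIRC_CNT()/CIRC_SPACE() definitions from include/linux/circ_buf.h.
The descriptor size, the test values in main() and the index names are
illustrative assumptions, not code taken from the driver:

#include <stdio.h>

#define BAM_DESC_HW_SIZE        8               /* assumed sizeof(struct bam_desc_hw) */
#define BAM_DESC_FIFO_SIZE      (32 * 1024)     /* SZ_32K */
#define MAX_DESCRIPTORS         (BAM_DESC_FIFO_SIZE / BAM_DESC_HW_SIZE - 1)

/* as in include/linux/circ_buf.h; "size" must be a power of two (4096 here) */
#define CIRC_CNT(head, tail, size)      (((head) - (tail)) & ((size) - 1))
#define CIRC_SPACE(head, tail, size)    CIRC_CNT((tail), ((head) + 1), (size))

int main(void)
{
        unsigned int head = 0;  /* consumer index: descriptors the hw has completed */
        unsigned int tail = 0;  /* producer index: descriptors the sw has queued */

        /* software queues 100 descriptors, hardware has finished 40 of them */
        tail = (tail + 100) % MAX_DESCRIPTORS;
        head = (head + 40) % MAX_DESCRIPTORS;

        printf("in flight:  %u\n", CIRC_CNT(tail, head, MAX_DESCRIPTORS + 1));
        printf("free slots: %u\n", CIRC_SPACE(tail, head, MAX_DESCRIPTORS + 1));
        return 0;
}

With MAX_DESCRIPTORS + 1 being a power of two, "FIFO full" is simply
CIRC_SPACE(tail, head, MAX_DESCRIPTORS + 1) == 0, with no separate counter.
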
Also, when the client has not set DMA_PREP_INTERRUPT for a descriptor,
do not configure the BAM to trigger an interrupt upon completion of
that descriptor. This way we get an interrupt only for the descriptor
for which DMA_PREP_INTERRUPT was requested, and at that point we signal
completion of all the previously completed descriptors. So we still
issue callbacks for all requested descriptors; only the number of
interrupts is reduced.
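
The decision of when the last descriptor written in one batch gets the
internal interrupt flag can be sketched as below. The flag values and the
helper are assumptions made for this example; only the conditions mirror
the description above:

#include <stdbool.h>
#include <stdint.h>

#define DESC_FLAG_INT   (1 << 15)       /* assumed: hw raises an irq on this desc */
#define DESC_FLAG_EOT   (1 << 14)       /* assumed: client asked for DMA_PREP_INTERRUPT */

struct hw_desc {
        uint16_t flags;
};

/* hypothetical helper: flag the last descriptor of a batch pushed to the FIFO */
static void flag_last_desc(struct hw_desc *last, uint16_t async_flags,
                           unsigned int space_left, unsigned int batch_len,
                           bool more_pending, bool callback_wanted)
{
        /* client-requested completion interrupt */
        if (async_flags & DESC_FLAG_EOT) {
                last->flags |= DESC_FLAG_EOT;
                return;
        }

        /*
         * Driver-internal interrupt, needed when the FIFO just became full,
         * when nothing else is queued behind this batch, or when a callback
         * must be delivered for this descriptor.
         */
        if (space_left <= batch_len || !more_pending || callback_wanted)
                last->flags |= DESC_FLAG_INT;
}
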
CURRENT:

            ------       -------       ---------------
           |DES 0 |     |DESC 1 |     | DESC 2 + INT  |
            ------       -------       ---------------
               |             |                |
               |             |                |
INTERRUPT:   (INT)         (INT)            (INT)
CALLBACK:     (CB)          (CB)             (CB)

    MTD_SPEEDTEST READ PAGE: 3560 KiB/s
    MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
    IOZONE READ: 2456 KB/s
    IOZONE WRITE: 1230 KB/s

    bam dma interrupts (after tests): 96508

CHANGE:

            ------   -------   ---------------
           |DES 0 | |DESC 1 | | DESC 2 + INT  |
            ------   -------   ---------------
                                      |
                                      |
                                    (INT)
                              (CB for 0, 1, 2)

    MTD_SPEEDTEST READ PAGE: 3860 KiB/s
    MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
    IOZONE READ: 2677 KB/s
    IOZONE WRITE: 1308 KB/s

    bam dma interrupts (after tests): 58806

Signed-off-by: Sricharan R <[email protected]>
Reviewed-by: Andy Gross <[email protected]>
Tested-by: Abhishek Sahu <[email protected]>
Signed-off-by: Vinod Koul <[email protected]>
---
 drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
 1 file changed, 109 insertions(+), 60 deletions(-)

--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -46,6 +46,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_dma.h>
+#include <linux/circ_buf.h>
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
 #include <linux/pm_runtime.h>
@@ -78,6 +79,8 @@ struct bam_async_desc {
         struct bam_desc_hw *curr_desc;
+        /* list node for the desc in the bam_chan list of descriptors */
+        struct list_head desc_node;
         enum dma_transfer_direction dir;
         size_t length;
         struct bam_desc_hw desc[0];
@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
 #define BAM_DESC_FIFO_SIZE SZ_32K
 #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
 #define BAM_FIFO_SIZE (SZ_32K - 8)
+#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\
+                       MAX_DESCRIPTORS + 1) == 0)
 struct bam_chan {
         struct virt_dma_chan vc;
@@ -356,8 +361,6 @@ struct bam_chan {
         /* configuration from device tree */
         u32 id;
-        struct bam_async_desc *curr_txd; /* current running dma */
-
         /* runtime configuration */
         struct dma_slave_config slave;
@@ -372,6 +375,8 @@ struct bam_chan {
         unsigned int initialized; /* is the channel hw initialized? */
         unsigned int paused; /* is the channel paused? */
         unsigned int reconfigure; /* new slave config? */
+        /* list of descriptors currently processed */
+        struct list_head desc_list;
         struct list_head node;
 };
@@ -540,7 +545,7 @@ static void bam_free_chan(struct dma_cha
         vchan_free_chan_resources(to_virt_chan(chan));
-        if (bchan->curr_txd) {
+        if (!list_empty(&bchan->desc_list)) {
                 dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
                 goto err;
         }
@@ -633,8 +638,6 @@ static struct dma_async_tx_descriptor *b
         if (flags & DMA_PREP_INTERRUPT)
                 async_desc->flags |= DESC_FLAG_EOT;
-        else
-                async_desc->flags |= DESC_FLAG_INT;
         async_desc->num_desc = num_alloc;
         async_desc->curr_desc = async_desc->desc;
@@ -685,14 +688,16 @@ err_out:
 static int bam_dma_terminate_all(struct dma_chan *chan)
 {
         struct bam_chan *bchan = to_bam_chan(chan);
+        struct bam_async_desc *async_desc, *tmp;
         unsigned long flag;
         LIST_HEAD(head);
         /* remove all transactions, including active transaction */
         spin_lock_irqsave(&bchan->vc.lock, flag);
-        if (bchan->curr_txd) {
-                list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
-                bchan->curr_txd = NULL;
+        list_for_each_entry_safe(async_desc, tmp,
+                                 &bchan->desc_list, desc_node) {
+                list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
+                list_del(&async_desc->desc_node);
         }
         vchan_get_all_descriptors(&bchan->vc, &head);
@@ -764,9 +769,9 @@ static int bam_resume(struct dma_chan *c
  */
 static u32 process_channel_irqs(struct bam_device *bdev)
 {
-        u32 i, srcs, pipe_stts;
+        u32 i, srcs, pipe_stts, offset, avail;
         unsigned long flags;
-        struct bam_async_desc *async_desc;
+        struct bam_async_desc *async_desc, *tmp;
         srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
@@ -786,27 +791,40 @@ static u32 process_channel_irqs(struct b
                 writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
                 spin_lock_irqsave(&bchan->vc.lock, flags);
-                async_desc = bchan->curr_txd;
-                if (async_desc) {
-                        async_desc->num_desc -= async_desc->xfer_len;
-                        async_desc->curr_desc += async_desc->xfer_len;
-                        bchan->curr_txd = NULL;
+                offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
+                                       P_SW_OFSTS_MASK;
+                offset /= sizeof(struct bam_desc_hw);
+
+                /* Number of bytes available to read */
+                avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+
+                list_for_each_entry_safe(async_desc, tmp,
+                                         &bchan->desc_list, desc_node) {
+                        /* Not enough data to read */
+                        if (avail < async_desc->xfer_len)
+                                break;
                         /* manage FIFO */
                         bchan->head += async_desc->xfer_len;
                         bchan->head %= MAX_DESCRIPTORS;
+                        async_desc->num_desc -= async_desc->xfer_len;
+                        async_desc->curr_desc += async_desc->xfer_len;
+                        avail -= async_desc->xfer_len;
+
                         /*
-                         * if complete, process cookie.  Otherwise
+                         * if complete, process cookie. Otherwise
                          * push back to front of desc_issued so that
                          * it gets restarted by the tasklet
                          */
-                        if (!async_desc->num_desc)
+                        if (!async_desc->num_desc) {
                                 vchan_cookie_complete(&async_desc->vd);
-                        else
+                        } else {
                                 list_add(&async_desc->vd.node,
-                                        &bchan->vc.desc_issued);
+                                         &bchan->vc.desc_issued);
+                        }
+                        list_del(&async_desc->desc_node);
                 }
                 spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -868,6 +886,7 @@ static enum dma_status bam_tx_status(str
                 struct dma_tx_state *txstate)
 {
         struct bam_chan *bchan = to_bam_chan(chan);
+        struct bam_async_desc *async_desc;
         struct virt_dma_desc *vd;
         int ret;
         size_t residue = 0;
@@ -883,11 +902,17 @@ static enum dma_status bam_tx_status(str
         spin_lock_irqsave(&bchan->vc.lock, flags);
         vd = vchan_find_desc(&bchan->vc, cookie);
-        if (vd)
+        if (vd) {
                 residue = container_of(vd, struct bam_async_desc, vd)->length;
-        else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
-                for (i = 0; i < bchan->curr_txd->num_desc; i++)
-                        residue += bchan->curr_txd->curr_desc[i].size;
+        } else {
+                list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
+                        if (async_desc->vd.tx.cookie != cookie)
+                                continue;
+
+                        for (i = 0; i < async_desc->num_desc; i++)
+                                residue += async_desc->curr_desc[i].size;
+                }
+        }
         spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -928,63 +953,86 @@ static void bam_start_dma(struct bam_cha
 {
         struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
         struct bam_device *bdev = bchan->bdev;
-        struct bam_async_desc *async_desc;
+        struct bam_async_desc *async_desc = NULL;
         struct bam_desc_hw *desc;
         struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
                                              sizeof(struct bam_desc_hw));
         int ret;
+        unsigned int avail;
+        struct dmaengine_desc_callback cb;
         lockdep_assert_held(&bchan->vc.lock);
         if (!vd)
                 return;
-        list_del(&vd->node);
-
-        async_desc = container_of(vd, struct bam_async_desc, vd);
-        bchan->curr_txd = async_desc;
-
         ret = pm_runtime_get_sync(bdev->dev);
         if (ret < 0)
                 return;
-        /* on first use, initialize the channel hardware */
-        if (!bchan->initialized)
-                bam_chan_init_hw(bchan, async_desc->dir);
-
-        /* apply new slave config changes, if necessary */
-        if (bchan->reconfigure)
-                bam_apply_new_config(bchan, async_desc->dir);
+        while (vd && !IS_BUSY(bchan)) {
+                list_del(&vd->node);
-        desc = bchan->curr_txd->curr_desc;
+                async_desc = container_of(vd, struct bam_async_desc, vd);
-        if (async_desc->num_desc > MAX_DESCRIPTORS)
-                async_desc->xfer_len = MAX_DESCRIPTORS;
-        else
-                async_desc->xfer_len = async_desc->num_desc;
+                /* on first use, initialize the channel hardware */
+                if (!bchan->initialized)
+                        bam_chan_init_hw(bchan, async_desc->dir);
-        /* set any special flags on the last descriptor */
-        if (async_desc->num_desc == async_desc->xfer_len)
-                desc[async_desc->xfer_len - 1].flags |=
-                                cpu_to_le16(async_desc->flags);
-        else
-                desc[async_desc->xfer_len - 1].flags |=
-                                cpu_to_le16(DESC_FLAG_INT);
+                /* apply new slave config changes, if necessary */
+                if (bchan->reconfigure)
+                        bam_apply_new_config(bchan, async_desc->dir);
+
+                desc = async_desc->curr_desc;
+                avail = CIRC_SPACE(bchan->tail, bchan->head,
+                                   MAX_DESCRIPTORS + 1);
+
+                if (async_desc->num_desc > avail)
+                        async_desc->xfer_len = avail;
+                else
+                        async_desc->xfer_len = async_desc->num_desc;
+
+                /* set any special flags on the last descriptor */
+                if (async_desc->num_desc == async_desc->xfer_len)
+                        desc[async_desc->xfer_len - 1].flags |=
+                                        cpu_to_le16(async_desc->flags);
-        if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
-                u32 partial = MAX_DESCRIPTORS - bchan->tail;
+                vd = vchan_next_desc(&bchan->vc);
-                memcpy(&fifo[bchan->tail], desc,
-                       partial * sizeof(struct bam_desc_hw));
-                memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
+                dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
+
+                /*
+                 * An interrupt is generated at this desc, if
+                 *  - FIFO is FULL.
+                 *  - No more descriptors to add.
+                 *  - If a callback completion was requested for this DESC,
+                 *    In this case, BAM will deliver the completion callback
+                 *    for this desc and continue processing the next desc.
+                 */
+                if (((avail <= async_desc->xfer_len) || !vd ||
+                     dmaengine_desc_callback_valid(&cb)) &&
+                    !(async_desc->flags & DESC_FLAG_EOT))
+                        desc[async_desc->xfer_len - 1].flags |=
+                                        cpu_to_le16(DESC_FLAG_INT);
+
+                if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+                        u32 partial = MAX_DESCRIPTORS - bchan->tail;
+
+                        memcpy(&fifo[bchan->tail], desc,
+                               partial * sizeof(struct bam_desc_hw));
+                        memcpy(fifo, &desc[partial],
+                               (async_desc->xfer_len - partial) *
                                sizeof(struct bam_desc_hw));
-        } else {
-                memcpy(&fifo[bchan->tail], desc,
-                       async_desc->xfer_len * sizeof(struct bam_desc_hw));
-        }
+                } else {
+                        memcpy(&fifo[bchan->tail], desc,
+                               async_desc->xfer_len *
+                               sizeof(struct bam_desc_hw));
+                }
-        bchan->tail += async_desc->xfer_len;
-        bchan->tail %= MAX_DESCRIPTORS;
+                bchan->tail += async_desc->xfer_len;
+                bchan->tail %= MAX_DESCRIPTORS;
+                list_add_tail(&async_desc->desc_node, &bchan->desc_list);
+        }
         /* ensure descriptor writes and dma start not reordered */
         wmb();
@@ -1013,7 +1061,7 @@ static void dma_tasklet(unsigned long da
                 bchan = &bdev->channels[i];
                 spin_lock_irqsave(&bchan->vc.lock, flags);
-                if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
+                if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
                         bam_start_dma(bchan);
                 spin_unlock_irqrestore(&bchan->vc.lock, flags);
         }
@@ -1034,7 +1082,7 @@ static void bam_issue_pending(struct dma
         spin_lock_irqsave(&bchan->vc.lock, flags);
         /* if work pending and idle, start a transaction */
-        if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
+        if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
                 bam_start_dma(bchan);
         spin_unlock_irqrestore(&bchan->vc.lock, flags);
@@ -1138,6 +1186,7 @@ static void bam_channel_init(struct bam_
         vchan_init(&bchan->vc, &bdev->common);
         bchan->vc.desc_free = bam_dma_free_desc;
+        INIT_LIST_HEAD(&bchan->desc_list);
 }
 static const struct of_device_id bam_of_match[] = {
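
For context, a hedged sketch (not part of the patch) of a slave-DMA client
that queues a batch of transactions and requests DMA_PREP_INTERRUPT only on
the last one, so that with this change a single BAM interrupt completes the
whole batch. Only the standard dmaengine calls are real; the function names,
parameters and error handling are illustrative:

#include <linux/completion.h>
#include <linux/dmaengine.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>

static void xfer_done(void *param)
{
        complete(param);        /* wake the submitter */
}

static int queue_batch(struct dma_chan *chan, struct scatterlist *sgl[],
                       unsigned int nents[], int n, struct completion *done)
{
        struct dma_async_tx_descriptor *txd;
        dma_cookie_t cookie;
        int i;

        for (i = 0; i < n; i++) {
                /* only the final transaction asks for a completion interrupt */
                unsigned long flags = (i == n - 1) ? DMA_PREP_INTERRUPT : 0;

                txd = dmaengine_prep_slave_sg(chan, sgl[i], nents[i],
                                              DMA_MEM_TO_DEV, flags);
                if (!txd)
                        return -ENOMEM;

                if (i == n - 1) {
                        txd->callback = xfer_done;
                        txd->callback_param = done;
                }

                cookie = dmaengine_submit(txd);
                if (dma_submit_error(cookie))
                        return -EIO;
        }

        dma_async_issue_pending(chan);
        wait_for_completion(done);
        return 0;
}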