950-0882-dmaengine-dw-axi-dmac-Fixes-for-RP1.patch

From 0a1cd70189daec3baf4b4a233dd8e25ffbb9d512 Mon Sep 17 00:00:00 2001
From: Phil Elwell <[email protected]>
Date: Wed, 28 Apr 2021 17:46:01 +0100
Subject: [PATCH] dmaengine: dw-axi-dmac: Fixes for RP1

Don't assume that DMA addresses of devices are the same as their
physical addresses - convert correctly.
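
For illustration only (not part of the diff below), the conversion amounts
to passing the CPU physical address from dma_slave_config through
phys_to_dma(); the wrapper name here is invented:

/* Hypothetical helper mirroring what this patch does with
 * chan->config.src_addr/dst_addr before programming the DMAC.
 */
static dma_addr_t rp1_device_bus_addr(struct device *dev, phys_addr_t paddr)
{
	return phys_to_dma(dev, paddr);	/* from <linux/dma-direct.h> */
}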

The CFG2 register layout is used when there are more than 8 channels,
but also when configured for more than 16 target peripheral devices,
because the index of the handshake signal has to be made wider.
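
Put another way (a sketch with invented names - the patch itself records
the result in a new reg_map_cfg2 flag):

/* Select the CFG2 register layout when either limit is exceeded. */
static bool needs_cfg2_layout(u32 nr_channels, u32 nr_handshake_ifs)
{
	return nr_channels > 8 || nr_handshake_ifs > 16;
}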

Reset the DMAC on probe.

The driver goes to the trouble of tracking when transfers have been
paused, but then doesn't report that state when queried.

Not having APB registers is not an error - for most use cases it's
expected and not even of interest. Demote the message to debug level,
which is disabled by default.

Each channel has a descriptor pool, which is shared between transfers.
It is unsafe to treat the total number of descriptors allocated from a
pool as the number allocated to a specific transfer; doing so leads
to releasing buffers that shouldn't be released and walking off the
ends of descriptor lists. Instead, give each transfer descriptor its
own count.

Support partial transfers:
Some use cases involve streaming from a device where the transfer only
proceeds when the device's FIFO occupancy exceeds a certain threshold.
In such cases (e.g. when pulling data from a UART) it is important to
know how much data has been transferred so far, in order that remaining
bytes can be read from the FIFO directly by software.
Add the necessary code to provide this "residue" value with a finer,
sub-transfer granularity.
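
For illustration, a hypothetical consumer of this finer-grained residue
might use the standard dmaengine API like so (the helper and its
parameters are invented for the example):

#include <linux/dmaengine.h>

/* How many bytes of a submitted transfer of 'xfer_len' bytes have
 * already been moved by the controller.
 */
static size_t bytes_done_so_far(struct dma_chan *chan, dma_cookie_t cookie,
				size_t xfer_len)
{
	struct dma_tx_state state;
	enum dma_status status;

	status = dmaengine_tx_status(chan, cookie, &state);
	if (status == DMA_COMPLETE)
		return xfer_len;

	/* state.residue reports the bytes still outstanding. */
	return xfer_len - state.residue;
}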

In order to prevent the occasional byte getting stuck in the DMA
controller's internal buffers, restrict the destination memory width
to the source register width.

Signed-off-by: Phil Elwell <[email protected]>
---
 .../dma/dw-axi-dmac/dw-axi-dmac-platform.c | 136 +++++++++++++++---
 drivers/dma/dw-axi-dmac/dw-axi-dmac.h      |   3 +
 2 files changed, 118 insertions(+), 21 deletions(-)

--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -12,6 +12,7 @@
 #include <linux/device.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -79,6 +80,17 @@ axi_chan_iowrite64(struct axi_dma_chan *
 	iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
 }
 
+static inline u64
+axi_chan_ioread64(struct axi_dma_chan *chan, u32 reg)
+{
+	/*
+	 * We split one 64 bit read into two 32 bit reads as some HW doesn't
+	 * support 64 bit access.
+	 */
+	return ((u64)ioread32(chan->chan_regs + reg + 4) << 32) +
+	       ioread32(chan->chan_regs + reg);
+}
+
 static inline void axi_chan_config_write(struct axi_dma_chan *chan,
 					 struct axi_dma_chan_config *config)
 {
@@ -86,7 +98,7 @@ static inline void axi_chan_config_write
 
 	cfg_lo = (config->dst_multblk_type << CH_CFG_L_DST_MULTBLK_TYPE_POS |
 		  config->src_multblk_type << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
-	if (chan->chip->dw->hdata->reg_map_8_channels) {
+	if (!chan->chip->dw->hdata->reg_map_cfg2) {
 		cfg_hi = config->tt_fc << CH_CFG_H_TT_FC_POS |
 			 config->hs_sel_src << CH_CFG_H_HS_SEL_SRC_POS |
 			 config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS |
@@ -214,7 +226,18 @@ static void axi_dma_hw_init(struct axi_d
 {
 	int ret;
 	u32 i;
+	int retries = 1000;
 
+	axi_dma_iowrite32(chip, DMAC_RESET, 1);
+	while (axi_dma_ioread32(chip, DMAC_RESET)) {
+		retries--;
+		if (!retries) {
+			dev_err(chip->dev, "%s: DMAC failed to reset\n",
+				__func__);
+			return;
+		}
+		cpu_relax();
+	}
 
 	for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
 		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
 		axi_chan_disable(&chip->dw->chan[i]);
@@ -276,7 +299,7 @@ static struct axi_dma_lli *axi_desc_get(
 static void axi_desc_put(struct axi_dma_desc *desc)
 {
 	struct axi_dma_chan *chan = desc->chan;
-	int count = atomic_read(&chan->descs_allocated);
+	u32 count = desc->hw_desc_count;
 	struct axi_dma_hw_desc *hw_desc;
 	int descs_put;
 
@@ -298,6 +321,48 @@ static void vchan_desc_put(struct virt_d
 	axi_desc_put(vd_to_axi_desc(vdesc));
 }
 
+static u32 axi_dma_desc_src_pos(struct axi_dma_desc *desc, dma_addr_t addr)
+{
+	unsigned int idx = 0;
+	u32 pos = 0;
+
+	while (pos < desc->length) {
+		struct axi_dma_hw_desc *hw_desc = &desc->hw_desc[idx++];
+		u32 len = hw_desc->len;
+		dma_addr_t start = le64_to_cpu(hw_desc->lli->sar);
+
+		if (addr >= start && addr <= (start + len)) {
+			pos += addr - start;
+			break;
+		}
+
+		pos += len;
+	}
+
+	return pos;
+}
+
+static u32 axi_dma_desc_dst_pos(struct axi_dma_desc *desc, dma_addr_t addr)
+{
+	unsigned int idx = 0;
+	u32 pos = 0;
+
+	while (pos < desc->length) {
+		struct axi_dma_hw_desc *hw_desc = &desc->hw_desc[idx++];
+		u32 len = hw_desc->len;
+		dma_addr_t start = le64_to_cpu(hw_desc->lli->dar);
+
+		if (addr >= start && addr <= (start + len)) {
+			pos += addr - start;
+			break;
+		}
+
+		pos += len;
+	}
+
+	return pos;
+}
+
 static enum dma_status
 dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
 		   struct dma_tx_state *txstate)
@@ -307,10 +372,7 @@ dma_chan_tx_status(struct dma_chan *dcha
 	enum dma_status status;
 	u32 completed_length;
 	unsigned long flags;
-	u32 completed_blocks;
 	size_t bytes = 0;
-	u32 length;
-	u32 len;
 
 	status = dma_cookie_status(dchan, cookie, txstate);
 	if (status == DMA_COMPLETE || !txstate)
@@ -319,16 +381,31 @@ dma_chan_tx_status(struct dma_chan *dcha
 	spin_lock_irqsave(&chan->vc.lock, flags);
 
 	vdesc = vchan_find_desc(&chan->vc, cookie);
-	if (vdesc) {
-		length = vd_to_axi_desc(vdesc)->length;
-		completed_blocks = vd_to_axi_desc(vdesc)->completed_blocks;
-		len = vd_to_axi_desc(vdesc)->hw_desc[0].len;
-		completed_length = completed_blocks * len;
-		bytes = length - completed_length;
+	if (vdesc && vdesc == vchan_next_desc(&chan->vc)) {
+		/* This descriptor is in-progress */
+		struct axi_dma_desc *desc = vd_to_axi_desc(vdesc);
+		dma_addr_t addr;
+
+		if (chan->direction == DMA_MEM_TO_DEV) {
+			addr = axi_chan_ioread64(chan, CH_SAR);
+			completed_length = axi_dma_desc_src_pos(desc, addr);
+		} else if (chan->direction == DMA_DEV_TO_MEM) {
+			addr = axi_chan_ioread64(chan, CH_DAR);
+			completed_length = axi_dma_desc_dst_pos(desc, addr);
+		} else {
+			completed_length = 0;
+		}
+		bytes = desc->length - completed_length;
+	} else if (vdesc) {
+		/* Still in the queue so not started */
+		bytes = vd_to_axi_desc(vdesc)->length;
 	}
-	spin_unlock_irqrestore(&chan->vc.lock, flags);
 
+	if (chan->is_paused && status == DMA_IN_PROGRESS)
+		status = DMA_PAUSED;
+
 	dma_set_residue(txstate, bytes);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
 
 	return status;
 }
@@ -516,7 +593,7 @@ static void dw_axi_dma_set_hw_channel(st
 	unsigned long reg_value, val;
 
 	if (!chip->apb_regs) {
-		dev_err(chip->dev, "apb_regs not initialized\n");
+		dev_dbg(chip->dev, "apb_regs not initialized\n");
 		return;
 	}
 
@@ -620,18 +697,25 @@ static int dw_axi_dma_set_hw_desc(struct
 	switch (chan->direction) {
 	case DMA_MEM_TO_DEV:
 		reg_width = __ffs(chan->config.dst_addr_width);
-		device_addr = chan->config.dst_addr;
+		device_addr = phys_to_dma(chan->chip->dev, chan->config.dst_addr);
 		ctllo = reg_width << CH_CTL_L_DST_WIDTH_POS |
 			mem_width << CH_CTL_L_SRC_WIDTH_POS |
+			DWAXIDMAC_BURST_TRANS_LEN_1 << CH_CTL_L_DST_MSIZE_POS |
+			DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
 			DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS |
 			DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS;
 		block_ts = len >> mem_width;
 		break;
 	case DMA_DEV_TO_MEM:
 		reg_width = __ffs(chan->config.src_addr_width);
-		device_addr = chan->config.src_addr;
+		/* Prevent partial access units getting lost */
+		if (mem_width > reg_width)
+			mem_width = reg_width;
+		device_addr = phys_to_dma(chan->chip->dev, chan->config.src_addr);
 		ctllo = reg_width << CH_CTL_L_SRC_WIDTH_POS |
 			mem_width << CH_CTL_L_DST_WIDTH_POS |
+			DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+			DWAXIDMAC_BURST_TRANS_LEN_1 << CH_CTL_L_SRC_MSIZE_POS |
 			DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
 			DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS;
 		block_ts = len >> reg_width;
@@ -667,9 +751,6 @@ static int dw_axi_dma_set_hw_desc(struct
 	}
 
 	hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
-
-	ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
-		 DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS;
 	hw_desc->lli->ctl_lo = cpu_to_le32(ctllo);
 
 	set_desc_src_master(hw_desc);
@@ -764,6 +845,8 @@ dw_axi_dma_chan_prep_cyclic(struct dma_c
 		src_addr += segment_len;
 	}
 
+	desc->hw_desc_count = total_segments;
+
 	llp = desc->hw_desc[0].llp;
 
 	/* Managed transfer list */
@@ -843,6 +926,8 @@ dw_axi_dma_chan_prep_slave_sg(struct dma
 		} while (len >= segment_len);
 	}
 
+	desc->hw_desc_count = loop;
+
 	/* Set end-of-link to the last link descriptor of list */
 	set_desc_last(&desc->hw_desc[num_sgs - 1]);
 
@@ -950,6 +1035,8 @@ dma_chan_prep_dma_memcpy(struct dma_chan
 		num++;
 	}
 
+	desc->hw_desc_count = num;
+
 	/* Set end-of-link to the last link descriptor of list */
 	set_desc_last(&desc->hw_desc[num - 1]);
 	/* Managed transfer list */
@@ -998,7 +1085,7 @@ static void axi_chan_dump_lli(struct axi
 static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
 				   struct axi_dma_desc *desc_head)
 {
-	int count = atomic_read(&chan->descs_allocated);
+	u32 count = desc_head->hw_desc_count;
 	int i;
 
 	for (i = 0; i < count; i++)
@@ -1041,11 +1128,11 @@ out:
 
 static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
 {
-	int count = atomic_read(&chan->descs_allocated);
 	struct axi_dma_hw_desc *hw_desc;
 	struct axi_dma_desc *desc;
 	struct virt_dma_desc *vd;
 	unsigned long flags;
+	u32 count;
 	u64 llp;
 	int i;
 
@@ -1067,6 +1154,7 @@ static void axi_chan_block_xfer_complete
 	if (chan->cyclic) {
 		desc = vd_to_axi_desc(vd);
 		if (desc) {
+			count = desc->hw_desc_count;
 			llp = lo_hi_readq(chan->chan_regs + CH_LLP);
 			for (i = 0; i < count; i++) {
 				hw_desc = &desc->hw_desc[i];
@@ -1310,6 +1398,8 @@ static int parse_device_properties(struc
 	chip->dw->hdata->nr_channels = tmp;
 	if (tmp <= DMA_REG_MAP_CH_REF)
 		chip->dw->hdata->reg_map_8_channels = true;
+	else
+		chip->dw->hdata->reg_map_cfg2 = true;
 
 	ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
 	if (ret)
@@ -1319,6 +1409,10 @@ static int parse_device_properties(struc
 
 	chip->dw->hdata->nr_masters = tmp;
 
+	ret = device_property_read_u32(dev, "snps,dma-targets", &tmp);
+	if (!ret && tmp > 16)
+		chip->dw->hdata->reg_map_cfg2 = true;
+
 	ret = device_property_read_u32(dev, "snps,data-width", &tmp);
 	if (ret)
 		return ret;
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -32,6 +32,8 @@ struct dw_axi_dma_hcfg {
 	u32	axi_rw_burst_len;
 	/* Register map for DMAX_NUM_CHANNELS <= 8 */
 	bool	reg_map_8_channels;
+	/* Register map for DMAX_NUM_CHANNELS > 8 || DMAX_NUM_HS_IF > 16 */
+	bool	reg_map_cfg2;
 	bool	restrict_axi_burst_len;
 };
 
@@ -100,6 +102,7 @@ struct axi_dma_desc {
 
 	struct virt_dma_desc		vd;
 	struct axi_dma_chan		*chan;
+	u32				hw_desc_count;
 	u32				completed_blocks;
 	u32				length;
 	u32				period_len;