003-v6.13-mmc-mtk-sd-Implement-Host-Software-Queue-for-eMMC.patch 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. From 7e9ddd7d45897b15a64c4a3c88f2f7909bf49749 Mon Sep 17 00:00:00 2001
  2. From: AngeloGioacchino Del Regno <[email protected]>
  3. Date: Mon, 30 Sep 2024 11:01:56 +0200
  4. Subject: [PATCH] mmc: mtk-sd: Implement Host Software Queue for eMMC and SD
  5. Card
  6. Add support for Host Software Queue (HSQ) and enable it when the
  7. controller instance does not have Command Queue Engine HW support.
  8. HSQ is enabled only for eMMC and SD/MicroSD cards, and not for
  9. SDIO, because performance improvements were seen only with the
  10. former.
  11. Performance was measured with a SanDisk Extreme Ultra A2 MicroSD
  12. card in a MediaTek MT8195T Acer Chromebook Spin 513 (CP513-2H),
  13. by running FIO (bs=4k) on an ArchLinux userspace.
  14. .... Summarizing ....
  15. Random read: +24.28% IOPS, +24.29% BW
  16. Sequential read: +3.14% IOPS, +3.49% BW
  17. Random RW (avg): +50.53% IOPS, +50.68% BW
  18. Below, more data from the benchmarks.
  19. Before:
  20. - Random read: IOPS=1643, BW=6574KiB/s
  21. bw ( KiB/s): min= 4578, max= 7440, per=99.95%, avg=6571.55, stdev=74.16, samples=953
  22. iops : min= 1144, max= 1860, avg=1642.14, stdev=18.54, samples=953
  23. lat (msec) : 100=0.01%, 250=0.12%, 500=0.38%, 750=97.89%, 1000=1.44%, 2000=0.16%
  24. - Sequential read: IOPS=19.1k, BW=74.4MiB/s
  25. bw ( KiB/s): min=12288, max=118483, per=100.00%, avg=76293.38, stdev=1971.42, samples=956
  26. iops : min= 3072, max=29620, avg=19072.14, stdev=492.87, samples=956
  27. lat (msec) : 4=0.01%, 10=0.01%, 20=0.21%, 50=23.95%, 100=75.67%, 250=0.05%, 500=0.03%, 750=0.08%
  28. - Random R/W: read: IOPS=282, BW=1129KiB/s (1156kB/s) write: IOPS=284, BW=1136KiB/s
  29. read bw ( KiB/s): min= 31, max= 3496, per=100.00%, avg=1703.67, stdev=155.42, samples=630
  30. read iops : min= 7, max= 873, avg=425.22, stdev=38.85, samples=630
  31. wri bw ( KiB/s): min= 31, max= 3443, per=100.00%, avg=1674.27, stdev=164.23, samples=644
  32. wri iops : min= 7, max= 860, avg=417.87, stdev=41.03, samples=644
  33. lat (msec) : 250=0.13%, 500=0.44%, 750=0.84%, 1000=22.29%, 2000=74.01%, >=2000=2.30%
  34. After:
  35. - Random read: IOPS=2042, BW=8171KiB/s
  36. bw ( KiB/s): min= 4907, max= 9072, per=99.94%, avg=8166.80, stdev=93.77, samples=954
  37. iops : min= 1226, max= 2268, avg=2040.78, stdev=23.41, samples=954
  38. lat (msec) : 100=0.03%, 250=0.13%, 500=52.88%, 750=46.64%, 1000=0.32%
  39. - Sequential read: IOPS=19.7k, BW=77.0MiB/s
  40. bw ( KiB/s): min=67980, max=94248, per=100.00%, avg=78894.27, stdev=1475.07, samples=956
  41. iops : min=16994, max=23562, avg=19722.45, stdev=368.76, samples=956
  42. lat (msec) : 4=0.01%, 10=0.01%, 20=0.05%, 50=28.78%, 100=71.14%, 250=0.01%, 500=0.02%
  43. - Random R/W: read: IOPS=424, BW=1699KiB/s write: IOPS=428, BW=1714KiB/s
  44. read bw ( KiB/s): min= 228, max= 2856, per=100.00%, avg=1796.60, stdev=112.59, samples=901
  45. read iops : min= 54, max= 712, avg=447.81, stdev=28.21, samples=901
  46. wri bw ( KiB/s): min= 28, max= 2904, per=100.00%, avg=1780.11, stdev=128.27, samples=916
  47. wri iops : min= 4, max= 724, avg=443.69, stdev=32.14, samples=916
  48. Signed-off-by: AngeloGioacchino Del Regno <[email protected]>
  49. Link: https://lore.kernel.org/r/[email protected]
  50. Signed-off-by: Ulf Hansson <[email protected]>
  51. ---
  52. drivers/mmc/host/Kconfig | 1 +
  53. drivers/mmc/host/mtk-sd.c | 49 +++++++++++++++++++++++++++++++++++++--
  54. 2 files changed, 48 insertions(+), 2 deletions(-)
  55. --- a/drivers/mmc/host/Kconfig
  56. +++ b/drivers/mmc/host/Kconfig
  57. @@ -1009,6 +1009,7 @@ config MMC_MTK
  58. depends on COMMON_CLK
  59. select REGULATOR
  60. select MMC_CQHCI
  61. + select MMC_HSQ
  62. help
  63. This selects the MediaTek(R) Secure digital and Multimedia card Interface.
  64. If you have a machine with a integrated SD/MMC card reader, say Y or M here.
  65. --- a/drivers/mmc/host/mtk-sd.c
  66. +++ b/drivers/mmc/host/mtk-sd.c
  67. @@ -33,6 +33,7 @@
  68. #include <linux/mmc/slot-gpio.h>
  69. #include "cqhci.h"
  70. +#include "mmc_hsq.h"
  71. #define MAX_BD_NUM 1024
  72. #define MSDC_NR_CLOCKS 3
  73. @@ -475,6 +476,7 @@ struct msdc_host {
  74. bool hs400_tuning; /* hs400 mode online tuning */
  75. bool internal_cd; /* Use internal card-detect logic */
  76. bool cqhci; /* support eMMC hw cmdq */
  77. + bool hsq_en; /* Host Software Queue is enabled */
  78. struct msdc_save_para save_para; /* used when gate HCLK */
  79. struct msdc_tune_para def_tune_para; /* default tune setting */
  80. struct msdc_tune_para saved_tune_para; /* tune result of CMD21/CMD19 */
  81. @@ -1171,7 +1173,9 @@ static void msdc_track_cmd_data(struct m
  82. static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
  83. {
  84. + struct mmc_host *mmc = mmc_from_priv(host);
  85. unsigned long flags;
  86. + bool hsq_req_done;
  87. /*
  88. * No need check the return value of cancel_delayed_work, as only ONE
  89. @@ -1179,6 +1183,27 @@ static void msdc_request_done(struct msd
  90. */
  91. cancel_delayed_work(&host->req_timeout);
  92. + /*
  93. + * If the request was handled from Host Software Queue, there's almost
  94. + * nothing to do here, and we also don't need to reset mrq as any race
  95. + * condition would not have any room to happen, since HSQ stores the
  96. + * "scheduled" mrqs in an internal array of mrq slots anyway.
  97. + * However, if the controller experienced an error, we still want to
  98. + * reset it as soon as possible.
  99. + *
  100. + * Note that non-HSQ requests will still be happening at times, even
  101. + * though it is enabled, and that's what is going to reset host->mrq.
  102. + * Also, msdc_unprepare_data() is going to be called by HSQ when needed
  103. + * as HSQ request finalization will eventually call the .post_req()
  104. + * callback of this driver which, in turn, unprepares the data.
  105. + */
  106. + hsq_req_done = host->hsq_en ? mmc_hsq_finalize_request(mmc, mrq) : false;
  107. + if (hsq_req_done) {
  108. + if (host->error)
  109. + msdc_reset_hw(host);
  110. + return;
  111. + }
  112. +
  113. spin_lock_irqsave(&host->lock, flags);
  114. host->mrq = NULL;
  115. spin_unlock_irqrestore(&host->lock, flags);
  116. @@ -1188,7 +1213,7 @@ static void msdc_request_done(struct msd
  117. msdc_unprepare_data(host, mrq->data);
  118. if (host->error)
  119. msdc_reset_hw(host);
  120. - mmc_request_done(mmc_from_priv(host), mrq);
  121. + mmc_request_done(mmc, mrq);
  122. if (host->dev_comp->recheck_sdio_irq)
  123. msdc_recheck_sdio_irq(host);
  124. }
  125. @@ -1348,7 +1373,7 @@ static void msdc_ops_request(struct mmc_
  126. struct msdc_host *host = mmc_priv(mmc);
  127. host->error = 0;
  128. - WARN_ON(host->mrq);
  129. + WARN_ON(!host->hsq_en && host->mrq);
  130. host->mrq = mrq;
  131. if (mrq->data) {
  132. @@ -2925,6 +2950,19 @@ static int msdc_drv_probe(struct platfor
  133. mmc->max_seg_size = 64 * 1024;
  134. /* Reduce CIT to 0x40 that corresponds to 2.35us */
  135. msdc_cqe_cit_cal(host, 2350);
  136. + } else if (mmc->caps2 & MMC_CAP2_NO_SDIO) {
  137. + /* Use HSQ on eMMC/SD (but not on SDIO) if HW CQE not supported */
  138. + struct mmc_hsq *hsq = devm_kzalloc(&pdev->dev, sizeof(*hsq), GFP_KERNEL);
  139. + if (!hsq) {
  140. + ret = -ENOMEM;
  141. + goto release;
  142. + }
  143. +
  144. + ret = mmc_hsq_init(hsq, mmc);
  145. + if (ret)
  146. + goto release;
  147. +
  148. + host->hsq_en = true;
  149. }
  150. ret = devm_request_irq(&pdev->dev, host->irq, msdc_irq,
  151. @@ -3050,6 +3088,9 @@ static int __maybe_unused msdc_runtime_s
  152. struct mmc_host *mmc = dev_get_drvdata(dev);
  153. struct msdc_host *host = mmc_priv(mmc);
  154. + if (host->hsq_en)
  155. + mmc_hsq_suspend(mmc);
  156. +
  157. msdc_save_reg(host);
  158. if (sdio_irq_claimed(mmc)) {
  159. @@ -3080,6 +3121,10 @@ static int __maybe_unused msdc_runtime_r
  160. pinctrl_select_state(host->pinctrl, host->pins_uhs);
  161. enable_irq(host->irq);
  162. }
  163. +
  164. + if (host->hsq_en)
  165. + mmc_hsq_resume(mmc);
  166. +
  167. return 0;
  168. }