020-0017-crypto-crypto4xx-add-backlog-queue-support.patch 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. From 8ef8d195430ca3542d0434cf25e5115484b9fa32 Mon Sep 17 00:00:00 2001
  2. From: Christian Lamparter <[email protected]>
  3. Date: Wed, 4 Oct 2017 01:00:09 +0200
  4. Subject: [PATCH 17/25] crypto: crypto4xx - add backlog queue support
  5. Previously, if the crypto4xx driver used all available
  6. security contexts, it would simply refuse new requests
  7. with -EAGAIN. CRYPTO_TFM_REQ_MAY_BACKLOG was ignored.
  8. In the case of dm-crypt.c's crypt_convert() function, this was
  9. causing the following errors to manifest if the system was
  10. pushed hard enough:
  11. | EXT4-fs warning (dm-1): ext4_end_bio:314: I/O error -5 writing to ino ..
  12. | EXT4-fs warning (dm-1): ext4_end_bio:314: I/O error -5 writing to ino ..
  13. | EXT4-fs warning (dm-1): ext4_end_bio:314: I/O error -5 writing to ino ..
  14. | JBD2: Detected IO errors while flushing file data on dm-1-8
  15. | Aborting journal on device dm-1-8.
  16. | EXT4-fs error : ext4_journal_check_start:56: Detected aborted journal
  17. | EXT4-fs (dm-1): Remounting filesystem read-only
  18. | EXT4-fs : ext4_writepages: jbd2_start: 2048 pages, inode 498...; err -30
  19. (This did cause corruptions due to failed writes)
  20. To fix this mess, the crypto4xx driver needs to notify the
  21. user to slow down. This can be achieved by returning -EBUSY
  22. on requests once the crypto hardware is falling behind.
  23. Note: -EBUSY has two different meanings. Setting the flag
  24. CRYPTO_TFM_REQ_MAY_BACKLOG implies that the request was
  25. successfully queued by the crypto driver. To meet this
  26. requirement, the implementation introduces a threshold check and
  27. adds logic to the completion routines in much the same way as
  28. AMD's Cryptographic Coprocessor (CCP) driver does.
  29. Note2: Tests showed that dm-crypt starved ipsec traffic.
  30. Under load, ipsec links dropped to 0 Kbits/s. This is because
  31. dm-crypt's callback would instantly queue the next request.
  32. In order to not starve ipsec, the driver reserves a small
  33. portion of the available crypto contexts for this purpose.
  34. Signed-off-by: Christian Lamparter <[email protected]>
  35. Signed-off-by: Herbert Xu <[email protected]>
  36. ---
  37. drivers/crypto/amcc/crypto4xx_core.c | 47 ++++++++++++++++++++++++++++++------
  38. drivers/crypto/amcc/crypto4xx_core.h | 3 ++-
  39. 2 files changed, 41 insertions(+), 9 deletions(-)
  40. --- a/drivers/crypto/amcc/crypto4xx_core.c
  41. +++ b/drivers/crypto/amcc/crypto4xx_core.c
  42. @@ -39,6 +39,7 @@
  43. #include <crypto/ctr.h>
  44. #include <crypto/sha.h>
  45. #include <crypto/scatterwalk.h>
  46. +#include <crypto/internal/skcipher.h>
  47. #include "crypto4xx_reg_def.h"
  48. #include "crypto4xx_core.h"
  49. #include "crypto4xx_sa.h"
  50. @@ -573,8 +574,10 @@ static u32 crypto4xx_ablkcipher_done(str
  51. dst->offset, dst->length, DMA_FROM_DEVICE);
  52. }
  53. crypto4xx_ret_sg_desc(dev, pd_uinfo);
  54. - if (ablk_req->base.complete != NULL)
  55. - ablk_req->base.complete(&ablk_req->base, 0);
  56. +
  57. + if (pd_uinfo->state & PD_ENTRY_BUSY)
  58. + ablkcipher_request_complete(ablk_req, -EINPROGRESS);
  59. + ablkcipher_request_complete(ablk_req, 0);
  60. return 0;
  61. }
  62. @@ -591,9 +594,10 @@ static u32 crypto4xx_ahash_done(struct c
  63. crypto4xx_copy_digest_to_dst(pd_uinfo,
  64. crypto_tfm_ctx(ahash_req->base.tfm));
  65. crypto4xx_ret_sg_desc(dev, pd_uinfo);
  66. - /* call user provided callback function x */
  67. - if (ahash_req->base.complete != NULL)
  68. - ahash_req->base.complete(&ahash_req->base, 0);
  69. +
  70. + if (pd_uinfo->state & PD_ENTRY_BUSY)
  71. + ahash_request_complete(ahash_req, -EINPROGRESS);
  72. + ahash_request_complete(ahash_req, 0);
  73. return 0;
  74. }
  75. @@ -704,6 +708,7 @@ u32 crypto4xx_build_pd(struct crypto_asy
  76. struct pd_uinfo *pd_uinfo = NULL;
  77. unsigned int nbytes = datalen, idx;
  78. u32 gd_idx = 0;
  79. + bool is_busy;
  80. /* figure how many gd is needed */
  81. num_gd = sg_nents_for_len(src, datalen);
  82. @@ -734,6 +739,31 @@ u32 crypto4xx_build_pd(struct crypto_asy
  83. * already got must be return the original place.
  84. */
  85. spin_lock_irqsave(&dev->core_dev->lock, flags);
  86. + /*
  87. + * Let the caller know to slow down, once more than 13/16ths = 81%
  88. + * of the available data contexts are being used simultaneously.
  89. + *
  90. + * With PPC4XX_NUM_PD = 256, this will leave a "backlog queue" for
  91. + * 31 more contexts before new requests have to be rejected.
  92. + */
  93. + if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) {
  94. + is_busy = ((dev->pdr_head - dev->pdr_tail) % PPC4XX_NUM_PD) >=
  95. + ((PPC4XX_NUM_PD * 13) / 16);
  96. + } else {
  97. + /*
  98. + * To fix contention issues between ipsec (no backlog) and
  99. + * dm-crypt (backlog), reserve 32 entries for "no backlog"
  100. + * data contexts.
  101. + */
  102. + is_busy = ((dev->pdr_head - dev->pdr_tail) % PPC4XX_NUM_PD) >=
  103. + ((PPC4XX_NUM_PD * 15) / 16);
  104. +
  105. + if (is_busy) {
  106. + spin_unlock_irqrestore(&dev->core_dev->lock, flags);
  107. + return -EBUSY;
  108. + }
  109. + }
  110. +
  111. if (num_gd) {
  112. fst_gd = crypto4xx_get_n_gd(dev, num_gd);
  113. if (fst_gd == ERING_WAS_FULL) {
  114. @@ -888,11 +918,12 @@ u32 crypto4xx_build_pd(struct crypto_asy
  115. sa->sa_command_1.bf.hash_crypto_offset = 0;
  116. pd->pd_ctl.w = ctx->pd_ctl;
  117. pd->pd_ctl_len.w = 0x00400000 | datalen;
  118. - pd_uinfo->state = PD_ENTRY_INUSE;
  119. + pd_uinfo->state = PD_ENTRY_INUSE | (is_busy ? PD_ENTRY_BUSY : 0);
  120. +
  121. wmb();
  122. /* write any value to push engine to read a pd */
  123. writel(1, dev->ce_base + CRYPTO4XX_INT_DESCR_RD);
  124. - return -EINPROGRESS;
  125. + return is_busy ? -EBUSY : -EINPROGRESS;
  126. }
  127. /**
  128. @@ -997,7 +1028,7 @@ static void crypto4xx_bh_tasklet_cb(unsi
  129. tail = core_dev->dev->pdr_tail;
  130. pd_uinfo = &core_dev->dev->pdr_uinfo[tail];
  131. pd = &core_dev->dev->pdr[tail];
  132. - if ((pd_uinfo->state == PD_ENTRY_INUSE) &&
  133. + if ((pd_uinfo->state & PD_ENTRY_INUSE) &&
  134. pd->pd_ctl.bf.pe_done &&
  135. !pd->pd_ctl.bf.host_ready) {
  136. pd->pd_ctl.bf.pe_done = 0;
  137. --- a/drivers/crypto/amcc/crypto4xx_core.h
  138. +++ b/drivers/crypto/amcc/crypto4xx_core.h
  139. @@ -44,7 +44,8 @@
  140. #define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1)
  141. #define PPC4XX_SD_BUFFER_SIZE 2048
  142. -#define PD_ENTRY_INUSE 1
  143. +#define PD_ENTRY_BUSY BIT(1)
  144. +#define PD_ENTRY_INUSE BIT(0)
  145. #define PD_ENTRY_FREE 0
  146. #define ERING_WAS_FULL 0xffffffff