ml_dsa_ntt.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /*
  2. * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
  3. *
  4. * Licensed under the Apache License 2.0 (the "License"). You may not use
  5. * this file except in compliance with the License. You can obtain a copy
  6. * in the file LICENSE in the source distribution or at
  7. * https://www.openssl.org/source/license.html
  8. */
  9. #include "ml_dsa_local.h"
  10. #include "ml_dsa_poly.h"
  11. /*
  12. * This file has multiple parts required for fast matrix multiplication,
  13. * 1) NTT (See https://eprint.iacr.org/2024/585.pdf)
  14. * NTT and NTT inverse transformations are Discrete Fourier Transforms in a
  15. * polynomial ring. Fast-Fourier Transformations can then be applied to make
  16. * multiplications n log(n). This uses the symmetry of the transformation to
  17. * reduce computations.
  18. *
  19. * 2) Montgomery multiplication
  20. * The multiplication of a.b mod q requires division by q which is a slow operation.
  21. *
  22. * When many multiplications mod q are required montgomery multiplication
  23. * can be used. This requires a number R > q such that R & q are coprime
  24. * (i.e. GCD(R, q) = 1), so that division happens using R instead of q.
  25. * If r is a power of 2 then this division can be done as a bit shift.
  26. *
  27. * Given that q = 2^23 - 2^13 + 1
  28. * We can chose a Montgomery multiplier of R = 2^32.
  29. *
  30. * To transform |a| into Montgomery form |m| we use
  31. * m = a mod q * ((2^32)*(2^32) mod q)
  32. * which is then Montgomery reduced, removing the excess factor of R = 2^32.
  33. */
  34. /*
  35. * The table in FIPS 204 Appendix B uses the following formula
  36. * zeta[k]= 1753^bitrev(k) mod q for (k = 1..255) (The first value is not used).
  37. *
  38. * As this implementation uses montgomery form with a multiplier of 2^32.
  39. * The values need to be transformed i.e.
  40. *
  41. * zetasMontgomery[k] = reduce_montgomery(zeta[k] * (2^32 * 2^32 mod(q)))
  42. * reduce_montgomery() is defined below..
  43. */
  44. static const uint32_t zetas_montgomery[256] = {
  45. 4193792, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468,
  46. 1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103,
  47. 2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868,
  48. 6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005,
  49. 2706023, 95776, 3077325, 3530437, 6718724, 4788269, 5842901, 3915439,
  50. 4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118,
  51. 6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596,
  52. 811944, 531354, 954230, 3881043, 3900724, 5823537, 2071892, 5582638,
  53. 4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196,
  54. 7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922,
  55. 3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370,
  56. 7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987,
  57. 5037034, 264944, 508951, 3097992, 44288, 7280319, 904516, 3958618,
  58. 4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969, 7063561,
  59. 189548, 4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330,
  60. 1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961,
  61. 2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955,
  62. 266997, 2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039,
  63. 900702, 1859098, 909542, 819034, 495491, 6767243, 8337157, 7857917,
  64. 7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579,
  65. 342297, 286988, 5942594, 4108315, 3437287, 5038140, 1735879, 203044,
  66. 2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974,
  67. 4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447,
  68. 7047359, 1237275, 5062207, 6950192, 7929317, 1312455, 3306115, 6417775,
  69. 7100756, 1917081, 5834105, 7005614, 1500165, 777191, 2235880, 3406031,
  70. 7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136, 4603424,
  71. 6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531, 7173032,
  72. 5196991, 162844, 1616392, 3014001, 810149, 1652634, 4686184, 6581310,
  73. 5341501, 3523897, 3866901, 269760, 2213111, 7404533, 1717735, 472078,
  74. 7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524,
  75. 5441381, 6144432, 7959518, 6094090, 183443, 7403526, 1612842, 4834730,
  76. 7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782
  77. };
  78. /*
  79. * @brief When multiplying 2 numbers mod q that are in montgomery form, the
  80. * product mod q needs to be multiplied by 2^-32 to be in montgomery form.
  81. * See FIPS 204, Algorithm 49, MontgomeryReduce()
  82. * Note it is slightly different due to the input range being positive
  83. *
  84. * @param a is the result of a multiply of 2 numbers in montgomery form,
  85. * in the range 0...(2^32)*q
  86. * @returns The Montgomery form of 'a' with multiplier 2^32 in the range 0..q-1
  87. * The result is congruent to x * 2^-32 mod q
  88. */
  89. static uint32_t reduce_montgomery(uint64_t a)
  90. {
  91. uint64_t t = (uint32_t)a * (uint32_t)ML_DSA_Q_NEG_INV; /* a * -qinv */
  92. uint64_t b = a + t * ML_DSA_Q; /* a - t * q */
  93. uint32_t c = b >> 32; /* /2^32 = 0..2q */
  94. return reduce_once(c); /* 0..q */
  95. }
  96. /*
  97. * @brief Multiply two polynomials in the number theoretically transformed state.
  98. * See FIPS 204, Algorithm 45, MultiplyNTT()
  99. * This function has been modified to use montgomery multiplication
  100. *
  101. * @param lhs A polynomial multiplicand
  102. * @param rhs A polynomial multiplier
  103. * @param out The returned result of the polynomial multiply
  104. */
  105. void ossl_ml_dsa_poly_ntt_mult(const POLY *lhs, const POLY *rhs, POLY *out)
  106. {
  107. int i;
  108. for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)
  109. out->coeff[i] =
  110. reduce_montgomery((uint64_t)lhs->coeff[i] * (uint64_t)rhs->coeff[i]);
  111. }
  112. /*
  113. * In place number theoretic transform of a given polynomial.
  114. *
  115. * See FIPS 204, Algorithm 41, NTT()
  116. * This function uses montgomery multiplication.
  117. *
  118. * @param p a polynomial that is used as the input, that is replaced with
  119. * the NTT of the polynomial
  120. */
  121. void ossl_ml_dsa_poly_ntt(POLY *p)
  122. {
  123. int i, j, k;
  124. int step;
  125. int offset = ML_DSA_NUM_POLY_COEFFICIENTS;
  126. /* Step: 1, 2, 4, 8, ..., 128 */
  127. for (step = 1; step < ML_DSA_NUM_POLY_COEFFICIENTS; step <<= 1) {
  128. k = 0;
  129. offset >>= 1; /* Offset: 128, 64, 32, 16, ..., 1 */
  130. for (i = 0; i < step; i++) {
  131. const uint32_t z_step_root = zetas_montgomery[step + i];
  132. for (j = k; j < k + offset; j++) {
  133. uint32_t w_even = p->coeff[j];
  134. uint32_t t_odd =
  135. reduce_montgomery((uint64_t)z_step_root
  136. * (uint64_t)p->coeff[j + offset]);
  137. p->coeff[j] = reduce_once(w_even + t_odd);
  138. p->coeff[j + offset] = mod_sub(w_even, t_odd);
  139. }
  140. k += 2 * offset;
  141. }
  142. }
  143. }
  144. /*
  145. * @brief In place inverse number theoretic transform of a given polynomial.
  146. * See FIPS 204, Algorithm 42, NTT^-1()
  147. *
  148. * @param p a polynomial that is used as the input, that is overwritten with
  149. * the inverse of the NTT.
  150. */
  151. void ossl_ml_dsa_poly_ntt_inverse(POLY *p)
  152. {
  153. /*
  154. * Step: 128, 64, 32, 16, ..., 1
  155. * Offset: 1, 2, 4, 8, ..., 128
  156. */
  157. int i, j, k, offset, step = ML_DSA_NUM_POLY_COEFFICIENTS;
  158. /*
  159. * The multiplicative inverse of 256 mod q, in Montgomery form is
  160. * ((256^-1 mod q) * ((2^32 * 2^32) mod q)) mod q = (8347681 * 2365951) mod 8380417
  161. */
  162. static const uint32_t inverse_degree_montgomery = 41978;
  163. for (offset = 1; offset < ML_DSA_NUM_POLY_COEFFICIENTS; offset <<= 1) {
  164. step >>= 1;
  165. k = 0;
  166. for (i = 0; i < step; i++) {
  167. const uint32_t step_root =
  168. ML_DSA_Q - zetas_montgomery[step + (step - 1 - i)];
  169. for (j = k; j < k + offset; j++) {
  170. uint32_t even = p->coeff[j];
  171. uint32_t odd = p->coeff[j + offset];
  172. p->coeff[j] = reduce_once(odd + even);
  173. p->coeff[j + offset] =
  174. reduce_montgomery((uint64_t)step_root
  175. * (uint64_t)(ML_DSA_Q + even - odd));
  176. }
  177. k += 2 * offset;
  178. }
  179. }
  180. for (i = 0; i < ML_DSA_NUM_POLY_COEFFICIENTS; i++)
  181. p->coeff[i] = reduce_montgomery((uint64_t)p->coeff[i] *
  182. (uint64_t)inverse_degree_montgomery);
  183. }