mpint_i.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /*
  2. * mpint_i.h: definitions used internally by the bignum code, and
  3. * also a few other vaguely-bignum-like places.
  4. */
  5. /* ----------------------------------------------------------------------
  6. * The assorted conditional definitions of BignumInt and multiply
  7. * macros used throughout the bignum code to treat numbers as arrays
  8. * of the most conveniently sized word for the target machine.
  9. * Exported so that other code (e.g. poly1305) can use it too.
  10. *
  11. * This code must export, in whatever ifdef branch it ends up in:
  12. *
  13. * - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
  14. * unsigned integer type which will be used as the base word size
  15. * for all bignum operations. BignumCarry is an unsigned integer
  16. * type used to hold the carry flag taken as input and output by
  17. * the BignumADC macro (see below).
  18. *
  19. * - five constant macros:
  20. * + BIGNUM_INT_BITS, the number of bits in BignumInt,
  21. * + BIGNUM_INT_BYTES, the number of bytes that works out to
  22. * + BIGNUM_TOP_BIT, the BignumInt value consisting of only the top bit
  23. * + BIGNUM_INT_MASK, the BignumInt value with all bits set
  24. * + BIGNUM_INT_BITS_BITS, log to the base 2 of BIGNUM_INT_BITS.
  25. *
  26. * - four statement macros: BignumADC, BignumMUL, BignumMULADD,
  27. * BignumMULADD2. These do various kinds of multi-word arithmetic,
  28. * and all produce two output values.
  29. * * BignumADC(ret,retc,a,b,c) takes input BignumInt values a,b
  30. * and a BignumCarry c, and outputs a BignumInt ret = a+b+c and
  31. * a BignumCarry retc which is the carry off the top of that
  32. * addition.
  33. * * BignumMUL(rh,rl,a,b) returns the two halves of the
  34. * double-width product a*b.
  35. * * BignumMULADD(rh,rl,a,b,addend) returns the two halves of the
  36. * double-width value a*b + addend.
  37. * * BignumMULADD2(rh,rl,a,b,addend1,addend2) returns the two
  38. * halves of the double-width value a*b + addend1 + addend2.
  39. *
  40. * Every branch of the main ifdef below defines the type BignumInt and
  41. * the value BIGNUM_INT_BITS_BITS. The other constant macros are
  42. * filled in by common code further down.
  43. *
  44. * Most branches also define a macro DEFINE_BIGNUMDBLINT containing a
  45. * typedef statement which declares a type _twice_ the length of a
  46. * BignumInt. This causes the common code further down to produce a
  47. * default implementation of the four statement macros in terms of
  48. * that double-width type, and also to defined BignumCarry to be
  49. * BignumInt.
  50. *
  51. * However, if a particular compile target does not have a type twice
  52. * the length of the BignumInt you want to use but it does provide
  53. * some alternative means of doing add-with-carry and double-word
  54. * multiply, then the ifdef branch in question can just define
  55. * BignumCarry and the four statement macros itself, and that's fine
  56. * too.
  57. */
  58. /* You can lower the BignumInt size by defining BIGNUM_OVERRIDE on the
  59. * command line to be your chosen max value of BIGNUM_INT_BITS_BITS */
  60. #define BB_OK(b) (!defined BIGNUM_OVERRIDE || BIGNUM_OVERRIDE >= b)
  61. #if defined __SIZEOF_INT128__ && BB_OK(6)
  62. /*
  63. * 64-bit BignumInt using gcc/clang style 128-bit BignumDblInt.
  64. *
  65. * gcc and clang both provide a __uint128_t type on 64-bit targets
  66. * (and, when they do, indicate its presence by the above macro),
  67. * using the same 'two machine registers' kind of code generation
  68. * that 32-bit targets use for 64-bit ints.
  69. */
  70. typedef unsigned long long BignumInt;
  71. #define BIGNUM_INT_BITS_BITS 6
  72. #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
  73. #elif defined _MSC_VER && defined _M_AMD64 && BB_OK(6)
  74. /*
  75. * 64-bit BignumInt, using Visual Studio x86-64 compiler intrinsics.
  76. *
  77. * 64-bit Visual Studio doesn't provide very much in the way of help
  78. * here: there's no int128 type, and also no inline assembler giving
  79. * us direct access to the x86-64 MUL or ADC instructions. However,
  80. * there are compiler intrinsics giving us that access, so we can
  81. * use those - though it turns out we have to be a little careful,
  82. * since they seem to generate wrong code if their pointer-typed
  83. * output parameters alias their inputs. Hence all the internal temp
  84. * variables inside the macros.
  85. */
  86. #include <intrin.h>
  87. typedef unsigned char BignumCarry; /* the type _addcarry_u64 likes to use */
  88. typedef unsigned __int64 BignumInt;
  89. #define BIGNUM_INT_BITS_BITS 6
  90. #define BignumADC(ret, retc, a, b, c) do \
  91. { \
  92. BignumInt ADC_tmp; \
  93. (retc) = _addcarry_u64(c, a, b, &ADC_tmp); \
  94. (ret) = ADC_tmp; \
  95. } while (0)
  96. #define BignumMUL(rh, rl, a, b) do \
  97. { \
  98. BignumInt MULADD_hi; \
  99. (rl) = _umul128(a, b, &MULADD_hi); \
  100. (rh) = MULADD_hi; \
  101. } while (0)
  102. #define BignumMULADD(rh, rl, a, b, addend) do \
  103. { \
  104. BignumInt MULADD_lo, MULADD_hi; \
  105. MULADD_lo = _umul128(a, b, &MULADD_hi); \
  106. MULADD_hi += _addcarry_u64(0, MULADD_lo, (addend), &(rl)); \
  107. (rh) = MULADD_hi; \
  108. } while (0)
  109. #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do \
  110. { \
  111. BignumInt MULADD_lo1, MULADD_lo2, MULADD_hi; \
  112. MULADD_lo1 = _umul128(a, b, &MULADD_hi); \
  113. MULADD_hi += _addcarry_u64(0, MULADD_lo1, (addend1), &MULADD_lo2); \
  114. MULADD_hi += _addcarry_u64(0, MULADD_lo2, (addend2), &(rl)); \
  115. (rh) = MULADD_hi; \
  116. } while (0)
  117. #elif (defined __GNUC__ || defined _LLP64 || __STDC__ >= 199901L) && BB_OK(5)
  118. /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
  119. typedef unsigned int BignumInt;
  120. #define BIGNUM_INT_BITS_BITS 5
  121. #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
  122. #elif defined _MSC_VER && BB_OK(5) || defined(MPEXT)
  123. /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
  124. typedef unsigned int BignumInt;
  125. #define BIGNUM_INT_BITS_BITS 5
  126. #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
  127. #elif defined _LP64 && BB_OK(5)
  128. /*
  129. * 32-bit BignumInt, using unsigned long itself as BignumDblInt.
  130. *
  131. * Only for platforms where long is 64 bits, of course.
  132. */
  133. typedef unsigned int BignumInt;
  134. #define BIGNUM_INT_BITS_BITS 5
  135. #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
  136. #elif BB_OK(4)
  137. /*
  138. * 16-bit BignumInt, using unsigned long as BignumDblInt.
  139. *
  140. * This is the final fallback for real emergencies: C89 guarantees
  141. * unsigned short/long to be at least the required sizes, so this
  142. * should work on any C implementation at all. But it'll be
  143. * noticeably slow, so if you find yourself in this case you
  144. * probably want to move heaven and earth to find an alternative!
  145. */
  146. typedef unsigned short BignumInt;
  147. #define BIGNUM_INT_BITS_BITS 4
  148. #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
  149. #else
  150. /* Should only get here if BB_OK(4) evaluated false, i.e. the
  151. * command line defined BIGNUM_OVERRIDE to an absurdly small
  152. * value. */
  153. #error Must define BIGNUM_OVERRIDE to at least 4
  154. #endif
  155. #undef BB_OK
  156. /*
  157. * Common code across all branches of that ifdef: define all the
  158. * easy constant macros in terms of BIGNUM_INT_BITS_BITS.
  159. */
  160. #define BIGNUM_INT_BITS (1 << BIGNUM_INT_BITS_BITS)
  161. #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
  162. #define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
  163. #define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
  164. /*
  165. * Just occasionally, we might need a GET_nnBIT_xSB_FIRST macro to
  166. * operate on whatever BignumInt is.
  167. */
  168. #if BIGNUM_INT_BITS_BITS == 4
  169. #define GET_BIGNUMINT_MSB_FIRST GET_16BIT_MSB_FIRST
  170. #define GET_BIGNUMINT_LSB_FIRST GET_16BIT_LSB_FIRST
  171. #define PUT_BIGNUMINT_MSB_FIRST PUT_16BIT_MSB_FIRST
  172. #define PUT_BIGNUMINT_LSB_FIRST PUT_16BIT_LSB_FIRST
  173. #elif BIGNUM_INT_BITS_BITS == 5
  174. #define GET_BIGNUMINT_MSB_FIRST GET_32BIT_MSB_FIRST
  175. #define GET_BIGNUMINT_LSB_FIRST GET_32BIT_LSB_FIRST
  176. #define PUT_BIGNUMINT_MSB_FIRST PUT_32BIT_MSB_FIRST
  177. #define PUT_BIGNUMINT_LSB_FIRST PUT_32BIT_LSB_FIRST
  178. #elif BIGNUM_INT_BITS_BITS == 6
  179. #define GET_BIGNUMINT_MSB_FIRST GET_64BIT_MSB_FIRST
  180. #define GET_BIGNUMINT_LSB_FIRST GET_64BIT_LSB_FIRST
  181. #define PUT_BIGNUMINT_MSB_FIRST PUT_64BIT_MSB_FIRST
  182. #define PUT_BIGNUMINT_LSB_FIRST PUT_64BIT_LSB_FIRST
  183. #else
  184. #error Ran out of options for GET_BIGNUMINT_xSB_FIRST
  185. #endif
  186. /*
  187. * Common code across _most_ branches of the ifdef: define a set of
  188. * statement macros in terms of the BignumDblInt type provided. In
  189. * this case, we also define BignumCarry to be the same thing as
  190. * BignumInt, for simplicity.
  191. */
  192. #ifdef DEFINE_BIGNUMDBLINT
  193. typedef BignumInt BignumCarry;
  194. #define BignumADC(ret, retc, a, b, c) do \
  195. { \
  196. DEFINE_BIGNUMDBLINT; \
  197. BignumDblInt ADC_temp = (BignumInt)(a); \
  198. ADC_temp += (BignumInt)(b); \
  199. ADC_temp += (c); \
  200. (ret) = (BignumInt)ADC_temp; \
  201. (retc) = (BignumCarry)(ADC_temp >> BIGNUM_INT_BITS); \
  202. } while (0)
  203. #define BignumMUL(rh, rl, a, b) do \
  204. { \
  205. DEFINE_BIGNUMDBLINT; \
  206. BignumDblInt MUL_temp = (BignumInt)(a); \
  207. MUL_temp *= (BignumInt)(b); \
  208. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  209. (rl) = (BignumInt)(MUL_temp); \
  210. } while (0)
  211. #define BignumMULADD(rh, rl, a, b, addend) do \
  212. { \
  213. DEFINE_BIGNUMDBLINT; \
  214. BignumDblInt MUL_temp = (BignumInt)(a); \
  215. MUL_temp *= (BignumInt)(b); \
  216. MUL_temp += (BignumInt)(addend); \
  217. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  218. (rl) = (BignumInt)(MUL_temp); \
  219. } while (0)
  220. #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do \
  221. { \
  222. DEFINE_BIGNUMDBLINT; \
  223. BignumDblInt MUL_temp = (BignumInt)(a); \
  224. MUL_temp *= (BignumInt)(b); \
  225. MUL_temp += (BignumInt)(addend1); \
  226. MUL_temp += (BignumInt)(addend2); \
  227. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  228. (rl) = (BignumInt)(MUL_temp); \
  229. } while (0)
  230. #endif /* DEFINE_BIGNUMDBLINT */
  231. /* ----------------------------------------------------------------------
  232. * Data structures used inside bignum.c.
  233. */
  234. struct mp_int {
  235. size_t nw;
  236. BignumInt *w;
  237. };
  238. struct MontyContext {
  239. /*
  240. * The actual modulus.
  241. */
  242. mp_int *m;
  243. /*
  244. * Montgomery multiplication works by selecting a value r > m,
  245. * coprime to m, which is really easy to divide by. In binary
  246. * arithmetic, that means making it a power of 2; in fact we make
  247. * it a whole number of BignumInt.
  248. *
  249. * We don't store r directly as an mp_int (there's no need). But
  250. * its value is 2^rbits; we also store rw = rbits/BIGNUM_INT_BITS
  251. * (the corresponding word offset within an mp_int).
  252. *
  253. * pw is the number of words needed to store an mp_int you're
  254. * doing reduction on: it has to be big enough to hold the sum of
  255. * an input value up to m^2 plus an extra addend up to m*r.
  256. */
  257. size_t rbits, rw, pw;
  258. /*
  259. * The key step in Montgomery reduction requires the inverse of -m
  260. * mod r.
  261. */
  262. mp_int *minus_minv_mod_r;
  263. /*
  264. * r^1, r^2 and r^3 mod m, which are used for various purposes.
  265. *
  266. * (Annoyingly, this is one of the rare cases where it would have
  267. * been nicer to have a Pascal-style 1-indexed array. I couldn't
  268. * _quite_ bring myself to put a gratuitous zero element in here.
  269. * So you just have to live with getting r^k by taking the [k-1]th
  270. * element of this array.)
  271. */
  272. mp_int *powers_of_r_mod_m[3];
  273. /*
  274. * Persistent scratch space from which monty_* functions can
  275. * allocate storage for intermediate values.
  276. */
  277. mp_int *scratch;
  278. };