bn_exp.c 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395
  1. /* crypto/bn/bn_exp.c */
  2. /* Copyright (C) 1995-1998 Eric Young ([email protected])
  3. * All rights reserved.
  4. *
  5. * This package is an SSL implementation written
  6. * by Eric Young ([email protected]).
  7. * The implementation was written so as to conform with Netscapes SSL.
  8. *
  9. * This library is free for commercial and non-commercial use as long as
  10. * the following conditions are aheared to. The following conditions
  11. * apply to all code found in this distribution, be it the RC4, RSA,
  12. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  13. * included with this distribution is covered by the same copyright terms
  14. * except that the holder is Tim Hudson ([email protected]).
  15. *
  16. * Copyright remains Eric Young's, and as such any Copyright notices in
  17. * the code are not to be removed.
  18. * If this package is used in a product, Eric Young should be given attribution
  19. * as the author of the parts of the library used.
  20. * This can be in the form of a textual message at program startup or
  21. * in documentation (online or textual) provided with the package.
  22. *
  23. * Redistribution and use in source and binary forms, with or without
  24. * modification, are permitted provided that the following conditions
  25. * are met:
  26. * 1. Redistributions of source code must retain the copyright
  27. * notice, this list of conditions and the following disclaimer.
  28. * 2. Redistributions in binary form must reproduce the above copyright
  29. * notice, this list of conditions and the following disclaimer in the
  30. * documentation and/or other materials provided with the distribution.
  31. * 3. All advertising materials mentioning features or use of this software
  32. * must display the following acknowledgement:
  33. * "This product includes cryptographic software written by
  34. * Eric Young ([email protected])"
  35. * The word 'cryptographic' can be left out if the rouines from the library
  36. * being used are not cryptographic related :-).
  37. * 4. If you include any Windows specific code (or a derivative thereof) from
  38. * the apps directory (application code) you must include an acknowledgement:
  39. * "This product includes software written by Tim Hudson ([email protected])"
  40. *
  41. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  42. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  45. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  46. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  47. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  48. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  49. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  50. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  51. * SUCH DAMAGE.
  52. *
  53. * The licence and distribution terms for any publically available version or
  54. * derivative of this code cannot be changed. i.e. this code cannot simply be
  55. * copied and put under another distribution licence
  56. * [including the GNU Public Licence.]
  57. */
  58. /* ====================================================================
  59. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  60. *
  61. * Redistribution and use in source and binary forms, with or without
  62. * modification, are permitted provided that the following conditions
  63. * are met:
  64. *
  65. * 1. Redistributions of source code must retain the above copyright
  66. * notice, this list of conditions and the following disclaimer.
  67. *
  68. * 2. Redistributions in binary form must reproduce the above copyright
  69. * notice, this list of conditions and the following disclaimer in
  70. * the documentation and/or other materials provided with the
  71. * distribution.
  72. *
  73. * 3. All advertising materials mentioning features or use of this
  74. * software must display the following acknowledgment:
  75. * "This product includes software developed by the OpenSSL Project
  76. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  77. *
  78. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  79. * endorse or promote products derived from this software without
  80. * prior written permission. For written permission, please contact
  81. * [email protected].
  82. *
  83. * 5. Products derived from this software may not be called "OpenSSL"
  84. * nor may "OpenSSL" appear in their names without prior written
  85. * permission of the OpenSSL Project.
  86. *
  87. * 6. Redistributions of any form whatsoever must retain the following
  88. * acknowledgment:
  89. * "This product includes software developed by the OpenSSL Project
  90. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  91. *
  92. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  93. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  94. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  95. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  96. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  97. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  98. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  99. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  100. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  101. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  102. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  103. * OF THE POSSIBILITY OF SUCH DAMAGE.
  104. * ====================================================================
  105. *
  106. * This product includes cryptographic software written by Eric Young
  107. * ([email protected]). This product includes software written by Tim
  108. * Hudson ([email protected]).
  109. *
  110. */
  111. #include "cryptlib.h"
  112. #include "bn_lcl.h"
  113. #include <stdlib.h>
  114. #ifdef _WIN32
  115. # include <malloc.h>
  116. # ifndef alloca
  117. # define alloca _alloca
  118. # endif
  119. #elif defined(__GNUC__)
  120. # ifndef alloca
  121. # define alloca(s) __builtin_alloca((s))
  122. # endif
  123. #elif defined(__sun)
  124. # include <alloca.h>
  125. #endif
  126. #include "rsaz_exp.h"
  127. #undef SPARC_T4_MONT
  128. #if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
  129. # include "sparc_arch.h"
  130. extern unsigned int OPENSSL_sparcv9cap_P[];
  131. # define SPARC_T4_MONT
  132. #endif
  133. /* maximum precomputation table size for *variable* sliding windows */
  134. #define TABLE_SIZE 32
  135. /* this one works - simple but works */
  136. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  137. {
  138. int i, bits, ret = 0;
  139. BIGNUM *v, *rr;
  140. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  141. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  142. BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  143. return -1;
  144. }
  145. BN_CTX_start(ctx);
  146. if ((r == a) || (r == p))
  147. rr = BN_CTX_get(ctx);
  148. else
  149. rr = r;
  150. v = BN_CTX_get(ctx);
  151. if (rr == NULL || v == NULL)
  152. goto err;
  153. if (BN_copy(v, a) == NULL)
  154. goto err;
  155. bits = BN_num_bits(p);
  156. if (BN_is_odd(p)) {
  157. if (BN_copy(rr, a) == NULL)
  158. goto err;
  159. } else {
  160. if (!BN_one(rr))
  161. goto err;
  162. }
  163. for (i = 1; i < bits; i++) {
  164. if (!BN_sqr(v, v, ctx))
  165. goto err;
  166. if (BN_is_bit_set(p, i)) {
  167. if (!BN_mul(rr, rr, v, ctx))
  168. goto err;
  169. }
  170. }
  171. if (r != rr)
  172. BN_copy(r, rr);
  173. ret = 1;
  174. err:
  175. BN_CTX_end(ctx);
  176. bn_check_top(r);
  177. return (ret);
  178. }
  179. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  180. BN_CTX *ctx)
  181. {
  182. int ret;
  183. bn_check_top(a);
  184. bn_check_top(p);
  185. bn_check_top(m);
  186. /*-
  187. * For even modulus m = 2^k*m_odd, it might make sense to compute
  188. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  189. * exponentiation for the odd part), using appropriate exponent
  190. * reductions, and combine the results using the CRT.
  191. *
  192. * For now, we use Montgomery only if the modulus is odd; otherwise,
  193. * exponentiation using the reciprocal-based quick remaindering
  194. * algorithm is used.
  195. *
  196. * (Timing obtained with expspeed.c [computations a^p mod m
  197. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  198. * 4096, 8192 bits], compared to the running time of the
  199. * standard algorithm:
  200. *
  201. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  202. * 55 .. 77 % [UltraSparc processor, but
  203. * debug-solaris-sparcv8-gcc conf.]
  204. *
  205. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  206. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  207. *
  208. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  209. * at 2048 and more bits, but at 512 and 1024 bits, it was
  210. * slower even than the standard algorithm!
  211. *
  212. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  213. * should be obtained when the new Montgomery reduction code
  214. * has been integrated into OpenSSL.)
  215. */
  216. #define MONT_MUL_MOD
  217. #define MONT_EXP_WORD
  218. #define RECP_MUL_MOD
  219. #ifdef MONT_MUL_MOD
  220. /*
  221. * I have finally been able to take out this pre-condition of the top bit
  222. * being set. It was caused by an error in BN_div with negatives. There
  223. * was also another problem when for a^b%m a >= m. eay 07-May-97
  224. */
  225. /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
  226. if (BN_is_odd(m)) {
  227. # ifdef MONT_EXP_WORD
  228. if (a->top == 1 && !a->neg
  229. && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
  230. BN_ULONG A = a->d[0];
  231. ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  232. } else
  233. # endif
  234. ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  235. } else
  236. #endif
  237. #ifdef RECP_MUL_MOD
  238. {
  239. ret = BN_mod_exp_recp(r, a, p, m, ctx);
  240. }
  241. #else
  242. {
  243. ret = BN_mod_exp_simple(r, a, p, m, ctx);
  244. }
  245. #endif
  246. bn_check_top(r);
  247. return (ret);
  248. }
  249. int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  250. const BIGNUM *m, BN_CTX *ctx)
  251. {
  252. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  253. int start = 1;
  254. BIGNUM *aa;
  255. /* Table of variables obtained from 'ctx' */
  256. BIGNUM *val[TABLE_SIZE];
  257. BN_RECP_CTX recp;
  258. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  259. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  260. BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  261. return -1;
  262. }
  263. bits = BN_num_bits(p);
  264. if (bits == 0) {
  265. ret = BN_one(r);
  266. return ret;
  267. }
  268. BN_CTX_start(ctx);
  269. aa = BN_CTX_get(ctx);
  270. val[0] = BN_CTX_get(ctx);
  271. if (!aa || !val[0])
  272. goto err;
  273. BN_RECP_CTX_init(&recp);
  274. if (m->neg) {
  275. /* ignore sign of 'm' */
  276. if (!BN_copy(aa, m))
  277. goto err;
  278. aa->neg = 0;
  279. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
  280. goto err;
  281. } else {
  282. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
  283. goto err;
  284. }
  285. if (!BN_nnmod(val[0], a, m, ctx))
  286. goto err; /* 1 */
  287. if (BN_is_zero(val[0])) {
  288. BN_zero(r);
  289. ret = 1;
  290. goto err;
  291. }
  292. window = BN_window_bits_for_exponent_size(bits);
  293. if (window > 1) {
  294. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
  295. goto err; /* 2 */
  296. j = 1 << (window - 1);
  297. for (i = 1; i < j; i++) {
  298. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  299. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
  300. goto err;
  301. }
  302. }
  303. start = 1; /* This is used to avoid multiplication etc
  304. * when there is only the value '1' in the
  305. * buffer. */
  306. wvalue = 0; /* The 'value' of the window */
  307. wstart = bits - 1; /* The top bit of the window */
  308. wend = 0; /* The bottom bit of the window */
  309. if (!BN_one(r))
  310. goto err;
  311. for (;;) {
  312. if (BN_is_bit_set(p, wstart) == 0) {
  313. if (!start)
  314. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  315. goto err;
  316. if (wstart == 0)
  317. break;
  318. wstart--;
  319. continue;
  320. }
  321. /*
  322. * We now have wstart on a 'set' bit, we now need to work out how bit
  323. * a window to do. To do this we need to scan forward until the last
  324. * set bit before the end of the window
  325. */
  326. j = wstart;
  327. wvalue = 1;
  328. wend = 0;
  329. for (i = 1; i < window; i++) {
  330. if (wstart - i < 0)
  331. break;
  332. if (BN_is_bit_set(p, wstart - i)) {
  333. wvalue <<= (i - wend);
  334. wvalue |= 1;
  335. wend = i;
  336. }
  337. }
  338. /* wend is the size of the current window */
  339. j = wend + 1;
  340. /* add the 'bytes above' */
  341. if (!start)
  342. for (i = 0; i < j; i++) {
  343. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
  344. goto err;
  345. }
  346. /* wvalue will be an odd number < 2^window */
  347. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
  348. goto err;
  349. /* move the 'window' down further */
  350. wstart -= wend + 1;
  351. wvalue = 0;
  352. start = 0;
  353. if (wstart < 0)
  354. break;
  355. }
  356. ret = 1;
  357. err:
  358. BN_CTX_end(ctx);
  359. BN_RECP_CTX_free(&recp);
  360. bn_check_top(r);
  361. return (ret);
  362. }
  363. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  364. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  365. {
  366. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  367. int start = 1;
  368. BIGNUM *d, *r;
  369. const BIGNUM *aa;
  370. /* Table of variables obtained from 'ctx' */
  371. BIGNUM *val[TABLE_SIZE];
  372. BN_MONT_CTX *mont = NULL;
  373. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  374. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  375. }
  376. bn_check_top(a);
  377. bn_check_top(p);
  378. bn_check_top(m);
  379. if (!BN_is_odd(m)) {
  380. BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
  381. return (0);
  382. }
  383. bits = BN_num_bits(p);
  384. if (bits == 0) {
  385. ret = BN_one(rr);
  386. return ret;
  387. }
  388. BN_CTX_start(ctx);
  389. d = BN_CTX_get(ctx);
  390. r = BN_CTX_get(ctx);
  391. val[0] = BN_CTX_get(ctx);
  392. if (!d || !r || !val[0])
  393. goto err;
  394. /*
  395. * If this is not done, things will break in the montgomery part
  396. */
  397. if (in_mont != NULL)
  398. mont = in_mont;
  399. else {
  400. if ((mont = BN_MONT_CTX_new()) == NULL)
  401. goto err;
  402. if (!BN_MONT_CTX_set(mont, m, ctx))
  403. goto err;
  404. }
  405. if (a->neg || BN_ucmp(a, m) >= 0) {
  406. if (!BN_nnmod(val[0], a, m, ctx))
  407. goto err;
  408. aa = val[0];
  409. } else
  410. aa = a;
  411. if (BN_is_zero(aa)) {
  412. BN_zero(rr);
  413. ret = 1;
  414. goto err;
  415. }
  416. if (!BN_to_montgomery(val[0], aa, mont, ctx))
  417. goto err; /* 1 */
  418. window = BN_window_bits_for_exponent_size(bits);
  419. if (window > 1) {
  420. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
  421. goto err; /* 2 */
  422. j = 1 << (window - 1);
  423. for (i = 1; i < j; i++) {
  424. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  425. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
  426. goto err;
  427. }
  428. }
  429. start = 1; /* This is used to avoid multiplication etc
  430. * when there is only the value '1' in the
  431. * buffer. */
  432. wvalue = 0; /* The 'value' of the window */
  433. wstart = bits - 1; /* The top bit of the window */
  434. wend = 0; /* The bottom bit of the window */
  435. #if 1 /* by Shay Gueron's suggestion */
  436. j = m->top; /* borrow j */
  437. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  438. if (bn_wexpand(r, j) == NULL)
  439. goto err;
  440. /* 2^(top*BN_BITS2) - m */
  441. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  442. for (i = 1; i < j; i++)
  443. r->d[i] = (~m->d[i]) & BN_MASK2;
  444. r->top = j;
  445. /*
  446. * Upper words will be zero if the corresponding words of 'm' were
  447. * 0xfff[...], so decrement r->top accordingly.
  448. */
  449. bn_correct_top(r);
  450. } else
  451. #endif
  452. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
  453. goto err;
  454. for (;;) {
  455. if (BN_is_bit_set(p, wstart) == 0) {
  456. if (!start) {
  457. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  458. goto err;
  459. }
  460. if (wstart == 0)
  461. break;
  462. wstart--;
  463. continue;
  464. }
  465. /*
  466. * We now have wstart on a 'set' bit, we now need to work out how bit
  467. * a window to do. To do this we need to scan forward until the last
  468. * set bit before the end of the window
  469. */
  470. j = wstart;
  471. wvalue = 1;
  472. wend = 0;
  473. for (i = 1; i < window; i++) {
  474. if (wstart - i < 0)
  475. break;
  476. if (BN_is_bit_set(p, wstart - i)) {
  477. wvalue <<= (i - wend);
  478. wvalue |= 1;
  479. wend = i;
  480. }
  481. }
  482. /* wend is the size of the current window */
  483. j = wend + 1;
  484. /* add the 'bytes above' */
  485. if (!start)
  486. for (i = 0; i < j; i++) {
  487. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  488. goto err;
  489. }
  490. /* wvalue will be an odd number < 2^window */
  491. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
  492. goto err;
  493. /* move the 'window' down further */
  494. wstart -= wend + 1;
  495. wvalue = 0;
  496. start = 0;
  497. if (wstart < 0)
  498. break;
  499. }
  500. #if defined(SPARC_T4_MONT)
  501. if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
  502. j = mont->N.top; /* borrow j */
  503. val[0]->d[0] = 1; /* borrow val[0] */
  504. for (i = 1; i < j; i++)
  505. val[0]->d[i] = 0;
  506. val[0]->top = j;
  507. if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
  508. goto err;
  509. } else
  510. #endif
  511. if (!BN_from_montgomery(rr, r, mont, ctx))
  512. goto err;
  513. ret = 1;
  514. err:
  515. if ((in_mont == NULL) && (mont != NULL))
  516. BN_MONT_CTX_free(mont);
  517. BN_CTX_end(ctx);
  518. bn_check_top(rr);
  519. return (ret);
  520. }
  521. #if defined(SPARC_T4_MONT)
  522. static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
  523. {
  524. BN_ULONG ret = 0;
  525. int wordpos;
  526. wordpos = bitpos / BN_BITS2;
  527. bitpos %= BN_BITS2;
  528. if (wordpos >= 0 && wordpos < a->top) {
  529. ret = a->d[wordpos] & BN_MASK2;
  530. if (bitpos) {
  531. ret >>= bitpos;
  532. if (++wordpos < a->top)
  533. ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
  534. }
  535. }
  536. return ret & BN_MASK2;
  537. }
  538. #endif
  539. /*
  540. * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  541. * layout so that accessing any of these table values shows the same access
  542. * pattern as far as cache lines are concerned. The following functions are
  543. * used to transfer a BIGNUM from/to that table.
  544. */
  545. static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
  546. unsigned char *buf, int idx,
  547. int width)
  548. {
  549. size_t i, j;
  550. if (top > b->top)
  551. top = b->top; /* this works because 'buf' is explicitly
  552. * zeroed */
  553. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  554. buf[j] = ((unsigned char *)b->d)[i];
  555. }
  556. return 1;
  557. }
  558. static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
  559. unsigned char *buf, int idx,
  560. int width)
  561. {
  562. size_t i, j;
  563. if (bn_wexpand(b, top) == NULL)
  564. return 0;
  565. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  566. ((unsigned char *)b->d)[i] = buf[j];
  567. }
  568. b->top = top;
  569. bn_correct_top(b);
  570. return 1;
  571. }
  572. /*
  573. * Given a pointer value, compute the next address that is a cache line
  574. * multiple.
  575. */
  576. #define MOD_EXP_CTIME_ALIGN(x_) \
  577. ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  578. /*
  579. * This variant of BN_mod_exp_mont() uses fixed windows and the special
  580. * precomputation memory layout to limit data-dependency to a minimum to
  581. * protect secret exponents (cf. the hyper-threading timing attacks pointed
  582. * out by Colin Percival,
  583. * http://www.daemong-consideredperthreading-considered-harmful/)
  584. */
  585. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  586. const BIGNUM *m, BN_CTX *ctx,
  587. BN_MONT_CTX *in_mont)
  588. {
  589. int i, bits, ret = 0, window, wvalue;
  590. int top;
  591. BN_MONT_CTX *mont = NULL;
  592. int numPowers;
  593. unsigned char *powerbufFree = NULL;
  594. int powerbufLen = 0;
  595. unsigned char *powerbuf = NULL;
  596. BIGNUM tmp, am;
  597. #if defined(SPARC_T4_MONT)
  598. unsigned int t4 = 0;
  599. #endif
  600. bn_check_top(a);
  601. bn_check_top(p);
  602. bn_check_top(m);
  603. if (!BN_is_odd(m)) {
  604. BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
  605. return (0);
  606. }
  607. top = m->top;
  608. bits = BN_num_bits(p);
  609. if (bits == 0) {
  610. ret = BN_one(rr);
  611. return ret;
  612. }
  613. BN_CTX_start(ctx);
  614. /*
  615. * Allocate a montgomery context if it was not supplied by the caller. If
  616. * this is not done, things will break in the montgomery part.
  617. */
  618. if (in_mont != NULL)
  619. mont = in_mont;
  620. else {
  621. if ((mont = BN_MONT_CTX_new()) == NULL)
  622. goto err;
  623. if (!BN_MONT_CTX_set(mont, m, ctx))
  624. goto err;
  625. }
  626. #ifdef RSAZ_ENABLED
  627. /*
  628. * If the size of the operands allow it, perform the optimized
  629. * RSAZ exponentiation. For further information see
  630. * crypto/bn/rsaz_exp.c and accompanying assembly modules.
  631. */
  632. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
  633. && rsaz_avx2_eligible()) {
  634. if (NULL == bn_wexpand(rr, 16))
  635. goto err;
  636. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
  637. mont->n0[0]);
  638. rr->top = 16;
  639. rr->neg = 0;
  640. bn_correct_top(rr);
  641. ret = 1;
  642. goto err;
  643. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  644. if (NULL == bn_wexpand(rr, 8))
  645. goto err;
  646. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  647. rr->top = 8;
  648. rr->neg = 0;
  649. bn_correct_top(rr);
  650. ret = 1;
  651. goto err;
  652. }
  653. #endif
  654. /* Get the window size to use with size of p. */
  655. window = BN_window_bits_for_ctime_exponent_size(bits);
  656. #if defined(SPARC_T4_MONT)
  657. if (window >= 5 && (top & 15) == 0 && top <= 64 &&
  658. (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
  659. (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
  660. window = 5;
  661. else
  662. #endif
  663. #if defined(OPENSSL_BN_ASM_MONT5)
  664. if (window >= 5) {
  665. window = 5; /* ~5% improvement for RSA2048 sign, and even
  666. * for RSA4096 */
  667. if ((top & 7) == 0)
  668. powerbufLen += 2 * top * sizeof(m->d[0]);
  669. }
  670. #endif
  671. (void)0;
  672. /*
  673. * Allocate a buffer large enough to hold all of the pre-computed powers
  674. * of am, am itself and tmp.
  675. */
  676. numPowers = 1 << window;
  677. powerbufLen += sizeof(m->d[0]) * (top * numPowers +
  678. ((2 * top) >
  679. numPowers ? (2 * top) : numPowers));
  680. #ifdef alloca
  681. if (powerbufLen < 3072)
  682. powerbufFree =
  683. alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  684. else
  685. #endif
  686. if ((powerbufFree =
  687. (unsigned char *)OPENSSL_malloc(powerbufLen +
  688. MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
  689. == NULL)
  690. goto err;
  691. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  692. memset(powerbuf, 0, powerbufLen);
  693. #ifdef alloca
  694. if (powerbufLen < 3072)
  695. powerbufFree = NULL;
  696. #endif
  697. /* lay down tmp and am right after powers table */
  698. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  699. am.d = tmp.d + top;
  700. tmp.top = am.top = 0;
  701. tmp.dmax = am.dmax = top;
  702. tmp.neg = am.neg = 0;
  703. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  704. /* prepare a^0 in Montgomery domain */
  705. #if 1 /* by Shay Gueron's suggestion */
  706. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  707. /* 2^(top*BN_BITS2) - m */
  708. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  709. for (i = 1; i < top; i++)
  710. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  711. tmp.top = top;
  712. } else
  713. #endif
  714. if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  715. goto err;
  716. /* prepare a^1 in Montgomery domain */
  717. if (a->neg || BN_ucmp(a, m) >= 0) {
  718. if (!BN_mod(&am, a, m, ctx))
  719. goto err;
  720. if (!BN_to_montgomery(&am, &am, mont, ctx))
  721. goto err;
  722. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  723. goto err;
  724. #if defined(SPARC_T4_MONT)
  725. if (t4) {
  726. typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np,
  727. const BN_ULONG *n0, const void *table,
  728. int power, int bits);
  729. int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np,
  730. const BN_ULONG *n0, const void *table,
  731. int power, int bits);
  732. int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np,
  733. const BN_ULONG *n0, const void *table,
  734. int power, int bits);
  735. int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np,
  736. const BN_ULONG *n0, const void *table,
  737. int power, int bits);
  738. int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np,
  739. const BN_ULONG *n0, const void *table,
  740. int power, int bits);
  741. static const bn_pwr5_mont_f pwr5_funcs[4] = {
  742. bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
  743. bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
  744. };
  745. bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
  746. typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
  747. const void *bp, const BN_ULONG *np,
  748. const BN_ULONG *n0);
  749. int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp,
  750. const BN_ULONG *np, const BN_ULONG *n0);
  751. int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
  752. const void *bp, const BN_ULONG *np,
  753. const BN_ULONG *n0);
  754. int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
  755. const void *bp, const BN_ULONG *np,
  756. const BN_ULONG *n0);
  757. int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
  758. const void *bp, const BN_ULONG *np,
  759. const BN_ULONG *n0);
  760. static const bn_mul_mont_f mul_funcs[4] = {
  761. bn_mul_mont_t4_8, bn_mul_mont_t4_16,
  762. bn_mul_mont_t4_24, bn_mul_mont_t4_32
  763. };
  764. bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
  765. void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap,
  766. const void *bp, const BN_ULONG *np,
  767. const BN_ULONG *n0, int num);
  768. void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap,
  769. const void *bp, const BN_ULONG *np,
  770. const BN_ULONG *n0, int num);
  771. void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap,
  772. const void *table, const BN_ULONG *np,
  773. const BN_ULONG *n0, int num, int power);
  774. void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
  775. void *table, size_t power);
  776. void bn_gather5_t4(BN_ULONG *out, size_t num,
  777. void *table, size_t power);
  778. void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num);
  779. BN_ULONG *np = mont->N.d, *n0 = mont->n0;
  780. int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
  781. * than 32 */
  782. /*
  783. * BN_to_montgomery can contaminate words above .top [in
  784. * BN_DEBUG[_DEBUG] build]...
  785. */
  786. for (i = am.top; i < top; i++)
  787. am.d[i] = 0;
  788. for (i = tmp.top; i < top; i++)
  789. tmp.d[i] = 0;
  790. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
  791. bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
  792. if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
  793. !(*mul_worker) (tmp.d, am.d, am.d, np, n0))
  794. bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
  795. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
  796. for (i = 3; i < 32; i++) {
  797. /* Calculate a^i = a^(i-1) * a */
  798. if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
  799. !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
  800. bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
  801. bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
  802. }
  803. /* switch to 64-bit domain */
  804. np = alloca(top * sizeof(BN_ULONG));
  805. top /= 2;
  806. bn_flip_t4(np, mont->N.d, top);
  807. bits--;
  808. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  809. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  810. bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
  811. /*
  812. * Scan the exponent one window at a time starting from the most
  813. * significant bits.
  814. */
  815. while (bits >= 0) {
  816. if (bits < stride)
  817. stride = bits + 1;
  818. bits -= stride;
  819. wvalue = bn_get_bits(p, bits + 1);
  820. if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
  821. continue;
  822. /* retry once and fall back */
  823. if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
  824. continue;
  825. bits += stride - 5;
  826. wvalue >>= stride - 5;
  827. wvalue &= 31;
  828. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  829. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  830. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  831. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  832. bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
  833. bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
  834. wvalue);
  835. }
  836. bn_flip_t4(tmp.d, tmp.d, top);
  837. top *= 2;
  838. /* back to 32-bit domain */
  839. tmp.top = top;
  840. bn_correct_top(&tmp);
  841. OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
  842. } else
  843. #endif
  844. #if defined(OPENSSL_BN_ASM_MONT5)
  845. if (window == 5 && top > 1) {
  846. /*
  847. * This optimization uses ideas from http://eprint.iacr.org/2011/239,
  848. * specifically optimization of cache-timing attack countermeasures
  849. * and pre-computation optimization.
  850. */
  851. /*
  852. * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  853. * 512-bit RSA is hardly relevant, we omit it to spare size...
  854. */
  855. void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
  856. const void *table, const BN_ULONG *np,
  857. const BN_ULONG *n0, int num, int power);
  858. void bn_scatter5(const BN_ULONG *inp, size_t num,
  859. void *table, size_t power);
  860. void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
  861. void bn_power5(BN_ULONG *rp, const BN_ULONG *ap,
  862. const void *table, const BN_ULONG *np,
  863. const BN_ULONG *n0, int num, int power);
  864. int bn_get_bits5(const BN_ULONG *ap, int off);
  865. int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
  866. const BN_ULONG *not_used, const BN_ULONG *np,
  867. const BN_ULONG *n0, int num);
  868. BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  869. /*
  870. * BN_to_montgomery can contaminate words above .top [in
  871. * BN_DEBUG[_DEBUG] build]...
  872. */
  873. for (i = am.top; i < top; i++)
  874. am.d[i] = 0;
  875. for (i = tmp.top; i < top; i++)
  876. tmp.d[i] = 0;
  877. if (top & 7)
  878. np2 = np;
  879. else
  880. for (np2 = am.d + top, i = 0; i < top; i++)
  881. np2[2 * i] = np[i];
  882. bn_scatter5(tmp.d, top, powerbuf, 0);
  883. bn_scatter5(am.d, am.top, powerbuf, 1);
  884. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  885. bn_scatter5(tmp.d, top, powerbuf, 2);
  886. # if 0
  887. for (i = 3; i < 32; i++) {
  888. /* Calculate a^i = a^(i-1) * a */
  889. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  890. bn_scatter5(tmp.d, top, powerbuf, i);
  891. }
  892. # else
  893. /* same as above, but uses squaring for 1/2 of operations */
  894. for (i = 4; i < 32; i *= 2) {
  895. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  896. bn_scatter5(tmp.d, top, powerbuf, i);
  897. }
  898. for (i = 3; i < 8; i += 2) {
  899. int j;
  900. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  901. bn_scatter5(tmp.d, top, powerbuf, i);
  902. for (j = 2 * i; j < 32; j *= 2) {
  903. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  904. bn_scatter5(tmp.d, top, powerbuf, j);
  905. }
  906. }
  907. for (; i < 16; i += 2) {
  908. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  909. bn_scatter5(tmp.d, top, powerbuf, i);
  910. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  911. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  912. }
  913. for (; i < 32; i += 2) {
  914. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  915. bn_scatter5(tmp.d, top, powerbuf, i);
  916. }
  917. # endif
  918. bits--;
  919. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  920. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  921. bn_gather5(tmp.d, top, powerbuf, wvalue);
  922. /*
  923. * Scan the exponent one window at a time starting from the most
  924. * significant bits.
  925. */
  926. if (top & 7)
  927. while (bits >= 0) {
  928. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  929. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  930. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  931. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  932. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  933. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  934. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  935. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
  936. wvalue);
  937. } else {
  938. while (bits >= 0) {
  939. wvalue = bn_get_bits5(p->d, bits - 4);
  940. bits -= 5;
  941. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  942. }
  943. }
  944. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  945. tmp.top = top;
  946. bn_correct_top(&tmp);
  947. if (ret) {
  948. if (!BN_copy(rr, &tmp))
  949. ret = 0;
  950. goto err; /* non-zero ret means it's not error */
  951. }
  952. } else
  953. #endif
  954. {
  955. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
  956. goto err;
  957. if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
  958. goto err;
  959. /*
  960. * If the window size is greater than 1, then calculate
  961. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
  962. * powers could instead be computed as (a^(i/2))^2 to use the slight
  963. * performance advantage of sqr over mul).
  964. */
  965. if (window > 1) {
  966. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  967. goto err;
  968. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  969. (&tmp, top, powerbuf, 2, numPowers))
  970. goto err;
  971. for (i = 3; i < numPowers; i++) {
  972. /* Calculate a^i = a^(i-1) * a */
  973. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  974. goto err;
  975. if (!MOD_EXP_CTIME_COPY_TO_PREBUF
  976. (&tmp, top, powerbuf, i, numPowers))
  977. goto err;
  978. }
  979. }
  980. bits--;
  981. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  982. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  983. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  984. (&tmp, top, powerbuf, wvalue, numPowers))
  985. goto err;
  986. /*
  987. * Scan the exponent one window at a time starting from the most
  988. * significant bits.
  989. */
  990. while (bits >= 0) {
  991. wvalue = 0; /* The 'value' of the window */
  992. /* Scan the window, squaring the result as we go */
  993. for (i = 0; i < window; i++, bits--) {
  994. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  995. goto err;
  996. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  997. }
  998. /*
  999. * Fetch the appropriate pre-computed value from the pre-buf
  1000. */
  1001. if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
  1002. (&am, top, powerbuf, wvalue, numPowers))
  1003. goto err;
  1004. /* Multiply the result into the intermediate result */
  1005. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  1006. goto err;
  1007. }
  1008. }
  1009. /* Convert the final result from montgomery to standard format */
  1010. #if defined(SPARC_T4_MONT)
  1011. if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
  1012. am.d[0] = 1; /* borrow am */
  1013. for (i = 1; i < top; i++)
  1014. am.d[i] = 0;
  1015. if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
  1016. goto err;
  1017. } else
  1018. #endif
  1019. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  1020. goto err;
  1021. ret = 1;
  1022. err:
  1023. if ((in_mont == NULL) && (mont != NULL))
  1024. BN_MONT_CTX_free(mont);
  1025. if (powerbuf != NULL) {
  1026. OPENSSL_cleanse(powerbuf, powerbufLen);
  1027. if (powerbufFree)
  1028. OPENSSL_free(powerbufFree);
  1029. }
  1030. BN_CTX_end(ctx);
  1031. return (ret);
  1032. }
  1033. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1034. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
  1035. {
  1036. BN_MONT_CTX *mont = NULL;
  1037. int b, bits, ret = 0;
  1038. int r_is_one;
  1039. BN_ULONG w, next_w;
  1040. BIGNUM *d, *r, *t;
  1041. BIGNUM *swap_tmp;
  1042. #define BN_MOD_MUL_WORD(r, w, m) \
  1043. (BN_mul_word(r, (w)) && \
  1044. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1045. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1046. /*
  1047. * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1048. * probably more overhead than always using BN_mod (which uses BN_copy if
  1049. * a similar test returns true).
  1050. */
  1051. /*
  1052. * We can use BN_mod and do not need BN_nnmod because our accumulator is
  1053. * never negative (the result of BN_mod does not depend on the sign of
  1054. * the modulus).
  1055. */
  1056. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1057. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1058. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1059. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1060. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1061. return -1;
  1062. }
  1063. bn_check_top(p);
  1064. bn_check_top(m);
  1065. if (!BN_is_odd(m)) {
  1066. BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
  1067. return (0);
  1068. }
  1069. if (m->top == 1)
  1070. a %= m->d[0]; /* make sure that 'a' is reduced */
  1071. bits = BN_num_bits(p);
  1072. if (bits == 0) {
  1073. /* x**0 mod 1 is still zero. */
  1074. if (BN_is_one(m)) {
  1075. ret = 1;
  1076. BN_zero(rr);
  1077. } else
  1078. ret = BN_one(rr);
  1079. return ret;
  1080. }
  1081. if (a == 0) {
  1082. BN_zero(rr);
  1083. ret = 1;
  1084. return ret;
  1085. }
  1086. BN_CTX_start(ctx);
  1087. d = BN_CTX_get(ctx);
  1088. r = BN_CTX_get(ctx);
  1089. t = BN_CTX_get(ctx);
  1090. if (d == NULL || r == NULL || t == NULL)
  1091. goto err;
  1092. if (in_mont != NULL)
  1093. mont = in_mont;
  1094. else {
  1095. if ((mont = BN_MONT_CTX_new()) == NULL)
  1096. goto err;
  1097. if (!BN_MONT_CTX_set(mont, m, ctx))
  1098. goto err;
  1099. }
  1100. r_is_one = 1; /* except for Montgomery factor */
  1101. /* bits-1 >= 0 */
  1102. /* The result is accumulated in the product r*w. */
  1103. w = a; /* bit 'bits-1' of 'p' is always set */
  1104. for (b = bits - 2; b >= 0; b--) {
  1105. /* First, square r*w. */
  1106. next_w = w * w;
  1107. if ((next_w / w) != w) { /* overflow */
  1108. if (r_is_one) {
  1109. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1110. goto err;
  1111. r_is_one = 0;
  1112. } else {
  1113. if (!BN_MOD_MUL_WORD(r, w, m))
  1114. goto err;
  1115. }
  1116. next_w = 1;
  1117. }
  1118. w = next_w;
  1119. if (!r_is_one) {
  1120. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  1121. goto err;
  1122. }
  1123. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1124. if (BN_is_bit_set(p, b)) {
  1125. next_w = w * a;
  1126. if ((next_w / a) != w) { /* overflow */
  1127. if (r_is_one) {
  1128. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1129. goto err;
  1130. r_is_one = 0;
  1131. } else {
  1132. if (!BN_MOD_MUL_WORD(r, w, m))
  1133. goto err;
  1134. }
  1135. next_w = a;
  1136. }
  1137. w = next_w;
  1138. }
  1139. }
  1140. /* Finally, set r:=r*w. */
  1141. if (w != 1) {
  1142. if (r_is_one) {
  1143. if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
  1144. goto err;
  1145. r_is_one = 0;
  1146. } else {
  1147. if (!BN_MOD_MUL_WORD(r, w, m))
  1148. goto err;
  1149. }
  1150. }
  1151. if (r_is_one) { /* can happen only if a == 1 */
  1152. if (!BN_one(rr))
  1153. goto err;
  1154. } else {
  1155. if (!BN_from_montgomery(rr, r, mont, ctx))
  1156. goto err;
  1157. }
  1158. ret = 1;
  1159. err:
  1160. if ((in_mont == NULL) && (mont != NULL))
  1161. BN_MONT_CTX_free(mont);
  1162. BN_CTX_end(ctx);
  1163. bn_check_top(rr);
  1164. return (ret);
  1165. }
  1166. /* The old fallback, simple version :-) */
  1167. int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  1168. const BIGNUM *m, BN_CTX *ctx)
  1169. {
  1170. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  1171. int start = 1;
  1172. BIGNUM *d;
  1173. /* Table of variables obtained from 'ctx' */
  1174. BIGNUM *val[TABLE_SIZE];
  1175. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1176. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1177. BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1178. return -1;
  1179. }
  1180. bits = BN_num_bits(p);
  1181. if (bits == 0) {
  1182. ret = BN_one(r);
  1183. return ret;
  1184. }
  1185. BN_CTX_start(ctx);
  1186. d = BN_CTX_get(ctx);
  1187. val[0] = BN_CTX_get(ctx);
  1188. if (!d || !val[0])
  1189. goto err;
  1190. if (!BN_nnmod(val[0], a, m, ctx))
  1191. goto err; /* 1 */
  1192. if (BN_is_zero(val[0])) {
  1193. BN_zero(r);
  1194. ret = 1;
  1195. goto err;
  1196. }
  1197. window = BN_window_bits_for_exponent_size(bits);
  1198. if (window > 1) {
  1199. if (!BN_mod_mul(d, val[0], val[0], m, ctx))
  1200. goto err; /* 2 */
  1201. j = 1 << (window - 1);
  1202. for (i = 1; i < j; i++) {
  1203. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  1204. !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
  1205. goto err;
  1206. }
  1207. }
  1208. start = 1; /* This is used to avoid multiplication etc
  1209. * when there is only the value '1' in the
  1210. * buffer. */
  1211. wvalue = 0; /* The 'value' of the window */
  1212. wstart = bits - 1; /* The top bit of the window */
  1213. wend = 0; /* The bottom bit of the window */
  1214. if (!BN_one(r))
  1215. goto err;
  1216. for (;;) {
  1217. if (BN_is_bit_set(p, wstart) == 0) {
  1218. if (!start)
  1219. if (!BN_mod_mul(r, r, r, m, ctx))
  1220. goto err;
  1221. if (wstart == 0)
  1222. break;
  1223. wstart--;
  1224. continue;
  1225. }
  1226. /*
  1227. * We now have wstart on a 'set' bit, we now need to work out how bit
  1228. * a window to do. To do this we need to scan forward until the last
  1229. * set bit before the end of the window
  1230. */
  1231. j = wstart;
  1232. wvalue = 1;
  1233. wend = 0;
  1234. for (i = 1; i < window; i++) {
  1235. if (wstart - i < 0)
  1236. break;
  1237. if (BN_is_bit_set(p, wstart - i)) {
  1238. wvalue <<= (i - wend);
  1239. wvalue |= 1;
  1240. wend = i;
  1241. }
  1242. }
  1243. /* wend is the size of the current window */
  1244. j = wend + 1;
  1245. /* add the 'bytes above' */
  1246. if (!start)
  1247. for (i = 0; i < j; i++) {
  1248. if (!BN_mod_mul(r, r, r, m, ctx))
  1249. goto err;
  1250. }
  1251. /* wvalue will be an odd number < 2^window */
  1252. if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
  1253. goto err;
  1254. /* move the 'window' down further */
  1255. wstart -= wend + 1;
  1256. wvalue = 0;
  1257. start = 0;
  1258. if (wstart < 0)
  1259. break;
  1260. }
  1261. ret = 1;
  1262. err:
  1263. BN_CTX_end(ctx);
  1264. bn_check_top(r);
  1265. return (ret);
  1266. }