ntru.c 55 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581
  1. /*
  2. * Implementation of OpenSSH 9.x's hybrid key exchange protocol
  3. * [email protected] .
  4. *
  5. * This consists of the 'Streamlined NTRU Prime' quantum-resistant
  6. * cryptosystem, run in parallel with ordinary Curve25519 to generate
  7. * a shared secret combining the output of both systems.
  8. *
  9. * (Hence, even if you don't trust this newfangled NTRU Prime thing at
  10. * all, it's at least no _less_ secure than the kex you were using
  11. * already.)
  12. *
  13. * References for the NTRU Prime cryptosystem, up to and including
  14. * binary encodings of public and private keys and the exact preimages
  15. * of the hashes used in key exchange:
  16. *
  17. * https://ntruprime.cr.yp.to/
  18. * https://ntruprime.cr.yp.to/nist/ntruprime-20201007.pdf
  19. *
  20. * The SSH protocol layer is not documented anywhere I could find (as
  21. * of 2022-04-15, not even in OpenSSH's PROTOCOL.* files). I had to
  22. * read OpenSSH's source code to find out how it worked, and the
  23. * answer is as follows:
  24. *
  25. * This hybrid kex method is treated for SSH purposes as a form of
  26. * elliptic-curve Diffie-Hellman, and shares the same SSH message
  27. * sequence: client sends SSH2_MSG_KEX_ECDH_INIT containing its public
  28. * half, server responds with SSH2_MSG_KEX_ECDH_REPLY containing _its_
  29. * public half plus the host key and signature on the shared secret.
  30. *
  31. * (This is a bit of a fudge, because unlike actual ECDH, this kex
  32. * method is asymmetric: one side sends a public key, and the other
  33. * side encrypts something with it and sends the ciphertext back. So
  34. * while the normal ECDH implementations can compute the two sides
  35. * independently in parallel, this system reusing the same messages
  36. * has to be serial. But the order of the messages _is_ firmly
  37. * specified in SSH ECDH, so it works anyway.)
  38. *
  39. * For this kex method, SSH2_MSG_KEX_ECDH_INIT still contains a single
  40. * SSH 'string', which consists of the concatenation of a Streamlined
  41. * NTRU Prime public key with the Curve25519 public value. (Both of
  42. * these have fixed length in bytes, so there's no ambiguity in the
  43. * concatenation.)
  44. *
  45. * SSH2_MSG_KEX_ECDH_REPLY is mostly the same as usual. The only
  46. * string in the packet that varies is the second one, which would
  47. * normally contain the server's public elliptic curve point. Instead,
  48. * it now contains the concatenation of
  49. *
  50. * - a Streamlined NTRU Prime ciphertext
  51. * - the 'confirmation hash' specified in ntruprime-20201007.pdf,
  52. * hashing the plaintext of that ciphertext together with the
  53. * public key
  54. * - the Curve25519 public point as usual.
  55. *
  56. * Again, all three of those elements have fixed lengths.
  57. *
  58. * The client decrypts the ciphertext, checks the confirmation hash,
  59. * and if successful, generates the 'session hash' specified in
  60. * ntruprime-20201007.pdf, which is 32 bytes long and is the ultimate
  61. * output of the Streamlined NTRU Prime key exchange.
  62. *
  63. * The output of the hybrid kex method as a whole is an SSH 'string'
  64. * of length 64 containing the SHA-512 hash of the concatenatio of
  65. *
  66. * - the Streamlined NTRU Prime session hash (32 bytes)
  67. * - the Curve25519 shared secret (32 bytes).
  68. *
  69. * That string is included directly into the SSH exchange hash and key
  70. * derivation hashes, in place of the mpint that comes out of most
  71. * other kex methods.
  72. */
  73. #include <stdio.h>
  74. #include <stdlib.h>
  75. #include <assert.h>
  76. #include "putty.h"
  77. #include "ssh.h"
  78. #include "mpint.h"
  79. #include "ntru.h"
  80. #include "smallmoduli.h"
  81. /* Invert x mod q, assuming it's nonzero. (For time-safety, no check
  82. * is made for zero; it just returns 0.)
  83. *
  84. * Expects qrecip == reciprocal_for_reduction(q). (But it's passed in
  85. * as a parameter to save recomputing it, on the theory that the
  86. * caller will have had it lying around already in most cases.) */
  87. static uint16_t invert(uint16_t x, uint16_t q, uint64_t qrecip)
  88. {
  89. /* Fermat inversion: compute x^(q-2), since x^(q-1) == 1. */
  90. uint32_t sq = x, bit = 1, acc = 1, exp = q-2;
  91. while (1) {
  92. if (exp & bit) {
  93. acc = reduce(acc * sq, q, qrecip);
  94. exp &= ~bit;
  95. if (!exp)
  96. return acc;
  97. }
  98. sq = reduce(sq * sq, q, qrecip);
  99. bit <<= 1;
  100. }
  101. }
  102. /* Check whether x == 0, time-safely, and return 1 if it is or 0 otherwise. */
  103. static unsigned iszero(uint16_t x)
  104. {
  105. return 1 & ~((x + 0xFFFF) >> 16);
  106. }
  107. /*
  108. * Handy macros to cut down on all those extra function parameters. In
  109. * the common case where a function is working mod the same modulus
  110. * throughout (and has called it q), you can just write 'SETUP;' at
  111. * the top and then call REDUCE(...) and INVERT(...) without having to
  112. * write out q and qrecip every time.
  113. */
  114. #define SETUP uint64_t qrecip = reciprocal_for_reduction(q)
  115. #define REDUCE(x) reduce(x, q, qrecip)
  116. #define INVERT(x) invert(x, q, qrecip)
  117. /* ----------------------------------------------------------------------
  118. * Quotient-ring functions.
  119. *
  120. * NTRU Prime works with two similar but different quotient rings:
  121. *
  122. * Z_q[x] / <x^p-x-1> where p,q are the prime parameters of the system
  123. * Z_3[x] / <x^p-x-1> with the same p, but coefficients mod 3.
  124. *
  125. * The former is a field (every nonzero element is invertible),
  126. * because the system parameters are chosen such that x^p-x-1 is
  127. * invertible over Z_q. The latter is not a field (or not necessarily,
  128. * and in particular, not for the value of p we use here).
  129. *
  130. * In these core functions, you pass in the modulus you want as the
  131. * parameter q, which is either the 'real' q specified in the system
  132. * parameters, or 3 if you're doing one of the mod-3 parts of the
  133. * algorithm.
  134. */
  135. /*
  136. * Multiply two elements of a quotient ring.
  137. *
  138. * 'a' and 'b' are arrays of exactly p coefficients, with constant
  139. * term first. 'out' is an array the same size to write the inverse
  140. * into.
  141. */
  142. void ntru_ring_multiply(uint16_t *out, const uint16_t *a, const uint16_t *b,
  143. unsigned p, unsigned q)
  144. {
  145. SETUP;
  146. /*
  147. * Strategy: just compute the full product with 2p coefficients,
  148. * and then reduce it mod x^p-x-1 by working downwards from the
  149. * top coefficient replacing x^{p+k} with (x+1)x^k for k = ...,1,0.
  150. *
  151. * Possibly some speed could be gained here by doing the recursive
  152. * Karatsuba optimisation for the initial multiplication? But I
  153. * haven't tried it.
  154. */
  155. uint32_t *unreduced = snewn(2*p, uint32_t);
  156. for (unsigned i = 0; i < 2*p; i++)
  157. unreduced[i] = 0;
  158. for (unsigned i = 0; i < p; i++)
  159. for (unsigned j = 0; j < p; j++)
  160. unreduced[i+j] = REDUCE(unreduced[i+j] + a[i] * b[j]);
  161. for (unsigned i = 2*p - 1; i >= p; i--) {
  162. unreduced[i-p] += unreduced[i];
  163. unreduced[i-p+1] += unreduced[i];
  164. unreduced[i] = 0;
  165. }
  166. for (unsigned i = 0; i < p; i++)
  167. out[i] = REDUCE(unreduced[i]);
  168. smemclr(unreduced, 2*p * sizeof(*unreduced));
  169. sfree(unreduced);
  170. }
  171. /*
  172. * Invert an element of the quotient ring.
  173. *
  174. * 'in' is an array of exactly p coefficients, with constant term
  175. * first. 'out' is an array the same size to write the inverse into.
  176. *
  177. * Method: essentially Stein's gcd algorithm, taking the gcd of the
  178. * input (regarded as an element of Z_q[x] proper) and x^p-x-1. Given
  179. * two polynomials over a field which are not both divisible by x, you
  180. * can find their gcd by iterating the following procedure:
  181. *
  182. * - if one is divisible by x, divide off x
  183. * - otherwise, subtract from the higher-degree one whatever scalar
  184. * multiple of the lower-degree one will make it divisible by x,
  185. * and _then_ divide off x
  186. *
  187. * Neither of these types of step changes the gcd of the two
  188. * polynomials.
  189. *
  190. * Each step reduces the sum of the two polynomials' degree by at
  191. * least one, as long as at least one of the degrees is positive.
  192. * (Maybe more than one if all the stars align in the second case, if
  193. * the subtraction cancels the leading term as well as the constant
  194. * term.) So in at most deg A + deg B steps, we must have reached the
  195. * situation where both polys are constants; in one more step after
  196. * that, one of them will be zero; and in one step after _that_, the
  197. * zero one will reliably be the one we're dividing by x. Or rather,
  198. * that's what happens in the case where A,B are coprime; if not, then
  199. * one hits zero while the other is still nonzero.
  200. *
  201. * In a normal gcd algorithm, you'd track a linear combination of the
  202. * two original polynomials that yields each working value, and end up
  203. * with a linear combination of the inputs that yields the gcd. In
  204. * this algorithm, the 'divide off x' step makes that awkward - but we
  205. * can solve that by instead multiplying by the inverse of x in the
  206. * ring that we want our answer to be valid in! And since the modulus
  207. * polynomial of the ring is x^p-x-1, the inverse of x is easy to
  208. * calculate, because it's always just x^{p-1} - 1, which is also very
  209. * easy to multiply by.
  210. */
  211. unsigned ntru_ring_invert(uint16_t *out, const uint16_t *in,
  212. unsigned p, unsigned q)
  213. {
  214. SETUP;
  215. /* Size of the polynomial arrays we'll work with */
  216. const size_t SIZE = p+1;
  217. /* Number of steps of the algorithm is the max possible value of
  218. * deg A + deg B + 2, where deg A <= p-1 and deg B = p */
  219. const size_t STEPS = 2*p + 1;
  220. /* Our two working polynomials */
  221. uint16_t *A = snewn(SIZE, uint16_t);
  222. uint16_t *B = snewn(SIZE, uint16_t);
  223. /* Coefficient of the input value in each one */
  224. uint16_t *Ac = snewn(SIZE, uint16_t);
  225. uint16_t *Bc = snewn(SIZE, uint16_t);
  226. /* Initialise A to the input, and Ac correspondingly to 1 */
  227. memcpy(A, in, p*sizeof(uint16_t));
  228. A[p] = 0;
  229. Ac[0] = 1;
  230. for (size_t i = 1; i < SIZE; i++)
  231. Ac[i] = 0;
  232. /* Initialise B to the quotient polynomial of the ring, x^p-x-1
  233. * And Bc = 0 */
  234. B[0] = B[1] = q-1;
  235. for (size_t i = 2; i < p; i++)
  236. B[i] = 0;
  237. B[p] = 1;
  238. for (size_t i = 0; i < SIZE; i++)
  239. Bc[i] = 0;
  240. /* Run the gcd-finding algorithm. */
  241. for (size_t i = 0; i < STEPS; i++) {
  242. /*
  243. * First swap round so that A is the one we'll be dividing by x.
  244. *
  245. * In the case where one of the two polys has a zero constant
  246. * term, it's that one. In the other case, it's the one of
  247. * smaller degree. We must compute both, and choose between
  248. * them in a side-channel-safe way.
  249. */
  250. unsigned x_divides_A = iszero(A[0]);
  251. unsigned x_divides_B = iszero(B[0]);
  252. unsigned B_is_bigger = 0;
  253. {
  254. unsigned not_seen_top_term_of_A = 1, not_seen_top_term_of_B = 1;
  255. for (size_t j = SIZE; j-- > 0 ;) {
  256. not_seen_top_term_of_A &= iszero(A[j]);
  257. not_seen_top_term_of_B &= iszero(B[j]);
  258. B_is_bigger |= (~not_seen_top_term_of_B &
  259. not_seen_top_term_of_A);
  260. }
  261. }
  262. unsigned need_swap = x_divides_B | (~x_divides_A & B_is_bigger);
  263. uint16_t swap_mask = -need_swap;
  264. for (size_t j = 0; j < SIZE; j++) {
  265. uint16_t diff = (A[j] ^ B[j]) & swap_mask;
  266. A[j] ^= diff;
  267. B[j] ^= diff;
  268. }
  269. for (size_t j = 0; j < SIZE; j++) {
  270. uint16_t diff = (Ac[j] ^ Bc[j]) & swap_mask;
  271. Ac[j] ^= diff;
  272. Bc[j] ^= diff;
  273. }
  274. /*
  275. * Replace A with a linear combination of both A and B that
  276. * has constant term zero, which we do by calculating
  277. *
  278. * (constant term of B) * A - (constant term of A) * B
  279. *
  280. * In one of the two cases, A's constant term is already zero,
  281. * so the coefficient of B will be zero too; hence, this will
  282. * do nothing useful (it will merely scale A by some scalar
  283. * value), but it will take the same length of time as doing
  284. * something, which is just what we want.
  285. */
  286. uint16_t Amult = B[0], Bmult = q - A[0];
  287. for (size_t j = 0; j < SIZE; j++)
  288. A[j] = REDUCE(Amult * A[j] + Bmult * B[j]);
  289. /* And do the same transformation to Ac */
  290. for (size_t j = 0; j < SIZE; j++)
  291. Ac[j] = REDUCE(Amult * Ac[j] + Bmult * Bc[j]);
  292. /*
  293. * Now divide A by x, and compensate by multiplying Ac by
  294. * x^{p-1}-1 mod x^p-x-1.
  295. *
  296. * That multiplication is particularly easy, precisely because
  297. * x^{p-1}-1 is the multiplicative inverse of x! Each x^n term
  298. * for n>0 just moves down to the x^{n-1} term, and only the
  299. * constant term has to be dealt with in an interesting way.
  300. */
  301. for (size_t j = 1; j < SIZE; j++)
  302. A[j-1] = A[j];
  303. A[SIZE-1] = 0;
  304. uint16_t Ac0 = Ac[0];
  305. for (size_t j = 1; j < p; j++)
  306. Ac[j-1] = Ac[j];
  307. Ac[p-1] = Ac0;
  308. Ac[0] = REDUCE(Ac[0] + q - Ac0);
  309. }
  310. /*
  311. * Now we expect that A is 0, and B is a constant. If so, then
  312. * they are coprime, and we're going to return success. If not,
  313. * they have a common factor.
  314. */
  315. unsigned success = iszero(A[0]) & (1 ^ iszero(B[0]));
  316. for (size_t j = 1; j < SIZE; j++)
  317. success &= iszero(A[j]) & iszero(B[j]);
  318. /*
  319. * So we're going to return Bc, but first, scale it by the
  320. * multiplicative inverse of the constant we ended up with in
  321. * B[0].
  322. */
  323. uint16_t scale = INVERT(B[0]);
  324. for (size_t i = 0; i < p; i++)
  325. out[i] = REDUCE(scale * Bc[i]);
  326. smemclr(A, SIZE * sizeof(*A));
  327. sfree(A);
  328. smemclr(B, SIZE * sizeof(*B));
  329. sfree(B);
  330. smemclr(Ac, SIZE * sizeof(*Ac));
  331. sfree(Ac);
  332. smemclr(Bc, SIZE * sizeof(*Bc));
  333. sfree(Bc);
  334. return success;
  335. }
  336. /*
  337. * Given an array of values mod q, convert each one to its
  338. * minimum-absolute-value representative, and then reduce mod 3.
  339. *
  340. * Output values are 0, 1 and 0xFFFF, representing -1.
  341. *
  342. * (Normally our arrays of uint16_t are in 'minimal non-negative
  343. * residue' form, so the output of this function is unusual. But it's
  344. * useful to have it in this form so that it can be reused by
  345. * ntru_round3. You can put it back to the usual representation using
  346. * ntru_normalise, below.)
  347. */
  348. void ntru_mod3(uint16_t *out, const uint16_t *in, unsigned p, unsigned q)
  349. {
  350. uint64_t qrecip = reciprocal_for_reduction(q);
  351. uint64_t recip3 = reciprocal_for_reduction(3);
  352. unsigned bias = q/2;
  353. uint16_t adjust = 3 - reduce(bias-1, 3, recip3);
  354. for (unsigned i = 0; i < p; i++) {
  355. uint16_t val = reduce(in[i] + bias, q, qrecip);
  356. uint16_t residue = reduce(val + adjust, 3, recip3);
  357. out[i] = residue - 1;
  358. }
  359. }
  360. /*
  361. * Given an array of values mod q, round each one to the nearest
  362. * multiple of 3 to its minimum-absolute-value representative.
  363. *
  364. * Output values are signed integers coerced to uint16_t, so again,
  365. * use ntru_normalise afterwards to put them back to normal.
  366. */
  367. void ntru_round3(uint16_t *out, const uint16_t *in, unsigned p, unsigned q)
  368. {
  369. SETUP;
  370. unsigned bias = q/2;
  371. ntru_mod3(out, in, p, q);
  372. for (unsigned i = 0; i < p; i++)
  373. out[i] = REDUCE(in[i] + bias) - bias - out[i];
  374. }
  375. /*
  376. * Given an array of signed integers coerced to uint16_t in the range
  377. * [-q/2,+q/2], normalise them back to mod q values.
  378. */
  379. static void ntru_normalise(uint16_t *out, const uint16_t *in,
  380. unsigned p, unsigned q)
  381. {
  382. for (unsigned i = 0; i < p; i++)
  383. out[i] = in[i] + q * (in[i] >> 15);
  384. }
  385. /*
  386. * Given an array of values mod q, add a constant to each one.
  387. */
  388. void ntru_bias(uint16_t *out, const uint16_t *in, unsigned bias,
  389. unsigned p, unsigned q)
  390. {
  391. SETUP;
  392. for (unsigned i = 0; i < p; i++)
  393. out[i] = REDUCE(in[i] + bias);
  394. }
  395. /*
  396. * Given an array of values mod q, multiply each one by a constant.
  397. */
  398. void ntru_scale(uint16_t *out, const uint16_t *in, uint16_t scale,
  399. unsigned p, unsigned q)
  400. {
  401. SETUP;
  402. for (unsigned i = 0; i < p; i++)
  403. out[i] = REDUCE(in[i] * scale);
  404. }
  405. /*
  406. * Given an array of values mod 3, convert them to values mod q in a
  407. * way that maps -1,0,+1 to -1,0,+1.
  408. */
  409. static void ntru_expand(
  410. uint16_t *out, const uint16_t *in, unsigned p, unsigned q)
  411. {
  412. for (size_t i = 0; i < p; i++) {
  413. uint16_t v = in[i];
  414. /* Map 2 to q-1, and leave 0 and 1 unchanged */
  415. v += (v >> 1) * (q-3);
  416. out[i] = v;
  417. }
  418. }
  419. /* ----------------------------------------------------------------------
  420. * Implement the binary encoding from ntruprime-20201007.pdf, which is
  421. * used to encode public keys and ciphertexts (though not plaintexts,
  422. * which are done in a much simpler way).
  423. *
  424. * The general idea is that your encoder takes as input a list of
  425. * small non-negative integers (r_i), and a sequence of limits (m_i)
  426. * such that 0 <= r_i < m_i, and emits a sequence of bytes that encode
  427. * all of these as tightly as reasonably possible.
  428. *
  429. * That's more general than is really needed, because in both the
  430. * actual uses of this encoding, the input m_i are all the same! But
  431. * the array of (r_i,m_i) pairs evolves during encoding, so they don't
  432. * _stay_ all the same, so you still have to have all the generality.
  433. *
  434. * The encoding process makes a number of passes along the list of
  435. * inputs. In each step, pairs of adjacent numbers are combined into
  436. * one larger one by turning (r_i,m_i) and (r_{i+1},m_{i+1}) into the
  437. * pair (r_i + m_i r_{i+1}, m_i m_{i+1}), i.e. so that the original
  438. * numbers could be recovered by taking the quotient and remaiinder of
  439. * the new r value by m_i. Then, if the new m_i is at least 2^14, we
  440. * emit the low 8 bits of r_i to the output stream and reduce r_i and
  441. * its limit correspondingly. So at the end of the pass, we've got
  442. * half as many numbers still to encode, they're all still not too
  443. * big, and we've emitted some amount of data into the output. Then do
  444. * another pass, keep going until there's only one number left, and
  445. * emit it little-endian.
  446. *
  447. * That's all very well, but how do you decode it again? DJB exhibits
  448. * a pair of recursive functions that are supposed to be mutually
  449. * inverse, but I didn't have any confidence that I'd be able to debug
  450. * them sensibly if they turned out not to be (or rather, if I
  451. * implemented one of them wrong). So I came up with my own strategy
  452. * instead.
  453. *
  454. * In my strategy, we start by processing just the (m_i) into an
  455. * 'encoding schedule' consisting of a sequence of simple
  456. * instructions. The instructions operate on a FIFO queue of numbers,
  457. * initialised to the original (r_i). The three instruction types are:
  458. *
  459. * - 'COMBINE': consume two numbers a,b from the head of the queue,
  460. * combine them by calculating a + m*b for some specified m, and
  461. * push the result on the tail of the queue.
  462. *
  463. * - 'BYTE': divide the tail element of the queue by 2^8 and emit the
  464. * low bits into the output stream.
  465. *
  466. * - 'COPY': pop a number from the head of the queue and push it
  467. * straight back on the tail. (Used for handling the leftover
  468. * element at the end of a pass if the input to the pass was a list
  469. * of odd length.)
  470. *
  471. * So we effectively implement DJB's encoding process in simulation,
  472. * and instead of actually processing a set of (r_i), we 'compile' the
  473. * process into a sequence of instructions that can be handed just the
  474. * (r_i) later and encode them in the right way. At the end of the
  475. * instructions, the queue is expected to have been reduced to length
  476. * 1 and contain the single integer 0.
  477. *
  478. * The nice thing about this system is that each of those three
  479. * instructions is easy to reverse. So you can also use the same
  480. * instructions for decoding: start with a queue containing 0, and
  481. * process the instructions in reverse order and reverse sense. So
  482. * BYTE means to _consume_ a byte from the encoded data (starting from
  483. * the rightmost end) and use it to make a queue element bigger; and
  484. * COMBINE run in reverse pops a single element from one end of the
  485. * queue, divides it by m, and pushes the quotient and remainder on
  486. * the other end.
  487. *
  488. * (So it's easy to debug, because the queue passes through the exact
  489. * same sequence of states during decoding that it did during
  490. * encoding, just in reverse order.)
  491. *
  492. * Also, the encoding schedule comes with information about the
  493. * expected size of the encoded data, because you can find that out
  494. * easily by just counting the BYTE commands.
  495. */
  496. enum {
  497. /*
  498. * Command values appearing in the 'ops' array. ENC_COPY and
  499. * ENC_BYTE are single values; values of the form
  500. * (ENC_COMBINE_BASE + m) represent a COMBINE command with
  501. * parameter m.
  502. */
  503. ENC_COPY, ENC_BYTE, ENC_COMBINE_BASE
  504. };
  505. struct NTRUEncodeSchedule {
  506. /*
  507. * Object representing a compiled set of encoding instructions.
  508. *
  509. * 'nvals' is the number of r_i we expect to encode. 'nops' is the
  510. * number of encoding commands in the 'ops' list; 'opsize' is the
  511. * physical size of the array, used during construction.
  512. *
  513. * 'endpos' is used to avoid a last-minute faff during decoding.
  514. * We implement our FIFO of integers as a ring buffer of size
  515. * 'nvals'. Encoding cycles round it some number of times, and the
  516. * final 0 element ends up at some random location in the array.
  517. * If we know _where_ the 0 ends up during encoding, we can put
  518. * the initial 0 there at the start of decoding, and then when we
  519. * finish reversing all the instructions, we'll end up with the
  520. * output numbers already arranged at their correct positions, so
  521. * that there's no need to rotate the array at the last minute.
  522. */
  523. size_t nvals, endpos, nops, opsize;
  524. uint32_t *ops;
  525. };
  526. static inline void sched_append(NTRUEncodeSchedule *sched, uint16_t op)
  527. {
  528. /* Helper function to append an operation to the schedule, and
  529. * update endpos. */
  530. sgrowarray(sched->ops, sched->opsize, sched->nops);
  531. sched->ops[sched->nops++] = op;
  532. if (op != ENC_BYTE)
  533. sched->endpos = (sched->endpos + 1) % sched->nvals;
  534. }
  535. /*
  536. * Take in the list of limit values (m_i) and compute the encoding
  537. * schedule.
  538. */
  539. NTRUEncodeSchedule *ntru_encode_schedule(const uint16_t *ms_in, size_t n)
  540. {
  541. NTRUEncodeSchedule *sched = snew(NTRUEncodeSchedule);
  542. sched->nvals = n;
  543. sched->endpos = n-1;
  544. sched->nops = sched->opsize = 0;
  545. sched->ops = NULL;
  546. assert(n != 0);
  547. /*
  548. * 'ms' is the list of (m_i) on input to the current pass.
  549. * 'ms_new' is the list output from the current pass. After each
  550. * pass we swap the arrays round.
  551. */
  552. uint32_t *ms = snewn(n, uint32_t);
  553. uint32_t *msnew = snewn(n, uint32_t);
  554. for (size_t i = 0; i < n; i++)
  555. ms[i] = ms_in[i];
  556. while (n > 1) {
  557. size_t nnew = 0;
  558. for (size_t i = 0; i < n; i += 2) {
  559. if (i+1 == n) {
  560. /*
  561. * Odd element at the end of the input list: just copy
  562. * it unchanged to the output.
  563. */
  564. sched_append(sched, ENC_COPY);
  565. msnew[nnew++] = ms[i];
  566. break;
  567. }
  568. /*
  569. * Normal case: consume two elements from the input list
  570. * and combine them.
  571. */
  572. uint32_t m1 = ms[i], m2 = ms[i+1], m = m1*m2;
  573. sched_append(sched, ENC_COMBINE_BASE + m1);
  574. /*
  575. * And then, as long as the combined limit is big enough,
  576. * emit an output byte from the bottom of it.
  577. */
  578. while (m >= (1<<14)) {
  579. sched_append(sched, ENC_BYTE);
  580. m = (m + 0xFF) >> 8;
  581. }
  582. /*
  583. * Whatever is left after that, we emit into the output
  584. * list and append to the fifo.
  585. */
  586. msnew[nnew++] = m;
  587. }
  588. /*
  589. * End of pass. The output list of (m_i) now becomes the input
  590. * list.
  591. */
  592. uint32_t *tmp = ms;
  593. ms = msnew;
  594. n = nnew;
  595. msnew = tmp;
  596. }
  597. /*
  598. * When that loop terminates, it's because there's exactly one
  599. * number left to encode. (Or, technically, _at most_ one - but we
  600. * don't support encoding a completely empty list in this
  601. * implementation, because what would be the point?) That number
  602. * is just emitted little-endian until its limit is 1 (meaning its
  603. * only possible actual value is 0).
  604. */
  605. assert(n == 1);
  606. uint32_t m = ms[0];
  607. while (m > 1) {
  608. sched_append(sched, ENC_BYTE);
  609. m = (m + 0xFF) >> 8;
  610. }
  611. sfree(ms);
  612. sfree(msnew);
  613. return sched;
  614. }
  615. void ntru_encode_schedule_free(NTRUEncodeSchedule *sched)
  616. {
  617. sfree(sched->ops);
  618. sfree(sched);
  619. }
  620. /*
  621. * Calculate the output length of the encoded data in bytes.
  622. */
  623. size_t ntru_encode_schedule_length(NTRUEncodeSchedule *sched)
  624. {
  625. size_t len = 0;
  626. for (size_t i = 0; i < sched->nops; i++)
  627. if (sched->ops[i] == ENC_BYTE)
  628. len++;
  629. return len;
  630. }
  631. /*
  632. * Retrieve the number of items encoded. (Used by testcrypt.)
  633. */
  634. size_t ntru_encode_schedule_nvals(NTRUEncodeSchedule *sched)
  635. {
  636. return sched->nvals;
  637. }
  638. /*
  639. * Actually encode a sequence of (r_i), emitting the output bytes to
  640. * an arbitrary BinarySink.
  641. */
  642. void ntru_encode(NTRUEncodeSchedule *sched, const uint16_t *rs_in,
  643. BinarySink *bs)
  644. {
  645. size_t n = sched->nvals;
  646. uint32_t *rs = snewn(n, uint32_t);
  647. for (size_t i = 0; i < n; i++)
  648. rs[i] = rs_in[i];
  649. /*
  650. * The head and tail pointers of the queue are both 'full'. That
  651. * is, rs[head] is the first element actually in the queue, and
  652. * rs[tail] is the last element.
  653. *
  654. * So you append to the queue by first advancing 'tail' and then
  655. * writing to rs[tail], whereas you consume from the queue by
  656. * first reading rs[head] and _then_ advancing 'head'.
  657. *
  658. * The more normal thing would be to make 'tail' point to the
  659. * first empty slot instead of the last full one. But then you'd
  660. * have to faff about with modular arithmetic to find the last
  661. * full slot for the BYTE command, so in this case, it's easier to
  662. * do it the less usual way.
  663. */
  664. size_t head = 0, tail = n-1;
  665. for (size_t i = 0; i < sched->nops; i++) {
  666. uint16_t op = sched->ops[i];
  667. switch (op) {
  668. case ENC_BYTE:
  669. put_byte(bs, rs[tail] & 0xFF);
  670. rs[tail] >>= 8;
  671. break;
  672. case ENC_COPY: {
  673. uint32_t r = rs[head];
  674. head = (head + 1) % n;
  675. tail = (tail + 1) % n;
  676. rs[tail] = r;
  677. break;
  678. }
  679. default: {
  680. uint32_t r1 = rs[head];
  681. head = (head + 1) % n;
  682. uint32_t r2 = rs[head];
  683. head = (head + 1) % n;
  684. tail = (tail + 1) % n;
  685. rs[tail] = r1 + (op - ENC_COMBINE_BASE) * r2;
  686. break;
  687. }
  688. }
  689. }
  690. /*
  691. * Expect that we've ended up with a single zero in the queue, at
  692. * exactly the position that the setup-time analysis predicted it.
  693. */
  694. assert(head == sched->endpos);
  695. assert(tail == sched->endpos);
  696. assert(rs[head] == 0);
  697. smemclr(rs, n * sizeof(*rs));
  698. sfree(rs);
  699. }
  700. /*
  701. * Decode a ptrlen of binary data into a sequence of (r_i). The data
  702. * is expected to be of exactly the right length (on pain of assertion
  703. * failure).
  704. */
  705. void ntru_decode(NTRUEncodeSchedule *sched, uint16_t *rs_out, ptrlen data)
  706. {
  707. size_t n = sched->nvals;
  708. const uint8_t *base = (const uint8_t *)data.ptr;
  709. const uint8_t *pos = base + data.len;
  710. /*
  711. * Initialise the queue to a single zero, at the 'endpos' position
  712. * that will mean the final output is correctly aligned.
  713. *
  714. * 'head' and 'tail' have the same meanings as in encoding. So
  715. * 'tail' is the location that BYTE modifies and COPY and COMBINE
  716. * consume from, and 'head' is the location that COPY and COMBINE
  717. * push on to. As in encoding, they both point at the extremal
  718. * full slots in the array.
  719. */
  720. uint32_t *rs = snewn(n, uint32_t);
  721. size_t head = sched->endpos, tail = head;
  722. rs[tail] = 0;
  723. for (size_t i = sched->nops; i-- > 0 ;) {
  724. uint16_t op = sched->ops[i];
  725. switch (op) {
  726. case ENC_BYTE: {
  727. assert(pos > base);
  728. uint8_t byte = *--pos;
  729. rs[tail] = (rs[tail] << 8) | byte;
  730. break;
  731. }
  732. case ENC_COPY: {
  733. uint32_t r = rs[tail];
  734. tail = (tail + n - 1) % n;
  735. head = (head + n - 1) % n;
  736. rs[head] = r;
  737. break;
  738. }
  739. default: {
  740. uint32_t r = rs[tail];
  741. tail = (tail + n - 1) % n;
  742. uint32_t m = op - ENC_COMBINE_BASE;
  743. uint64_t mrecip = reciprocal_for_reduction(m);
  744. uint32_t r1, r2;
  745. r1 = reduce_with_quot(r, &r2, m, mrecip);
  746. head = (head + n - 1) % n;
  747. rs[head] = r2;
  748. head = (head + n - 1) % n;
  749. rs[head] = r1;
  750. break;
  751. }
  752. }
  753. }
  754. assert(pos == base);
  755. assert(head == 0);
  756. assert(tail == n-1);
  757. for (size_t i = 0; i < n; i++)
  758. rs_out[i] = rs[i];
  759. smemclr(rs, n * sizeof(*rs));
  760. sfree(rs);
  761. }
  762. /* ----------------------------------------------------------------------
  763. * The actual public-key cryptosystem.
  764. */
  765. struct NTRUKeyPair {
  766. unsigned p, q, w;
  767. uint16_t *h; /* public key */
  768. uint16_t *f3, *ginv; /* private key */
  769. uint16_t *rho; /* for implicit rejection */
  770. };
  771. /* Helper function to free an array of uint16_t containing a ring
  772. * element, clearing it on the way since some of them are sensitive. */
  773. static void ring_free(uint16_t *val, unsigned p)
  774. {
  775. smemclr(val, p*sizeof(*val));
  776. sfree(val);
  777. }
  778. void ntru_keypair_free(NTRUKeyPair *keypair)
  779. {
  780. ring_free(keypair->h, keypair->p);
  781. ring_free(keypair->f3, keypair->p);
  782. ring_free(keypair->ginv, keypair->p);
  783. ring_free(keypair->rho, keypair->p);
  784. sfree(keypair);
  785. }
  786. /* Trivial accessors used by test programs. */
  787. unsigned ntru_keypair_p(NTRUKeyPair *keypair) { return keypair->p; }
  788. const uint16_t *ntru_pubkey(NTRUKeyPair *keypair) { return keypair->h; }
  789. /*
  790. * Generate a value of the class DJB describes as 'Short': it consists
  791. * of p terms that are all either 0 or +1 or -1, and exactly w of them
  792. * are not zero.
  793. *
  794. * Values of this kind are used for several purposes: part of the
  795. * private key, a plaintext, and the 'rho' fake-plaintext value used
  796. * for deliberately returning a duff but non-revealing session hash if
  797. * things go wrong.
  798. *
  799. * -1 is represented as 2 in the output array. So if you want these
  800. * numbers mod 3, then they come out already in the right form.
  801. * Otherwise, use ntru_expand.
  802. */
  803. void ntru_gen_short(uint16_t *v, unsigned p, unsigned w)
  804. {
  805. /*
  806. * Get enough random data to generate a polynomial all of whose p
  807. * terms are in {0,+1,-1}, and exactly w of them are nonzero.
  808. * We'll do this by making up a completely random sequence of
  809. * {+1,-1} and then setting a random subset of them to 0.
  810. *
  811. * So we'll need p random bits to choose the nonzero values, and
  812. * then (doing it the simplest way) log2(p!) bits to shuffle them,
  813. * plus say 128 bits to ensure any fluctuations in uniformity are
  814. * negligible.
  815. *
  816. * log2(p!) is a pain to calculate, so we'll bound it above by
  817. * p*log2(p), which we bound in turn by p*16.
  818. */
  819. size_t randbitpos = 17 * p + 128;
  820. mp_int *randdata = mp_resize(mp_random_bits(randbitpos), randbitpos + 32);
  821. /*
  822. * Initial value before zeroing out some terms: p randomly chosen
  823. * values in {1,2}.
  824. */
  825. for (size_t i = 0; i < p; i++)
  826. v[i] = 1 + mp_get_bit(randdata, --randbitpos);
  827. /*
  828. * Hereafter we're going to extract random bits by multiplication,
  829. * treating randdata as a large fixed-point number.
  830. */
  831. mp_reduce_mod_2to(randdata, randbitpos);
  832. /*
  833. * Zero out some terms, leaving a randomly selected w of them
  834. * nonzero.
  835. */
  836. uint32_t nonzeros_left = w;
  837. mp_int *x = mp_new(64);
  838. for (size_t i = p; i-- > 0 ;) {
  839. /*
  840. * Pick a random number out of the number of terms remaning.
  841. */
  842. mp_mul_integer_into(randdata, randdata, i+1);
  843. mp_rshift_fixed_into(x, randdata, randbitpos);
  844. mp_reduce_mod_2to(randdata, randbitpos);
  845. size_t j = mp_get_integer(x);
  846. /*
  847. * If that's less than nonzeros_left, then we're leaving this
  848. * number nonzero. Otherwise we're zeroing it out.
  849. */
  850. uint32_t keep = (uint32_t)(j - nonzeros_left) >> 31;
  851. v[i] &= -keep; /* clear this field if keep == 0 */
  852. nonzeros_left -= keep; /* decrement counter if keep == 1 */
  853. }
  854. mp_free(x);
  855. mp_free(randdata);
  856. }
  857. /*
  858. * Make a single attempt at generating a key pair. This involves
  859. * inventing random elements of both our quotient rings and hoping
  860. * they're both invertible.
  861. *
  862. * They may not be, if you're unlucky. The element of Z_q/<x^p-x-1>
  863. * will _almost_ certainly be invertible, because that is a field, so
  864. * invertibility can only fail if you were so unlucky as to choose the
  865. * all-0s element. But the element of Z_3/<x^p-x-1> may fail to be
  866. * invertible because it has a common factor with x^p-x-1 (which, over
  867. * Z_3, is not irreducible).
  868. *
  869. * So we can't guarantee to generate a key pair in constant time,
  870. * because there's no predicting how many retries we'll need. However,
  871. * this isn't a failure of side-channel safety, because we completely
  872. * discard all the random numbers and state from each failed attempt.
  873. * So if there were a side-channel leakage from a failure, the only
  874. * thing it would give away would be a bunch of random numbers that
  875. * turned out not to be used anyway.
  876. *
  877. * But a _successful_ call to this function should execute in a
  878. * secret-independent manner, and this 'make a single attempt'
  879. * function is exposed in the API so that 'testsc' can check that.
  880. */
  881. NTRUKeyPair *ntru_keygen_attempt(unsigned p, unsigned q, unsigned w)
  882. {
  883. /*
  884. * First invent g, which is the one more likely to fail to invert.
  885. * This is simply a uniformly random polynomial with p terms over
  886. * Z_3. So we need p*log2(3) random bits for it, plus 128 for
  887. * uniformity. It's easiest to bound log2(3) above by 2.
  888. */
  889. size_t randbitpos = 2 * p + 128;
  890. mp_int *randdata = mp_resize(mp_random_bits(randbitpos), randbitpos + 32);
  891. /*
  892. * Select p random values from {0,1,2}.
  893. */
  894. uint16_t *g = snewn(p, uint16_t);
  895. mp_int *x = mp_new(64);
  896. for (size_t i = 0; i < p; i++) {
  897. mp_mul_integer_into(randdata, randdata, 3);
  898. mp_rshift_fixed_into(x, randdata, randbitpos);
  899. mp_reduce_mod_2to(randdata, randbitpos);
  900. g[i] = mp_get_integer(x);
  901. }
  902. mp_free(x);
  903. mp_free(randdata);
  904. /*
  905. * Try to invert g over Z_3, and fail if it isn't invertible.
  906. */
  907. uint16_t *ginv = snewn(p, uint16_t);
  908. if (!ntru_ring_invert(ginv, g, p, 3)) {
  909. ring_free(g, p);
  910. ring_free(ginv, p);
  911. return NULL;
  912. }
  913. /*
  914. * Fine; we have g. Now make up an f, and convert it to a
  915. * polynomial over q.
  916. */
  917. uint16_t *f = snewn(p, uint16_t);
  918. ntru_gen_short(f, p, w);
  919. ntru_expand(f, f, p, q);
  920. /*
  921. * Multiply f by 3.
  922. */
  923. uint16_t *f3 = snewn(p, uint16_t);
  924. ntru_scale(f3, f, 3, p, q);
  925. /*
  926. * Invert 3*f over Z_q. This is guaranteed to succeed, since
  927. * Z_q/<x^p-x-1> is a field, so the only non-invertible value is
  928. * 0. And f is nonzero because it came from ntru_gen_short (hence,
  929. * w of its components are nonzero), hence so is 3*f.
  930. */
  931. uint16_t *f3inv = snewn(p, uint16_t);
  932. bool expect_always_success = ntru_ring_invert(f3inv, f3, p, q);
  933. assert(expect_always_success);
  934. /*
  935. * Make the public key, by converting g to a polynomial over q and
  936. * then multiplying by f3inv.
  937. */
  938. uint16_t *g_q = snewn(p, uint16_t);
  939. ntru_expand(g_q, g, p, q);
  940. uint16_t *h = snewn(p, uint16_t);
  941. ntru_ring_multiply(h, g_q, f3inv, p, q);
  942. /*
  943. * Make up rho, used to substitute for the plaintext in the
  944. * session hash in case of confirmation failure.
  945. */
  946. uint16_t *rho = snewn(p, uint16_t);
  947. ntru_gen_short(rho, p, w);
  948. /*
  949. * And we're done! Free everything except the pieces we're
  950. * returning.
  951. */
  952. NTRUKeyPair *keypair = snew(NTRUKeyPair);
  953. keypair->p = p;
  954. keypair->q = q;
  955. keypair->w = w;
  956. keypair->h = h;
  957. keypair->f3 = f3;
  958. keypair->ginv = ginv;
  959. keypair->rho = rho;
  960. ring_free(f, p);
  961. ring_free(f3inv, p);
  962. ring_free(g, p);
  963. ring_free(g_q, p);
  964. return keypair;
  965. }
  966. /*
  967. * The top-level key generation function for real use (as opposed to
  968. * testsc): keep trying to make a key until you succeed.
  969. */
  970. NTRUKeyPair *ntru_keygen(unsigned p, unsigned q, unsigned w)
  971. {
  972. while (1) {
  973. NTRUKeyPair *keypair = ntru_keygen_attempt(p, q, w);
  974. if (keypair)
  975. return keypair;
  976. }
  977. }
  978. /*
  979. * Public-key encryption.
  980. */
  981. void ntru_encrypt(uint16_t *ciphertext, const uint16_t *plaintext,
  982. uint16_t *pubkey, unsigned p, unsigned q)
  983. {
  984. uint16_t *r_q = snewn(p, uint16_t);
  985. ntru_expand(r_q, plaintext, p, q);
  986. uint16_t *unrounded = snewn(p, uint16_t);
  987. ntru_ring_multiply(unrounded, r_q, pubkey, p, q);
  988. ntru_round3(ciphertext, unrounded, p, q);
  989. ntru_normalise(ciphertext, ciphertext, p, q);
  990. ring_free(r_q, p);
  991. ring_free(unrounded, p);
  992. }
  993. /*
  994. * Public-key decryption.
  995. */
  996. void ntru_decrypt(uint16_t *plaintext, const uint16_t *ciphertext,
  997. NTRUKeyPair *keypair)
  998. {
  999. unsigned p = keypair->p, q = keypair->q, w = keypair->w;
  1000. uint16_t *tmp = snewn(p, uint16_t);
  1001. ntru_ring_multiply(tmp, ciphertext, keypair->f3, p, q);
  1002. ntru_mod3(tmp, tmp, p, q);
  1003. ntru_normalise(tmp, tmp, p, 3);
  1004. ntru_ring_multiply(plaintext, tmp, keypair->ginv, p, 3);
  1005. ring_free(tmp, p);
  1006. /*
  1007. * With luck, this should have recovered exactly the original
  1008. * plaintext. But, as per the spec, we check whether it has
  1009. * exactly w nonzero coefficients, and if not, then something has
  1010. * gone wrong - and in that situation we time-safely substitute a
  1011. * different output.
  1012. *
  1013. * (I don't know exactly why we do this, but I assume it's because
  1014. * otherwise the mis-decoded output could be made to disgorge a
  1015. * secret about the private key in some way.)
  1016. */
  1017. unsigned weight = p;
  1018. for (size_t i = 0; i < p; i++)
  1019. weight -= iszero(plaintext[i]);
  1020. unsigned ok = iszero(weight ^ w);
  1021. /*
  1022. * The default failure return value consists of w 1s followed by
  1023. * 0s.
  1024. */
  1025. unsigned mask = ok - 1;
  1026. for (size_t i = 0; i < w; i++) {
  1027. uint16_t diff = (1 ^ plaintext[i]) & mask;
  1028. plaintext[i] ^= diff;
  1029. }
  1030. for (size_t i = w; i < p; i++) {
  1031. uint16_t diff = (0 ^ plaintext[i]) & mask;
  1032. plaintext[i] ^= diff;
  1033. }
  1034. }
  1035. /* ----------------------------------------------------------------------
  1036. * Encode and decode public keys, ciphertexts and plaintexts.
  1037. *
  1038. * Public keys and ciphertexts use the complicated binary encoding
  1039. * system implemented above. In both cases, the inputs are regarded as
  1040. * symmetric about zero, and are first biased to map their most
  1041. * negative permitted value to 0, so that they become non-negative and
  1042. * hence suitable as inputs to the encoding system. In the case of a
  1043. * ciphertext, where the input coefficients have also been coerced to
  1044. * be multiples of 3, we divide by 3 as well, saving space by reducing
  1045. * the upper bounds (m_i) on all the encoded numbers.
  1046. */
  1047. /*
  1048. * Compute the encoding schedule for a public key.
  1049. */
  1050. static NTRUEncodeSchedule *ntru_encode_pubkey_schedule(unsigned p, unsigned q)
  1051. {
  1052. uint16_t *ms = snewn(p, uint16_t);
  1053. for (size_t i = 0; i < p; i++)
  1054. ms[i] = q;
  1055. NTRUEncodeSchedule *sched = ntru_encode_schedule(ms, p);
  1056. sfree(ms);
  1057. return sched;
  1058. }
  1059. /*
  1060. * Encode a public key.
  1061. */
  1062. void ntru_encode_pubkey(const uint16_t *pubkey, unsigned p, unsigned q,
  1063. BinarySink *bs)
  1064. {
  1065. /* Compute the biased version for encoding */
  1066. uint16_t *biased_pubkey = snewn(p, uint16_t);
  1067. ntru_bias(biased_pubkey, pubkey, q / 2, p, q);
  1068. /* Encode it */
  1069. NTRUEncodeSchedule *sched = ntru_encode_pubkey_schedule(p, q);
  1070. ntru_encode(sched, biased_pubkey, bs);
  1071. ntru_encode_schedule_free(sched);
  1072. ring_free(biased_pubkey, p);
  1073. }
  1074. /*
  1075. * Decode a public key and write it into 'pubkey'. We also return a
  1076. * ptrlen pointing at the chunk of data we removed from the
  1077. * BinarySource.
  1078. */
  1079. ptrlen ntru_decode_pubkey(uint16_t *pubkey, unsigned p, unsigned q,
  1080. BinarySource *src)
  1081. {
  1082. NTRUEncodeSchedule *sched = ntru_encode_pubkey_schedule(p, q);
  1083. /* Retrieve the right number of bytes from the source */
  1084. size_t len = ntru_encode_schedule_length(sched);
  1085. ptrlen encoded = get_data(src, len);
  1086. if (get_err(src)) {
  1087. /* If there wasn't enough data, give up and return all-zeroes
  1088. * purely for determinism. But that value should never be
  1089. * used, because the caller will also check get_err(src). */
  1090. memset(pubkey, 0, p*sizeof(*pubkey));
  1091. } else {
  1092. /* Do the decoding */
  1093. ntru_decode(sched, pubkey, encoded);
  1094. /* Unbias the coefficients */
  1095. ntru_bias(pubkey, pubkey, q-q/2, p, q);
  1096. }
  1097. ntru_encode_schedule_free(sched);
  1098. return encoded;
  1099. }
  1100. /*
  1101. * For ciphertext biasing: work out the largest absolute value a
  1102. * ciphertext element can take, which is given by taking q/2 and
  1103. * rounding it to the nearest multiple of 3.
  1104. */
  1105. static inline unsigned ciphertext_bias(unsigned q)
  1106. {
  1107. return (q/2+1) / 3;
  1108. }
  1109. /*
  1110. * The number of possible values of a ciphertext coefficient (for use
  1111. * as the m_i in encoding) ranges from +ciphertext_bias(q) to
  1112. * -ciphertext_bias(q) inclusive.
  1113. */
  1114. static inline unsigned ciphertext_m(unsigned q)
  1115. {
  1116. return 1 + 2 * ciphertext_bias(q);
  1117. }
  1118. /*
  1119. * Compute the encoding schedule for a ciphertext.
  1120. */
  1121. static NTRUEncodeSchedule *ntru_encode_ciphertext_schedule(
  1122. unsigned p, unsigned q)
  1123. {
  1124. unsigned m = ciphertext_m(q);
  1125. uint16_t *ms = snewn(p, uint16_t);
  1126. for (size_t i = 0; i < p; i++)
  1127. ms[i] = m;
  1128. NTRUEncodeSchedule *sched = ntru_encode_schedule(ms, p);
  1129. sfree(ms);
  1130. return sched;
  1131. }
  1132. /*
  1133. * Encode a ciphertext.
  1134. */
  1135. void ntru_encode_ciphertext(const uint16_t *ciphertext, unsigned p, unsigned q,
  1136. BinarySink *bs)
  1137. {
  1138. SETUP;
  1139. /*
  1140. * Bias the ciphertext, and scale down by 1/3, which we do by
  1141. * modular multiplication by the inverse of 3 mod q. (That only
  1142. * works if we know the inputs are all _exact_ multiples of 3
  1143. * - but we do!)
  1144. */
  1145. uint16_t *biased_ciphertext = snewn(p, uint16_t);
  1146. ntru_bias(biased_ciphertext, ciphertext, 3 * ciphertext_bias(q), p, q);
  1147. ntru_scale(biased_ciphertext, biased_ciphertext, INVERT(3), p, q);
  1148. /* Encode. */
  1149. NTRUEncodeSchedule *sched = ntru_encode_ciphertext_schedule(p, q);
  1150. ntru_encode(sched, biased_ciphertext, bs);
  1151. ntru_encode_schedule_free(sched);
  1152. ring_free(biased_ciphertext, p);
  1153. }
  1154. ptrlen ntru_decode_ciphertext(uint16_t *ct, NTRUKeyPair *keypair,
  1155. BinarySource *src)
  1156. {
  1157. unsigned p = keypair->p, q = keypair->q;
  1158. NTRUEncodeSchedule *sched = ntru_encode_ciphertext_schedule(p, q);
  1159. /* Retrieve the right number of bytes from the source */
  1160. size_t len = ntru_encode_schedule_length(sched);
  1161. ptrlen encoded = get_data(src, len);
  1162. if (get_err(src)) {
  1163. /* As above, return deterministic nonsense on failure */
  1164. memset(ct, 0, p*sizeof(*ct));
  1165. } else {
  1166. /* Do the decoding */
  1167. ntru_decode(sched, ct, encoded);
  1168. /* Undo the scaling and bias */
  1169. ntru_scale(ct, ct, 3, p, q);
  1170. ntru_bias(ct, ct, q - 3 * ciphertext_bias(q), p, q);
  1171. }
  1172. ntru_encode_schedule_free(sched);
  1173. return encoded; /* also useful to the caller, optionally */
  1174. }
  1175. /*
  1176. * Encode a plaintext.
  1177. *
  1178. * This is a much simpler encoding than the NTRUEncodeSchedule system:
  1179. * since elements of a plaintext are mod 3, we just encode each one in
  1180. * 2 bits, applying the usual bias so that {-1,0,+1} map to {0,1,2}
  1181. * respectively.
  1182. *
  1183. * There's no corresponding decode function, because plaintexts are
  1184. * never transmitted on the wire (the whole point is that they're too
  1185. * secret!). Plaintexts are only encoded in order to put them into
  1186. * hash preimages.
  1187. */
  1188. void ntru_encode_plaintext(const uint16_t *plaintext, unsigned p,
  1189. BinarySink *bs)
  1190. {
  1191. unsigned byte = 0, bitpos = 0;
  1192. for (size_t i = 0; i < p; i++) {
  1193. unsigned encoding = (plaintext[i] + 1) * iszero(plaintext[i] >> 1);
  1194. byte |= encoding << bitpos;
  1195. bitpos += 2;
  1196. if (bitpos == 8 || i+1 == p) {
  1197. put_byte(bs, byte);
  1198. byte = 0;
  1199. bitpos = 0;
  1200. }
  1201. }
  1202. }
  1203. /* ----------------------------------------------------------------------
  1204. * Compute the hashes required by the key exchange layer of NTRU Prime.
  1205. *
  1206. * There are two of these. The 'confirmation hash' is sent by the
  1207. * server along with the ciphertext, and the client can recalculate it
  1208. * to check whether the ciphertext was decrypted correctly. Then, the
  1209. * 'session hash' is the actual output of key exchange, and if the
  1210. * confirmation hash doesn't match, it gets deliberately corrupted.
  1211. */
  1212. /*
  1213. * Make the confirmation hash, whose inputs are the plaintext and the
  1214. * public key.
  1215. *
  1216. * This is defined as H(2 || H(3 || r) || H(4 || K)), where r is the
  1217. * plaintext and K is the public key (as encoded by the above
  1218. * functions), and the constants 2,3,4 are single bytes. The choice of
  1219. * hash function (H itself) is SHA-512 truncated to 256 bits.
  1220. *
  1221. * (To be clear: that is _not_ the thing that FIPS 180-4 6.7 defines
  1222. * as "SHA-512/256", which varies the initialisation vector of the
  1223. * SHA-512 algorithm as well as truncating the output. _This_
  1224. * algorithm uses the standard SHA-512 IV, and _just_ truncates the
  1225. * output, in the manner suggested by FIPS 180-4 section 7.)
  1226. *
  1227. * 'out' should therefore expect to receive 32 bytes of data.
  1228. */
  1229. static void ntru_confirmation_hash(
  1230. uint8_t *out, const uint16_t *plaintext,
  1231. const uint16_t *pubkey, unsigned p, unsigned q)
  1232. {
  1233. /* The outer hash object */
  1234. ssh_hash *hconfirm = ssh_hash_new(&ssh_sha512);
  1235. put_byte(hconfirm, 2); /* initial byte 2 */
  1236. uint8_t hashdata[64];
  1237. /* Compute H(3 || r) and add it to the main hash */
  1238. ssh_hash *h3r = ssh_hash_new(&ssh_sha512);
  1239. put_byte(h3r, 3);
  1240. ntru_encode_plaintext(plaintext, p, BinarySink_UPCAST(h3r));
  1241. ssh_hash_final(h3r, hashdata);
  1242. put_data(hconfirm, hashdata, 32);
  1243. /* Compute H(4 || K) and add it to the main hash */
  1244. ssh_hash *h4K = ssh_hash_new(&ssh_sha512);
  1245. put_byte(h4K, 4);
  1246. ntru_encode_pubkey(pubkey, p, q, BinarySink_UPCAST(h4K));
  1247. ssh_hash_final(h4K, hashdata);
  1248. put_data(hconfirm, hashdata, 32);
  1249. /* Compute the full output of the main SHA-512 hash */
  1250. ssh_hash_final(hconfirm, hashdata);
  1251. /* And copy the first 32 bytes into the caller's output array */
  1252. memcpy(out, hashdata, 32);
  1253. smemclr(hashdata, sizeof(hashdata));
  1254. }
  1255. /*
  1256. * Make the session hash, whose inputs are the plaintext, the
  1257. * ciphertext, and the confirmation hash (hence, transitively, a
  1258. * dependence on the public key as well).
  1259. *
  1260. * As computed by the server, and by the client if the confirmation
  1261. * hash matched, this is defined as
  1262. *
  1263. * H(1 || H(3 || r) || ciphertext || confirmation hash)
  1264. *
  1265. * but if the confirmation hash _didn't_ match, then the plaintext r
  1266. * is replaced with the dummy plaintext-shaped value 'rho' we invented
  1267. * during key generation (presumably to avoid leaking any information
  1268. * about our secrets), and the initial byte 1 is replaced with 0 (to
  1269. * ensure that the resulting hash preimage can't match any legitimate
  1270. * preimage). So in that case, you instead get
  1271. *
  1272. * H(0 || H(3 || rho) || ciphertext || confirmation hash)
  1273. *
  1274. * The inputs to this function include 'ok', which is the value to use
  1275. * as the initial byte (1 on success, 0 on failure), and 'plaintext'
  1276. * which should already have been substituted with rho in case of
  1277. * failure.
  1278. *
  1279. * The ciphertext is provided in already-encoded form.
  1280. */
  1281. static void ntru_session_hash(
  1282. uint8_t *out, unsigned ok, const uint16_t *plaintext,
  1283. unsigned p, ptrlen ciphertext, ptrlen confirmation_hash)
  1284. {
  1285. /* The outer hash object */
  1286. ssh_hash *hsession = ssh_hash_new(&ssh_sha512);
  1287. put_byte(hsession, ok); /* initial byte 1 or 0 */
  1288. uint8_t hashdata[64];
  1289. /* Compute H(3 || r), or maybe H(3 || rho), and add it to the main hash */
  1290. ssh_hash *h3r = ssh_hash_new(&ssh_sha512);
  1291. put_byte(h3r, 3);
  1292. ntru_encode_plaintext(plaintext, p, BinarySink_UPCAST(h3r));
  1293. ssh_hash_final(h3r, hashdata);
  1294. put_data(hsession, hashdata, 32);
  1295. /* Put the ciphertext and confirmation hash in */
  1296. put_datapl(hsession, ciphertext);
  1297. put_datapl(hsession, confirmation_hash);
  1298. /* Compute the full output of the main SHA-512 hash */
  1299. ssh_hash_final(hsession, hashdata);
  1300. /* And copy the first 32 bytes into the caller's output array */
  1301. memcpy(out, hashdata, 32);
  1302. smemclr(hashdata, sizeof(hashdata));
  1303. }
  1304. /* ----------------------------------------------------------------------
  1305. * Top-level KEM functions.
  1306. */
  1307. /*
  1308. * The parameters p,q,w for the system. There are other choices of
  1309. * these, but OpenSSH only specifies this set. (If that ever changes,
  1310. * we'll need to turn these into elements of the state structures.)
  1311. */
  1312. #define p_LIVE 761
  1313. #define q_LIVE 4591
  1314. #define w_LIVE 286
  1315. struct ntru_dk {
  1316. NTRUKeyPair *keypair;
  1317. strbuf *encoded;
  1318. pq_kem_dk dk;
  1319. };
  1320. static pq_kem_dk *ntru_vt_keygen(const pq_kemalg *alg, BinarySink *ek)
  1321. {
  1322. struct ntru_dk *ndk = snew(struct ntru_dk);
  1323. ndk->dk.vt = alg;
  1324. ndk->encoded = strbuf_new_nm();
  1325. ndk->keypair = ntru_keygen(p_LIVE, q_LIVE, w_LIVE);
  1326. ntru_encode_pubkey(ndk->keypair->h, p_LIVE, q_LIVE, ek);
  1327. return &ndk->dk;
  1328. }
  1329. static bool ntru_vt_encaps(const pq_kemalg *alg, BinarySink *c, BinarySink *k,
  1330. ptrlen ek)
  1331. {
  1332. BinarySource src[1];
  1333. BinarySource_BARE_INIT_PL(src, ek);
  1334. uint16_t *pubkey = snewn(p_LIVE, uint16_t);
  1335. ntru_decode_pubkey(pubkey, p_LIVE, q_LIVE, src);
  1336. if (get_err(src) || get_avail(src)) {
  1337. /* Hard-fail if the input wasn't exactly the right length */
  1338. ring_free(pubkey, p_LIVE);
  1339. return false;
  1340. }
  1341. /* Invent a valid NTRU plaintext. */
  1342. uint16_t *plaintext = snewn(p_LIVE, uint16_t);
  1343. ntru_gen_short(plaintext, p_LIVE, w_LIVE);
  1344. /* Encrypt the plaintext, and encode the ciphertext into a strbuf,
  1345. * so we can reuse it for both the session hash and sending to the
  1346. * client. */
  1347. uint16_t *ciphertext = snewn(p_LIVE, uint16_t);
  1348. ntru_encrypt(ciphertext, plaintext, pubkey, p_LIVE, q_LIVE);
  1349. strbuf *ciphertext_encoded = strbuf_new_nm();
  1350. ntru_encode_ciphertext(ciphertext, p_LIVE, q_LIVE,
  1351. BinarySink_UPCAST(ciphertext_encoded));
  1352. put_datapl(c, ptrlen_from_strbuf(ciphertext_encoded));
  1353. /* Compute the confirmation hash, and append that to the data sent
  1354. * to the other side. */
  1355. uint8_t confhash[32];
  1356. ntru_confirmation_hash(confhash, plaintext, pubkey, p_LIVE, q_LIVE);
  1357. put_data(c, confhash, 32);
  1358. /* Compute the session hash, i.e. the output shared secret. */
  1359. uint8_t sesshash[32];
  1360. ntru_session_hash(sesshash, 1, plaintext, p_LIVE,
  1361. ptrlen_from_strbuf(ciphertext_encoded),
  1362. make_ptrlen(confhash, 32));
  1363. put_data(k, sesshash, 32);
  1364. ring_free(pubkey, p_LIVE);
  1365. ring_free(plaintext, p_LIVE);
  1366. ring_free(ciphertext, p_LIVE);
  1367. strbuf_free(ciphertext_encoded);
  1368. smemclr(confhash, sizeof(confhash));
  1369. smemclr(sesshash, sizeof(sesshash));
  1370. return true;
  1371. }
  1372. static bool ntru_vt_decaps(pq_kem_dk *dk, BinarySink *k, ptrlen c)
  1373. {
  1374. struct ntru_dk *ndk = container_of(dk, struct ntru_dk, dk);
  1375. /* Expect a string containing a ciphertext and a confirmation hash. */
  1376. BinarySource src[1];
  1377. BinarySource_BARE_INIT_PL(src, c);
  1378. uint16_t *ciphertext = snewn(p_LIVE, uint16_t);
  1379. ptrlen ciphertext_encoded = ntru_decode_ciphertext(
  1380. ciphertext, ndk->keypair, src);
  1381. ptrlen confirmation_hash = get_data(src, 32);
  1382. if (get_err(src) || get_avail(src)) {
  1383. /* Hard-fail if the input wasn't exactly the right length */
  1384. ring_free(ciphertext, p_LIVE);
  1385. return false;
  1386. }
  1387. /* Decrypt the ciphertext to recover the sender's plaintext */
  1388. uint16_t *plaintext = snewn(p_LIVE, uint16_t);
  1389. ntru_decrypt(plaintext, ciphertext, ndk->keypair);
  1390. /* Make the confirmation hash */
  1391. uint8_t confhash[32];
  1392. ntru_confirmation_hash(confhash, plaintext, ndk->keypair->h,
  1393. p_LIVE, q_LIVE);
  1394. /* Check it matches the one the server sent */
  1395. unsigned ok = smemeq(confhash, confirmation_hash.ptr, 32);
  1396. /* If not, substitute in rho for the plaintext in the session hash */
  1397. unsigned mask = ok-1;
  1398. for (size_t i = 0; i < p_LIVE; i++)
  1399. plaintext[i] ^= mask & (plaintext[i] ^ ndk->keypair->rho[i]);
  1400. /* Compute the session hash, whether or not we did that */
  1401. uint8_t sesshash[32];
  1402. ntru_session_hash(sesshash, ok, plaintext, p_LIVE, ciphertext_encoded,
  1403. confirmation_hash);
  1404. put_data(k, sesshash, 32);
  1405. ring_free(plaintext, p_LIVE);
  1406. ring_free(ciphertext, p_LIVE);
  1407. smemclr(confhash, sizeof(confhash));
  1408. smemclr(sesshash, sizeof(sesshash));
  1409. return true;
  1410. }
  1411. static void ntru_vt_free_dk(pq_kem_dk *dk)
  1412. {
  1413. struct ntru_dk *ndk = container_of(dk, struct ntru_dk, dk);
  1414. strbuf_free(ndk->encoded);
  1415. ntru_keypair_free(ndk->keypair);
  1416. sfree(ndk);
  1417. }
  1418. const pq_kemalg ssh_ntru = {
  1419. .keygen = ntru_vt_keygen,
  1420. .encaps = ntru_vt_encaps,
  1421. .decaps = ntru_vt_decaps,
  1422. .free_dk = ntru_vt_free_dk,
  1423. .description = "NTRU Prime",
  1424. .ek_len = 1158,
  1425. .c_len = 1039,
  1426. };