aesni.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. /*
  2. * This file is adapted from PolarSSL 1.3.19 (GPL)
  3. */
  4. /*
  5. * AES-NI support functions
  6. *
  7. * Copyright (C) 2006-2014, ARM Limited, All Rights Reserved
  8. *
  9. * This file is part of mbed TLS (https://tls.mbed.org)
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  24. */
  25. /*
  26. * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
  27. * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
  28. */
  29. #include <string.h>
  30. #include "aesni.h"
  31. #if defined(HAVE_AMD64)
  32. /*
  33. * AES-NI support detection routine
  34. */
  35. #define AESNI_AES 0x02000000u
  36. int aesni_supported( void )
  37. {
  38. static int done = 0;
  39. static unsigned int c = 0;
  40. if( ! done )
  41. {
  42. asm( "movl $1, %%eax \n\t"
  43. "cpuid \n\t"
  44. : "=c" (c)
  45. :
  46. : "eax", "ebx", "edx" );
  47. done = 1;
  48. }
  49. return( ( c & AESNI_AES ) != 0 );
  50. }
  51. /*
  52. * Binutils needs to be at least 2.19 to support AES-NI instructions.
  53. * Unfortunately, a lot of users have a lower version now (2014-04).
  54. * Emit bytecode directly in order to support "old" version of gas.
  55. *
  56. * Opcodes from the Intel architecture reference manual, vol. 3.
  57. * We always use registers, so we don't need prefixes for memory operands.
  58. * Operand macros are in gas order (src, dst) as opposed to Intel order
  59. * (dst, src) in order to blend better into the surrounding assembly code.
  60. */
  61. #define AESDEC ".byte 0x66,0x0F,0x38,0xDE,"
  62. #define AESDECLAST ".byte 0x66,0x0F,0x38,0xDF,"
  63. #define AESENC ".byte 0x66,0x0F,0x38,0xDC,"
  64. #define AESENCLAST ".byte 0x66,0x0F,0x38,0xDD,"
  65. #define AESIMC ".byte 0x66,0x0F,0x38,0xDB,"
  66. #define AESKEYGENA ".byte 0x66,0x0F,0x3A,0xDF,"
  67. #define PCLMULQDQ ".byte 0x66,0x0F,0x3A,0x44,"
  68. #define xmm0_xmm0 "0xC0"
  69. #define xmm0_xmm1 "0xC8"
  70. #define xmm0_xmm2 "0xD0"
  71. #define xmm0_xmm3 "0xD8"
  72. #define xmm0_xmm4 "0xE0"
  73. #define xmm1_xmm0 "0xC1"
  74. #define xmm1_xmm2 "0xD1"
  75. /*
  76. * AES-NI AES-ECB block en(de)cryption
  77. */
  78. void aesni_crypt_ecb( int nr,
  79. unsigned char *rk,
  80. int mode,
  81. const unsigned char input[16],
  82. unsigned char output[16] )
  83. {
  84. asm( "movdqu (%3), %%xmm0 \n\t" // load input
  85. "movdqu (%1), %%xmm1 \n\t" // load round key 0
  86. "pxor %%xmm1, %%xmm0 \n\t" // round 0
  87. "addq $16, %1 \n\t" // point to next round key
  88. "subl $1, %0 \n\t" // normal rounds = nr - 1
  89. "test %2, %2 \n\t" // mode?
  90. "jz 2f \n\t" // 0 = decrypt
  91. "1: \n\t" // encryption loop
  92. "movdqu (%1), %%xmm1 \n\t" // load round key
  93. AESENC xmm1_xmm0 "\n\t" // do round
  94. "addq $16, %1 \n\t" // point to next round key
  95. "subl $1, %0 \n\t" // loop
  96. "jnz 1b \n\t"
  97. "movdqu (%1), %%xmm1 \n\t" // load round key
  98. AESENCLAST xmm1_xmm0 "\n\t" // last round
  99. "jmp 3f \n\t"
  100. "2: \n\t" // decryption loop
  101. "movdqu (%1), %%xmm1 \n\t"
  102. AESDEC xmm1_xmm0 "\n\t" // do round
  103. "addq $16, %1 \n\t"
  104. "subl $1, %0 \n\t"
  105. "jnz 2b \n\t"
  106. "movdqu (%1), %%xmm1 \n\t" // load round key
  107. AESDECLAST xmm1_xmm0 "\n\t" // last round
  108. "3: \n\t"
  109. "movdqu %%xmm0, (%4) \n\t" // export output
  110. :
  111. : "r" (nr), "r" (rk), "r" (mode), "r" (input), "r" (output)
  112. : "memory", "cc", "xmm0", "xmm1" );
  113. }
  114. /*
  115. * Compute decryption round keys from encryption round keys
  116. */
  117. void aesni_inverse_key( unsigned char *invkey,
  118. const unsigned char *fwdkey, int nr )
  119. {
  120. unsigned char *ik = invkey;
  121. const unsigned char *fk = fwdkey + 16 * nr;
  122. memcpy( ik, fk, 16 );
  123. for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
  124. asm( "movdqu (%0), %%xmm0 \n\t"
  125. AESIMC xmm0_xmm0 "\n\t"
  126. "movdqu %%xmm0, (%1) \n\t"
  127. :
  128. : "r" (fk), "r" (ik)
  129. : "memory", "xmm0" );
  130. memcpy( ik, fk, 16 );
  131. }
  132. /*
  133. * Key expansion, 128-bit case
  134. */
  135. void aesni_setkey_enc_128( unsigned char *rk,
  136. const unsigned char *key )
  137. {
  138. asm( "movdqu (%1), %%xmm0 \n\t" // copy the original key
  139. "movdqu %%xmm0, (%0) \n\t" // as round key 0
  140. "jmp 2f \n\t" // skip auxiliary routine
  141. /*
  142. * Finish generating the next round key.
  143. *
  144. * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
  145. * with X = rot( sub( r3 ) ) ^ RCON.
  146. *
  147. * On exit, xmm0 is r7:r6:r5:r4
  148. * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
  149. * and those are written to the round key buffer.
  150. */
  151. "1: \n\t"
  152. "pshufd $0xff, %%xmm1, %%xmm1 \n\t" // X:X:X:X
  153. "pxor %%xmm0, %%xmm1 \n\t" // X+r3:X+r2:X+r1:r4
  154. "pslldq $4, %%xmm0 \n\t" // r2:r1:r0:0
  155. "pxor %%xmm0, %%xmm1 \n\t" // X+r3+r2:X+r2+r1:r5:r4
  156. "pslldq $4, %%xmm0 \n\t" // etc
  157. "pxor %%xmm0, %%xmm1 \n\t"
  158. "pslldq $4, %%xmm0 \n\t"
  159. "pxor %%xmm1, %%xmm0 \n\t" // update xmm0 for next time!
  160. "add $16, %0 \n\t" // point to next round key
  161. "movdqu %%xmm0, (%0) \n\t" // write it
  162. "ret \n\t"
  163. /* Main "loop" */
  164. "2: \n\t"
  165. AESKEYGENA xmm0_xmm1 ",0x01 \n\tcall 1b \n\t"
  166. AESKEYGENA xmm0_xmm1 ",0x02 \n\tcall 1b \n\t"
  167. AESKEYGENA xmm0_xmm1 ",0x04 \n\tcall 1b \n\t"
  168. AESKEYGENA xmm0_xmm1 ",0x08 \n\tcall 1b \n\t"
  169. AESKEYGENA xmm0_xmm1 ",0x10 \n\tcall 1b \n\t"
  170. AESKEYGENA xmm0_xmm1 ",0x20 \n\tcall 1b \n\t"
  171. AESKEYGENA xmm0_xmm1 ",0x40 \n\tcall 1b \n\t"
  172. AESKEYGENA xmm0_xmm1 ",0x80 \n\tcall 1b \n\t"
  173. AESKEYGENA xmm0_xmm1 ",0x1B \n\tcall 1b \n\t"
  174. AESKEYGENA xmm0_xmm1 ",0x36 \n\tcall 1b \n\t"
  175. :
  176. : "r" (rk), "r" (key)
  177. : "memory", "cc", "0" );
  178. }
  179. /*
  180. * Key expansion, 192-bit case
  181. */
  182. void aesni_setkey_enc_192( unsigned char *rk,
  183. const unsigned char *key )
  184. {
  185. asm( "movdqu (%1), %%xmm0 \n\t" // copy original round key
  186. "movdqu %%xmm0, (%0) \n\t"
  187. "add $16, %0 \n\t"
  188. "movq 16(%1), %%xmm1 \n\t"
  189. "movq %%xmm1, (%0) \n\t"
  190. "add $8, %0 \n\t"
  191. "jmp 2f \n\t" // skip auxiliary routine
  192. /*
  193. * Finish generating the next 6 quarter-keys.
  194. *
  195. * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
  196. * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
  197. *
  198. * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
  199. * and those are written to the round key buffer.
  200. */
  201. "1: \n\t"
  202. "pshufd $0x55, %%xmm2, %%xmm2 \n\t" // X:X:X:X
  203. "pxor %%xmm0, %%xmm2 \n\t" // X+r3:X+r2:X+r1:r4
  204. "pslldq $4, %%xmm0 \n\t" // etc
  205. "pxor %%xmm0, %%xmm2 \n\t"
  206. "pslldq $4, %%xmm0 \n\t"
  207. "pxor %%xmm0, %%xmm2 \n\t"
  208. "pslldq $4, %%xmm0 \n\t"
  209. "pxor %%xmm2, %%xmm0 \n\t" // update xmm0 = r9:r8:r7:r6
  210. "movdqu %%xmm0, (%0) \n\t"
  211. "add $16, %0 \n\t"
  212. "pshufd $0xff, %%xmm0, %%xmm2 \n\t" // r9:r9:r9:r9
  213. "pxor %%xmm1, %%xmm2 \n\t" // stuff:stuff:r9+r5:r10
  214. "pslldq $4, %%xmm1 \n\t" // r2:r1:r0:0
  215. "pxor %%xmm2, %%xmm1 \n\t" // xmm1 = stuff:stuff:r11:r10
  216. "movq %%xmm1, (%0) \n\t"
  217. "add $8, %0 \n\t"
  218. "ret \n\t"
  219. "2: \n\t"
  220. AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t"
  221. AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t"
  222. AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t"
  223. AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t"
  224. AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t"
  225. AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t"
  226. AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t"
  227. AESKEYGENA xmm1_xmm2 ",0x80 \n\tcall 1b \n\t"
  228. :
  229. : "r" (rk), "r" (key)
  230. : "memory", "cc", "0" );
  231. }
  232. /*
  233. * Key expansion, 256-bit case
  234. */
  235. void aesni_setkey_enc_256( unsigned char *rk,
  236. const unsigned char *key )
  237. {
  238. asm( "movdqu (%1), %%xmm0 \n\t"
  239. "movdqu %%xmm0, (%0) \n\t"
  240. "add $16, %0 \n\t"
  241. "movdqu 16(%1), %%xmm1 \n\t"
  242. "movdqu %%xmm1, (%0) \n\t"
  243. "jmp 2f \n\t" // skip auxiliary routine
  244. /*
  245. * Finish generating the next two round keys.
  246. *
  247. * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
  248. * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
  249. *
  250. * On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
  251. * and those have been written to the output buffer.
  252. */
  253. "1: \n\t"
  254. "pshufd $0xff, %%xmm2, %%xmm2 \n\t"
  255. "pxor %%xmm0, %%xmm2 \n\t"
  256. "pslldq $4, %%xmm0 \n\t"
  257. "pxor %%xmm0, %%xmm2 \n\t"
  258. "pslldq $4, %%xmm0 \n\t"
  259. "pxor %%xmm0, %%xmm2 \n\t"
  260. "pslldq $4, %%xmm0 \n\t"
  261. "pxor %%xmm2, %%xmm0 \n\t"
  262. "add $16, %0 \n\t"
  263. "movdqu %%xmm0, (%0) \n\t"
  264. /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
  265. * and proceed to generate next round key from there */
  266. AESKEYGENA xmm0_xmm2 ",0x00 \n\t"
  267. "pshufd $0xaa, %%xmm2, %%xmm2 \n\t"
  268. "pxor %%xmm1, %%xmm2 \n\t"
  269. "pslldq $4, %%xmm1 \n\t"
  270. "pxor %%xmm1, %%xmm2 \n\t"
  271. "pslldq $4, %%xmm1 \n\t"
  272. "pxor %%xmm1, %%xmm2 \n\t"
  273. "pslldq $4, %%xmm1 \n\t"
  274. "pxor %%xmm2, %%xmm1 \n\t"
  275. "add $16, %0 \n\t"
  276. "movdqu %%xmm1, (%0) \n\t"
  277. "ret \n\t"
  278. /*
  279. * Main "loop" - Generating one more key than necessary,
  280. * see definition of aes_context.buf
  281. */
  282. "2: \n\t"
  283. AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t"
  284. AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t"
  285. AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t"
  286. AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t"
  287. AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t"
  288. AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t"
  289. AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t"
  290. :
  291. : "r" (rk), "r" (key)
  292. : "memory", "cc", "0" );
  293. }
  294. #endif /* HAVE_AMD64 */