100-Implements-AES-and-GCM-with-ARMv8-Crypto-Extensions.patch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. From dfb6015ca79a9fee28f7fcb0af7e350a83574b83 Mon Sep 17 00:00:00 2001
  2. From: "Markku-Juhani O. Saarinen" <[email protected]>
  3. Date: Mon, 20 Nov 2017 14:58:41 +0000
  4. Subject: Implements AES and GCM with ARMv8 Crypto Extensions
  5. A compact patch that provides AES and GCM implementations that utilize the
  6. ARMv8 Crypto Extensions. The config flag is MBEDTLS_ARMV8CE_AES_C, which
  7. is disabled by default as we don't do runtime checking for the feature.
  8. The new implementation lives in armv8ce_aes.c.
  9. Provides similar functionality to https://github.com/ARMmbed/mbedtls/pull/432
  10. Thanks to Barry O'Rourke and others for that contribtion.
  11. Tested on a Cortex A53 device and QEMU. On a midrange phone the real AES-GCM
  12. throughput increases about 4x, while raw AES speed is up to 10x faster.
  13. When cross-compiling, you want to set something like:
  14. export CC='aarch64-linux-gnu-gcc'
  15. export CFLAGS='-Ofast -march=armv8-a+crypto'
  16. scripts/config.pl set MBEDTLS_ARMV8CE_AES_C
  17. QEMU seems to also need
  18. export LDFLAGS='-static'
  19. Then run normal make or cmake etc.
  20. ---
  21. diff -ruNa --binary a/ChangeLog.d/armv8_crypto_extensions.txt b/ChangeLog.d/armv8_crypto_extensions.txt
  22. --- a/ChangeLog.d/armv8_crypto_extensions.txt 1970-01-01 08:00:00.000000000 +0800
  23. +++ b/ChangeLog.d/armv8_crypto_extensions.txt 2021-03-07 15:07:17.781911791 +0800
  24. @@ -0,0 +1,2 @@
  25. +Features
  26. + * Support ARMv8 Cryptography Extensions for AES and GCM.
  27. diff -ruNa --binary a/include/mbedtls/armv8ce_aes.h b/include/mbedtls/armv8ce_aes.h
  28. --- a/include/mbedtls/armv8ce_aes.h 1970-01-01 08:00:00.000000000 +0800
  29. +++ b/include/mbedtls/armv8ce_aes.h 2021-03-07 15:07:17.781911791 +0800
  30. @@ -0,0 +1,63 @@
  31. +/**
  32. + * \file armv8ce_aes.h
  33. + *
  34. + * \brief ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
  35. + */
  36. +
  37. +/*
  38. + *
  39. + * Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
  40. + * SPDX-License-Identifier: Apache-2.0
  41. + *
  42. + * Licensed under the Apache License, Version 2.0 (the "License"); you may
  43. + * not use this file except in compliance with the License.
  44. + * You may obtain a copy of the License at
  45. + *
  46. + * http://www.apache.org/licenses/LICENSE-2.0
  47. + *
  48. + * Unless required by applicable law or agreed to in writing, software
  49. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  50. + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  51. + * See the License for the specific language governing permissions and
  52. + * limitations under the License.
  53. + *
  54. + * This file is part of mbed TLS (https://tls.mbed.org)
  55. + */
  56. +
  57. +#ifndef MBEDTLS_ARMV8CE_AES_H
  58. +#define MBEDTLS_ARMV8CE_AES_H
  59. +
  60. +#include "aes.h"
  61. +
  62. +/**
  63. + * \brief [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
  64. + *
  65. + * \param ctx AES context
  66. + * \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
  67. + * \param input 16-byte input block
  68. + * \param output 16-byte output block
  69. + *
  70. + * \return 0 on success (cannot fail)
  71. + */
  72. +
  73. +int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
  74. + int mode,
  75. + const unsigned char input[16],
  76. + unsigned char output[16] );
  77. +
  78. +/**
  79. + * \brief [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
  80. + *
  81. + * \param c Result
  82. + * \param a First operand
  83. + * \param b Second operand
  84. + *
  85. + * \note Both operands and result are bit strings interpreted as
  86. + * elements of GF(2^128) as per the GCM spec.
  87. + */
  88. +
  89. +void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
  90. + const unsigned char a[16],
  91. + const unsigned char b[16] );
  92. +
  93. +#endif /* MBEDTLS_ARMV8CE_AES_H */
  94. diff -ruNa --binary a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
  95. --- a/include/mbedtls/check_config.h 2020-12-10 20:54:15.000000000 +0800
  96. +++ b/include/mbedtls/check_config.h 2021-03-07 15:06:45.625543309 +0800
  97. @@ -95,6 +95,10 @@
  98. #error "MBEDTLS_AESNI_C defined, but not all prerequisites"
  99. #endif
  100. +#if defined(MBEDTLS_ARMV8CE_AES_C) && !defined(MBEDTLS_HAVE_ASM)
  101. +#error "MBEDTLS_ARMV8CE_AES_C defined, but not all prerequisites"
  102. +#endif
  103. +
  104. #if defined(MBEDTLS_CTR_DRBG_C) && !defined(MBEDTLS_AES_C)
  105. #error "MBEDTLS_CTR_DRBG_C defined, but not all prerequisites"
  106. #endif
  107. @@ -772,3 +776,4 @@
  108. typedef int mbedtls_iso_c_forbids_empty_translation_units;
  109. #endif /* MBEDTLS_CHECK_CONFIG_H */
  110. +
  111. diff -ruNa --binary a/include/mbedtls/config.h b/include/mbedtls/config.h
  112. --- a/include/mbedtls/config.h 2020-12-10 20:54:15.000000000 +0800
  113. +++ b/include/mbedtls/config.h 2021-03-07 15:14:27.957855484 +0800
  114. @@ -73,6 +73,7 @@
  115. * Requires support for asm() in compiler.
  116. *
  117. * Used in:
  118. + * library/armv8ce_aes.c
  119. * library/aria.c
  120. * library/timing.c
  121. * include/mbedtls/bn_mul.h
  122. @@ -1888,6 +1889,21 @@
  123. #define MBEDTLS_AESNI_C
  124. /**
  125. + * \def MBEDTLS_ARMV8CE_AES_C
  126. + *
  127. + * Enable ARMv8 Crypto Extensions for AES and GCM
  128. + *
  129. + * Module: library/armv8ce_aes.c
  130. + * Caller: library/aes.c
  131. + * library/gcm.c
  132. + *
  133. + * Requires: MBEDTLS_HAVE_ASM
  134. + *
  135. + * This module adds support for Armv8 Cryptography Extensions for AES and GCM.
  136. + */
  137. +//#define MBEDTLS_ARMV8CE_AES_C
  138. +
  139. +/**
  140. * \def MBEDTLS_AES_C
  141. *
  142. * Enable the AES block cipher.
  143. diff -ruNa --binary a/library/aes.c b/library/aes.c
  144. --- a/library/aes.c 2020-12-10 20:54:15.000000000 +0800
  145. +++ b/library/aes.c 2021-03-07 15:06:45.625543309 +0800
  146. @@ -69,7 +69,9 @@
  147. #if defined(MBEDTLS_AESNI_C)
  148. #include "mbedtls/aesni.h"
  149. #endif
  150. -
  151. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  152. +#include "mbedtls/armv8ce_aes.h"
  153. +#endif
  154. #if defined(MBEDTLS_SELF_TEST)
  155. #if defined(MBEDTLS_PLATFORM_C)
  156. #include "mbedtls/platform.h"
  157. @@ -1052,6 +1054,11 @@
  158. return( mbedtls_aesni_crypt_ecb( ctx, mode, input, output ) );
  159. #endif
  160. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  161. + // We don't do runtime checking for ARMv8 Crypto Extensions
  162. + return mbedtls_armv8ce_aes_crypt_ecb( ctx, mode, input, output );
  163. +#endif
  164. +
  165. #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
  166. if( aes_padlock_ace )
  167. {
  168. diff -ruNa --binary a/library/armv8ce_aes.c b/library/armv8ce_aes.c
  169. --- a/library/armv8ce_aes.c 1970-01-01 08:00:00.000000000 +0800
  170. +++ b/library/armv8ce_aes.c 2021-03-07 15:07:17.781911791 +0800
  171. @@ -0,0 +1,142 @@
  172. +/*
  173. + * ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
  174. + *
  175. + * Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
  176. + * SPDX-License-Identifier: Apache-2.0
  177. + *
  178. + * Licensed under the Apache License, Version 2.0 (the "License"); you may
  179. + * not use this file except in compliance with the License.
  180. + * You may obtain a copy of the License at
  181. + *
  182. + * http://www.apache.org/licenses/LICENSE-2.0
  183. + *
  184. + * Unless required by applicable law or agreed to in writing, software
  185. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  186. + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  187. + * See the License for the specific language governing permissions and
  188. + * limitations under the License.
  189. + *
  190. + * This file is part of mbed TLS (https://tls.mbed.org)
  191. + */
  192. +
  193. +#if !defined(MBEDTLS_CONFIG_FILE)
  194. +#include "mbedtls/config.h"
  195. +#else
  196. +#include MBEDTLS_CONFIG_FILE
  197. +#endif
  198. +
  199. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  200. +
  201. +#include <arm_neon.h>
  202. +#include "mbedtls/armv8ce_aes.h"
  203. +
  204. +#ifndef asm
  205. +#define asm __asm
  206. +#endif
  207. +
  208. +/*
  209. + * [Armv8 Cryptography Extensions] AES-ECB block en(de)cryption
  210. + */
  211. +
  212. +#if defined(MBEDTLS_AES_C)
  213. +
  214. +int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
  215. + int mode,
  216. + const unsigned char input[16],
  217. + unsigned char output[16] )
  218. +{
  219. + unsigned int i;
  220. + const uint8_t *rk;
  221. + uint8x16_t x, k;
  222. +
  223. + x = vld1q_u8( input ); /* input block */
  224. + rk = (const uint8_t *) ctx->rk; /* round keys */
  225. +
  226. + if( mode == MBEDTLS_AES_ENCRYPT )
  227. + {
  228. + for( i = ctx->nr - 1; i != 0; i-- ) /* encryption loop */
  229. + {
  230. + k = vld1q_u8( rk );
  231. + rk += 16;
  232. + x = vaeseq_u8( x, k );
  233. + x = vaesmcq_u8( x );
  234. + }
  235. + k = vld1q_u8( rk );
  236. + rk += 16;
  237. + x = vaeseq_u8( x, k );
  238. + }
  239. + else
  240. + {
  241. + for( i = ctx->nr - 1; i != 0 ; i-- ) /* decryption loop */
  242. + {
  243. + k = vld1q_u8( rk );
  244. + rk += 16;
  245. + x = vaesdq_u8( x, k );
  246. + x = vaesimcq_u8( x );
  247. + }
  248. + k = vld1q_u8( rk );
  249. + rk += 16;
  250. + x = vaesdq_u8( x, k );
  251. + }
  252. +
  253. + k = vld1q_u8( rk ); /* final key just XORed */
  254. + x = veorq_u8( x, k );
  255. + vst1q_u8( output, x ); /* write out */
  256. +
  257. + return ( 0 );
  258. +}
  259. +
  260. +#endif /* MBEDTLS_AES_C */
  261. +
  262. +
  263. +/*
  264. + * [Armv8 Cryptography Extensions] Multiply in GF(2^128) for GCM
  265. + */
  266. +
  267. +#if defined(MBEDTLS_GCM_C)
  268. +
  269. +void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
  270. + const unsigned char a[16],
  271. + const unsigned char b[16] )
  272. +{
  273. + /* GCM's GF(2^128) polynomial basis is x^128 + x^7 + x^2 + x + 1 */
  274. + const uint64x2_t base = { 0, 0x86 }; /* note missing LS bit */
  275. +
  276. + register uint8x16_t vc asm( "v0" ); /* named registers */
  277. + register uint8x16_t va asm( "v1" ); /* (to avoid conflict) */
  278. + register uint8x16_t vb asm( "v2" );
  279. + register uint64x2_t vp asm( "v3" );
  280. +
  281. + va = vld1q_u8( a ); /* load inputs */
  282. + vb = vld1q_u8( b );
  283. + vp = base;
  284. +
  285. + asm (
  286. + "rbit %1.16b, %1.16b \n\t" /* reverse bit order */
  287. + "rbit %2.16b, %2.16b \n\t"
  288. + "pmull2 %0.1q, %1.2d, %2.2d \n\t" /* v0 = a.hi * b.hi */
  289. + "pmull2 v4.1q, %0.2d, %3.2d \n\t" /* mul v0 by x^64, reduce */
  290. + "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
  291. + "eor %0.16b, %0.16b, v4.16b \n\t"
  292. + "ext v5.16b, %2.16b, %2.16b, #8 \n\t" /* (swap hi and lo in b) */
  293. + "pmull v4.1q, %1.1d, v5.1d \n\t" /* v0 ^= a.lo * b.hi */
  294. + "eor %0.16b, %0.16b, v4.16b \n\t"
  295. + "pmull2 v4.1q, %1.2d, v5.2d \n\t" /* v0 ^= a.hi * b.lo */
  296. + "eor %0.16b, %0.16b, v4.16b \n\t"
  297. + "pmull2 v4.1q, %0.2d, %3.2d \n\t" /* mul v0 by x^64, reduce */
  298. + "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
  299. + "eor %0.16b, %0.16b, v4.16b \n\t"
  300. + "pmull v4.1q, %1.1d, %2.1d \n\t" /* v0 ^= a.lo * b.lo */
  301. + "eor %0.16b, %0.16b, v4.16b \n\t"
  302. + "rbit %0.16b, %0.16b \n\t" /* reverse bits for output */
  303. + : "=w" (vc) /* q0: output */
  304. + : "w" (va), "w" (vb), "w" (vp) /* q1, q2: input */
  305. + : "v4", "v5" /* q4, q5: clobbered */
  306. + );
  307. +
  308. + vst1q_u8( c, vc ); /* write out */
  309. +}
  310. +
  311. +#endif /* MBEDTLS_GCM_C */
  312. +
  313. +#endif /* MBEDTLS_ARMV8CE_AES_C */
  314. diff -ruNa --binary a/library/CMakeLists.txt b/library/CMakeLists.txt
  315. --- a/library/CMakeLists.txt 2020-12-10 20:54:15.000000000 +0800
  316. +++ b/library/CMakeLists.txt 2021-03-07 15:06:45.625543309 +0800
  317. @@ -7,6 +7,7 @@
  318. aesni.c
  319. arc4.c
  320. aria.c
  321. + armv8ce_aes.c
  322. asn1parse.c
  323. asn1write.c
  324. base64.c
  325. diff -ruNa --binary a/library/gcm.c b/library/gcm.c
  326. --- a/library/gcm.c 2020-12-10 20:54:15.000000000 +0800
  327. +++ b/library/gcm.c 2021-03-07 15:06:45.625543309 +0800
  328. @@ -71,6 +71,10 @@
  329. #include "mbedtls/aesni.h"
  330. #endif
  331. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  332. +#include "mbedtls/armv8ce_aes.h"
  333. +#endif
  334. +
  335. #if defined(MBEDTLS_SELF_TEST) && defined(MBEDTLS_AES_C)
  336. #include "mbedtls/aes.h"
  337. #include "mbedtls/platform.h"
  338. @@ -140,6 +144,12 @@
  339. if( ( ret = mbedtls_cipher_update( &ctx->cipher_ctx, h, 16, h, &olen ) ) != 0 )
  340. return( ret );
  341. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  342. + // we don't do feature testing with ARMv8 cryptography extensions
  343. + memcpy( ctx ->HL, h, 16 ); // put H at the beginning of buffer
  344. + return( 0 ); // that's all we need
  345. +#endif
  346. +
  347. /* pack h as two 64-bits ints, big-endian */
  348. GET_UINT32_BE( hi, h, 0 );
  349. GET_UINT32_BE( lo, h, 4 );
  350. @@ -248,6 +258,11 @@
  351. unsigned char lo, hi, rem;
  352. uint64_t zh, zl;
  353. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  354. + mbedtls_armv8ce_gcm_mult( output, x, (const unsigned char *) ctx->HL );
  355. + return;
  356. +#endif
  357. +
  358. #if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
  359. if( mbedtls_aesni_has_support( MBEDTLS_AESNI_CLMUL ) ) {
  360. unsigned char h[16];
  361. diff -ruNa --binary a/library/Makefile b/library/Makefile
  362. --- a/library/Makefile 2020-12-10 20:54:15.000000000 +0800
  363. +++ b/library/Makefile 2021-03-07 15:12:49.277078224 +0800
  364. @@ -65,6 +65,7 @@
  365. OBJS_CRYPTO= aes.o aesni.o arc4.o \
  366. aria.o asn1parse.o asn1write.o \
  367. + armv8ce_aes.o \
  368. base64.o bignum.o blowfish.o \
  369. camellia.o ccm.o chacha20.o \
  370. chachapoly.o cipher.o cipher_wrap.o \
  371. diff -ruNa --binary a/library/version_features.c b/library/version_features.c
  372. --- a/library/version_features.c 2020-12-10 20:54:15.000000000 +0800
  373. +++ b/library/version_features.c 2021-03-07 15:06:45.625543309 +0800
  374. @@ -583,6 +583,9 @@
  375. #if defined(MBEDTLS_AESNI_C)
  376. "MBEDTLS_AESNI_C",
  377. #endif /* MBEDTLS_AESNI_C */
  378. +#if defined(MBEDTLS_ARMV8CE_AES_C)
  379. + "MBEDTLS_ARMV8CE_AES_C",
  380. +#endif /* MBEDTLS_ARMV8CE_AES_C */
  381. #if defined(MBEDTLS_AES_C)
  382. "MBEDTLS_AES_C",
  383. #endif /* MBEDTLS_AES_C */