@@ -1,247 +1,4 @@
 /*
- * sshaes.c - implementation of AES
- */
-
-#include <assert.h>
-#include <stdlib.h>
-
-#include "ssh.h"
-#include "mpint_i.h"               /* we reuse the BignumInt system */
-
-/*
- * Start by deciding whether we can support hardware AES at all.
- */
-#define HW_AES_NONE 0
-#define HW_AES_NI 1
-#define HW_AES_NEON 2
-
-#ifdef _FORCE_AES_NI
-#   define HW_AES HW_AES_NI
-#elif defined(__clang__)
-#   if __has_attribute(target) && __has_include(<wmmintrin.h>) &&       \
-    (defined(__x86_64__) || defined(__i386))
-#       define HW_AES HW_AES_NI
-#   endif
-#elif defined(__GNUC__)
-#    if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) && \
-    (defined(__x86_64__) || defined(__i386))
-#       define HW_AES HW_AES_NI
-#    endif
-#elif defined (_MSC_VER)
-#   if (defined(_M_X64) || defined(_M_IX86)) && _MSC_FULL_VER >= 150030729
-#      define HW_AES HW_AES_NI
-#   endif
-#endif
-
-#ifdef _FORCE_AES_NEON
-#   define HW_AES HW_AES_NEON
-#elif defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    /* Arm can potentially support both endiannesses, but this code
-     * hasn't been tested on anything but little. If anyone wants to
-     * run big-endian, they'll need to fix it first. */
-#elif defined __ARM_FEATURE_CRYPTO
-    /* If the Arm crypto extension is available already, we can
-     * support NEON AES without having to enable anything by hand */
-#   define HW_AES HW_AES_NEON
-#elif defined(__clang__)
-#   if __has_attribute(target) && __has_include(<arm_neon.h>) &&       \
-    (defined(__aarch64__))
-        /* clang can enable the crypto extension in AArch64 using
-         * __attribute__((target)) */
-#       define HW_AES HW_AES_NEON
-#       define USE_CLANG_ATTR_TARGET_AARCH64
-#   endif
-#elif defined _MSC_VER
-#   if defined _M_ARM64
-#       define HW_AES HW_AES_NEON
-        /* 64-bit Visual Studio uses the header <arm64_neon.h> in place
-         * of the standard <arm_neon.h> */
-#       define USE_ARM64_NEON_H
-#   elif defined _M_ARM
-#       define HW_AES HW_AES_NEON
-        /* 32-bit Visual Studio uses the right header name, but requires
-         * this #define to enable a set of intrinsic definitions that
-         * do not omit one of the parameters for vaes[ed]q_u8 */
-#       define _ARM_USE_NEW_NEON_INTRINSICS
-#   endif
-#endif
-
-#if defined _FORCE_SOFTWARE_AES || !defined HW_AES
-#   undef HW_AES
-#   define HW_AES HW_AES_NONE
-#endif
-
-#if HW_AES == HW_AES_NI
-#define HW_NAME_SUFFIX " (AES-NI accelerated)"
-#elif HW_AES == HW_AES_NEON
-#define HW_NAME_SUFFIX " (NEON accelerated)"
-#else
-#define HW_NAME_SUFFIX " (!NONEXISTENT ACCELERATED VERSION!)"
-#endif
-
-/*
- * Vtable collection for AES. For each SSH-level cipher id (i.e.
- * combination of key length and cipher mode), we provide three
- * vtables: one for the pure software implementation, one using
- * hardware acceleration (if available), and a top-level one which is
- * never actually instantiated, and only contains a new() method whose
- * job is to decide which of the other two to return an actual
- * instance of.
- */
-
-static ssh_cipher *aes_select(const ssh_cipheralg *alg);
-static ssh_cipher *aes_sw_new(const ssh_cipheralg *alg);
-static void aes_sw_free(ssh_cipher *);
-static void aes_sw_setiv_cbc(ssh_cipher *, const void *iv);
-static void aes_sw_setiv_sdctr(ssh_cipher *, const void *iv);
-static void aes_sw_setkey(ssh_cipher *, const void *key);
-static ssh_cipher *aes_hw_new(const ssh_cipheralg *alg);
-static void aes_hw_free(ssh_cipher *);
-static void aes_hw_setiv_cbc(ssh_cipher *, const void *iv);
-static void aes_hw_setiv_sdctr(ssh_cipher *, const void *iv);
-static void aes_hw_setkey(ssh_cipher *, const void *key);
-
-struct aes_extra {
-    const ssh_cipheralg *sw, *hw;
-};
-
-#define VTABLES_INNER(cid, pid, bits, name, encsuffix,                  \
-                      decsuffix, setivsuffix, flagsval)                 \
-    static void cid##_sw##encsuffix(ssh_cipher *, void *blk, int len);  \
-    static void cid##_sw##decsuffix(ssh_cipher *, void *blk, int len);  \
-    const ssh_cipheralg ssh_##cid##_sw = {                              \
-        .new = aes_sw_new,                                              \
-        .free = aes_sw_free,                                            \
-        .setiv = aes_sw_##setivsuffix,                                  \
-        .setkey = aes_sw_setkey,                                        \
-        .encrypt = cid##_sw##encsuffix,                                 \
-        .decrypt = cid##_sw##decsuffix,                                 \
-        .ssh2_id = pid,                                                 \
-        .blksize = 16,                                                  \
-        .real_keybits = bits,                                           \
-        .padded_keybytes = bits/8,                                      \
-        .flags = flagsval,                                              \
-        .text_name = name " (unaccelerated)",                           \
-    };                                                                  \
-                                                                        \
-    static void cid##_hw##encsuffix(ssh_cipher *, void *blk, int len);  \
-    static void cid##_hw##decsuffix(ssh_cipher *, void *blk, int len);  \
-    const ssh_cipheralg ssh_##cid##_hw = {                              \
-        .new = aes_hw_new,                                              \
-        .free = aes_hw_free,                                            \
-        .setiv = aes_hw_##setivsuffix,                                  \
-        .setkey = aes_hw_setkey,                                        \
-        .encrypt = cid##_hw##encsuffix,                                 \
-        .decrypt = cid##_hw##decsuffix,                                 \
-        .ssh2_id = pid,                                                 \
-        .blksize = 16,                                                  \
-        .real_keybits = bits,                                           \
-        .padded_keybytes = bits/8,                                      \
-        .flags = flagsval,                                              \
-        .text_name = name HW_NAME_SUFFIX,                               \
-    };                                                                  \
-                                                                        \
-    static const struct aes_extra extra_##cid = {                       \
-        &ssh_##cid##_sw, &ssh_##cid##_hw };                             \
-                                                                        \
-    const ssh_cipheralg ssh_##cid = {                                   \
-        .new = aes_select,                                              \
-        .ssh2_id = pid,                                                 \
-        .blksize = 16,                                                  \
-        .real_keybits = bits,                                           \
-        .padded_keybytes = bits/8,                                      \
-        .flags = flagsval,                                              \
-        .text_name = name " (dummy selector vtable)",                   \
-        .extra = &extra_##cid                                           \
-    };                                                                  \
-
-#define VTABLES(keylen)                                                 \
-    VTABLES_INNER(aes ## keylen ## _cbc, "aes" #keylen "-cbc",          \
-                  keylen, "AES-" #keylen " CBC", _encrypt, _decrypt,    \
-                  setiv_cbc, SSH_CIPHER_IS_CBC)                         \
-    VTABLES_INNER(aes ## keylen ## _sdctr, "aes" #keylen "-ctr",        \
-                  keylen, "AES-" #keylen " SDCTR",,, setiv_sdctr, 0)
-
-VTABLES(128)
-VTABLES(192)
-VTABLES(256)
-
-static const ssh_cipheralg ssh_rijndael_lysator = {
-    /* Same as aes256_cbc, but with a different protocol ID */
-    .new = aes_select,
-    .ssh2_id = "rijndael-cbc@lysator.liu.se",
-    .blksize = 16,
-    .real_keybits = 256,
-    .padded_keybytes = 256/8,
-    .flags = 0,
-    .text_name = "AES-256 CBC (dummy selector vtable)",
-    .extra = &extra_aes256_cbc,
-};
-
-static const ssh_cipheralg *const aes_list[] = {
-    &ssh_aes256_sdctr,
-    &ssh_aes256_cbc,
-    &ssh_rijndael_lysator,
-    &ssh_aes192_sdctr,
-    &ssh_aes192_cbc,
-    &ssh_aes128_sdctr,
-    &ssh_aes128_cbc,
-};
-
-const ssh2_ciphers ssh2_aes = { lenof(aes_list), aes_list };
-
-/*
- * The actual query function that asks if hardware acceleration is
- * available.
- */
-static bool aes_hw_available(void);
-
-/*
- * The top-level selection function, caching the results of
- * aes_hw_available() so it only has to run once.
- */
-static bool aes_hw_available_cached(void)
-{
-    static bool initialised = false;
-    static bool hw_available;
-    if (!initialised) {
-        hw_available = aes_hw_available();
-        initialised = true;
-    }
-    return hw_available;
-}
-
-static ssh_cipher *aes_select(const ssh_cipheralg *alg)
-{
-    const struct aes_extra *extra = (const struct aes_extra *)alg->extra;
-    const ssh_cipheralg *real_alg =
-        aes_hw_available_cached() ? extra->hw : extra->sw;
-
-    return ssh_cipher_new(real_alg);
-}
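
The selector vtable removed here is a classic runtime-dispatch pattern. A minimal, self-contained sketch of the same idea follows; every name in it (toy_impl, toy_select, hw_available and so on) is invented for illustration and is not part of PuTTY's API.

#include <stdbool.h>
#include <stdio.h>

typedef struct toy_impl {
    const char *name;
    void (*encrypt)(void);
} toy_impl;

static void sw_encrypt(void) { puts("software path"); }
static void hw_encrypt(void) { puts("hardware path"); }

static const toy_impl toy_sw = { "toy (unaccelerated)", sw_encrypt };
static const toy_impl toy_hw = { "toy (accelerated)", hw_encrypt };

static bool hw_available(void) { return false; /* probe the CPU here */ }

/* The analogue of aes_select: consult the probe once and hand back a
 * pointer to the vtable that will actually do the work. */
static const toy_impl *toy_select(void)
{
    return hw_available() ? &toy_hw : &toy_sw;
}

int main(void)
{
    const toy_impl *impl = toy_select();
    printf("selected: %s\n", impl->name);
    impl->encrypt();
    return 0;
}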
-
-/* ----------------------------------------------------------------------
- * Definitions likely to be helpful to multiple implementations.
- */
-
-#define REP2(x) x x
-#define REP4(x) REP2(REP2(x))
-#define REP8(x) REP2(REP4(x))
-#define REP9(x) REP8(x) x
-#define REP11(x) REP8(x) REP2(x) x
-#define REP13(x) REP8(x) REP4(x) x
-
-static const uint8_t key_setup_round_constants[] = {
-    /* The first few powers of X in GF(2^8), used during key setup.
-     * This can safely be a lookup table without side channel risks,
-     * because key setup iterates through it once in a standard way
-     * regardless of the key. */
-    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
-};
-
-#define MAXROUNDKEYS 15
-
-/* ----------------------------------------------------------------------
  * Software implementation of AES.
  *
  * This implementation uses a bit-sliced representation. Instead of
@@ -257,6 +14,16 @@ static const uint8_t key_setup_round_constants[] = {
  * ops you get 64 S-box lookups, not just one.
  */
 
+#include "ssh.h"
+#include "aes.h"
+#include "mpint_i.h"               /* we reuse the BignumInt system */
+
+static bool aes_sw_available(void)
+{
+    /* Software AES is always available */
+    return true;
+}
+
 #define SLICE_PARALLELISM (BIGNUM_INT_BYTES / 2)
 
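The context above describes the payoff of bit-slicing. As a minimal standalone illustration (toy names, not project code), a single word-wide logic operation below evaluates the same gate for 64 independent bit positions at once:

#include <stdint.h>
#include <stdio.h>

/* One gate of an S-box-like circuit: out = (a & b) ^ c. Each uint64_t
 * lane i carries bit i of instance i's inputs, so this one expression
 * computes the gate for all 64 instances simultaneously. */
static uint64_t gate_sliced(uint64_t a, uint64_t b, uint64_t c)
{
    return (a & b) ^ c;
}

int main(void)
{
    uint64_t a = 0xF0F0F0F0F0F0F0F0u;
    uint64_t b = 0xCCCCCCCCCCCCCCCCu;
    uint64_t c = 0xAAAAAAAAAAAAAAAAu;
    printf("%016llx\n", (unsigned long long)gate_sliced(a, b, c));
    return 0;
}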
 #ifdef BITSLICED_DEBUG
@@ -922,8 +689,8 @@ static void aes_sliced_key_setup(
             }
 
             if (rotate_and_round_constant) {
-                assert(rconpos < lenof(key_setup_round_constants));
-                uint8_t rcon = key_setup_round_constants[rconpos++];
+                assert(rconpos < lenof(aes_key_setup_round_constants));
+                uint8_t rcon = aes_key_setup_round_constants[rconpos++];
                 for (size_t i = 0; i < 8; i++)
                     slices[i] ^= 1 & (rcon >> i);
             }
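
The table renamed in this hunk (aes_key_setup_round_constants, formerly key_setup_round_constants) holds the first ten powers of x in GF(2^8). A standalone sketch of how such a table can be regenerated with the usual xtime step; the helper name is invented:

#include <stdint.h>
#include <stdio.h>

/* Multiply by x in GF(2^8): shift left and reduce by the AES
 * polynomial x^8 + x^4 + x^3 + x + 1 (0x11B). */
static uint8_t xtime(uint8_t b)
{
    return (uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1B : 0x00));
}

int main(void)
{
    uint8_t rcon = 0x01;
    for (int i = 0; i < 10; i++) {   /* 0x01 0x02 0x04 ... 0x1B 0x36 */
        printf("0x%02X%s", rcon, i == 9 ? "\n" : ", ");
        rcon = xtime(rcon);
    }
    return 0;
}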
@@ -1255,13 +1022,13 @@ static inline void aes_sdctr_sw(
 }
 
 #define SW_ENC_DEC(len)                                 \
-    static void aes##len##_cbc_sw_encrypt(              \
+    static void aes##len##_sw_cbc_encrypt(              \
         ssh_cipher *ciph, void *vblk, int blklen)       \
     { aes_cbc_sw_encrypt(ciph, vblk, blklen); }         \
-    static void aes##len##_cbc_sw_decrypt(              \
+    static void aes##len##_sw_cbc_decrypt(              \
         ssh_cipher *ciph, void *vblk, int blklen)       \
     { aes_cbc_sw_decrypt(ciph, vblk, blklen); }         \
-    static void aes##len##_sdctr_sw(                    \
+    static void aes##len##_sw_sdctr(                    \
         ssh_cipher *ciph, void *vblk, int blklen)       \
     { aes_sdctr_sw(ciph, vblk, blklen); }
 
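The renames above only move the implementation tag ahead of the mode suffix; the macro itself relies on ## token pasting to stamp out one thin wrapper per key length. A self-contained toy (all names invented) showing the same stamping pattern:

#include <stdio.h>

static void report(int bits) { printf("AES-%d wrapper called\n", bits); }

/* Same token-pasting pattern as SW_ENC_DEC, reduced to one wrapper. */
#define TOY_WRAPPER(len) \
    static void aes##len##_sw_demo(void) { report(len); }

TOY_WRAPPER(128)
TOY_WRAPPER(192)
TOY_WRAPPER(256)

int main(void)
{
    aes128_sw_demo();
    aes192_sw_demo();
    aes256_sw_demo();
    return 0;
}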
@@ -1269,645 +1036,5 @@ SW_ENC_DEC(128)
 SW_ENC_DEC(192)
 SW_ENC_DEC(256)
 
-/* ----------------------------------------------------------------------
- * Hardware-accelerated implementation of AES using x86 AES-NI.
- */
-
-#if HW_AES == HW_AES_NI
-
-/*
- * Set target architecture for Clang and GCC
- */
-#if !defined(__clang__) && defined(__GNUC__)
-#    pragma GCC target("aes")
-#    pragma GCC target("sse4.1")
-#endif
-
-#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
-#    define FUNC_ISA __attribute__ ((target("sse4.1,aes")))
-#else
-#    define FUNC_ISA
-#endif
-
-#include <wmmintrin.h>
-#include <smmintrin.h>
-
-#if defined(__clang__) || defined(__GNUC__)
-#include <cpuid.h>
-#define GET_CPU_ID(out) __cpuid(1, (out)[0], (out)[1], (out)[2], (out)[3])
-#else
-#define GET_CPU_ID(out) __cpuid(out, 1)
-#endif
-
-bool aes_hw_available(void)
-{
-    /*
-     * Determine if AES is available on this CPU, by checking that
-     * both AES itself and SSE4.1 are supported.
-     */
-    unsigned int CPUInfo[4];
-    GET_CPU_ID(CPUInfo);
-    return (CPUInfo[2] & (1 << 25)) && (CPUInfo[2] & (1 << 19));
-}
-
-/*
- * Core AES-NI encrypt/decrypt functions, one per length and direction.
- */
-
-#define NI_CIPHER(len, dir, dirlong, repmacro)                          \
-    static FUNC_ISA inline __m128i aes_ni_##len##_##dir(                \
-        __m128i v, const __m128i *keysched)                             \
-    {                                                                   \
-        v = _mm_xor_si128(v, *keysched++);                              \
-        repmacro(v = _mm_aes##dirlong##_si128(v, *keysched++););        \
-        return _mm_aes##dirlong##last_si128(v, *keysched);              \
-    }
-
-NI_CIPHER(128, e, enc, REP9)
-NI_CIPHER(128, d, dec, REP9)
-NI_CIPHER(192, e, enc, REP11)
-NI_CIPHER(192, d, dec, REP11)
-NI_CIPHER(256, e, enc, REP13)
-NI_CIPHER(256, d, dec, REP13)
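
For reference, a hand expansion of NI_CIPHER(128, e, enc, REP9), with REP9's nine repetitions written as a loop and FUNC_ISA dropped so the snippet stands alone. It mirrors AES-128's structure: an initial key XOR, nine full rounds, and a final round without MixColumns. Compile with -maes.

#include <wmmintrin.h>

static inline __m128i aes_ni_128_e_expanded(
    __m128i v, const __m128i *keysched)
{
    v = _mm_xor_si128(v, *keysched++);          /* AddRoundKey, round 0 */
    for (int i = 0; i < 9; i++)                 /* REP9(...) unrolls these */
        v = _mm_aesenc_si128(v, *keysched++);
    return _mm_aesenclast_si128(v, *keysched);  /* final round */
}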
-
-/*
- * The main key expansion.
- */
-static FUNC_ISA void aes_ni_key_expand(
-    const unsigned char *key, size_t key_words,
-    __m128i *keysched_e, __m128i *keysched_d)
-{
-    size_t rounds = key_words + 6;
-    size_t sched_words = (rounds + 1) * 4;
-
-    /*
-     * Store the key schedule as 32-bit integers during expansion, so
-     * that it's easy to refer back to individual previous words. We
-     * collect them into the final __m128i form at the end.
-     */
-    uint32_t sched[MAXROUNDKEYS * 4];
-
-    unsigned rconpos = 0;
-
-    for (size_t i = 0; i < sched_words; i++) {
-        if (i < key_words) {
-            sched[i] = GET_32BIT_LSB_FIRST(key + 4 * i);
-        } else {
-            uint32_t temp = sched[i - 1];
-
-            bool rotate_and_round_constant = (i % key_words == 0);
-            bool only_sub = (key_words == 8 && i % 8 == 4);
-
-            if (rotate_and_round_constant) {
-                __m128i v = _mm_setr_epi32(0,temp,0,0);
-                v = _mm_aeskeygenassist_si128(v, 0);
-                temp = _mm_extract_epi32(v, 1);
-
-                assert(rconpos < lenof(key_setup_round_constants));
-                temp ^= key_setup_round_constants[rconpos++];
-            } else if (only_sub) {
-                __m128i v = _mm_setr_epi32(0,temp,0,0);
-                v = _mm_aeskeygenassist_si128(v, 0);
-                temp = _mm_extract_epi32(v, 0);
-            }
-
-            sched[i] = sched[i - key_words] ^ temp;
-        }
-    }
-
-    /*
-     * Combine the key schedule words into __m128i vectors and store
-     * them in the output context.
-     */
-    for (size_t round = 0; round <= rounds; round++)
-        keysched_e[round] = _mm_setr_epi32(
-            sched[4*round  ], sched[4*round+1],
-            sched[4*round+2], sched[4*round+3]);
-
-    smemclr(sched, sizeof(sched));
-
-    /*
-     * Now prepare the modified keys for the inverse cipher.
-     */
-    for (size_t eround = 0; eround <= rounds; eround++) {
-        size_t dround = rounds - eround;
-        __m128i rkey = keysched_e[eround];
-        if (eround && dround)      /* neither first nor last */
-            rkey = _mm_aesimc_si128(rkey);
-        keysched_d[dround] = rkey;
-    }
-}
-
-/*
- * Auxiliary routine to increment the 128-bit counter used in SDCTR
- * mode.
- */
-static FUNC_ISA inline __m128i aes_ni_sdctr_increment(__m128i v)
-{
-    const __m128i ONE  = _mm_setr_epi32(1,0,0,0);
-    const __m128i ZERO = _mm_setzero_si128();
-
-    /* Increment the low-order 64 bits of v */
-    v  = _mm_add_epi64(v, ONE);
-    /* Check if they've become zero */
-    __m128i cmp = _mm_cmpeq_epi64(v, ZERO);
-    /* If so, the low half of cmp is all 1s. Pack that into the high
-     * half of addend with zero in the low half. */
-    __m128i addend = _mm_unpacklo_epi64(ZERO, cmp);
-    /* And subtract that from v, which increments the high 64 bits iff
-     * the low 64 wrapped round. */
-    v = _mm_sub_epi64(v, addend);
-
-    return v;
-}
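
What the branchless vector code above computes is easier to see in scalar form. A portable restatement, illustrative only: SSE has no 128-bit integer add, which is why the real code resorts to the compare-and-subtract trick.

#include <stdint.h>
#include <stdio.h>

/* A 128-bit increment done as two 64-bit halves, carrying into the
 * high word only when the low word wraps to zero. */
static void ctr128_increment(uint64_t ctr[2])  /* ctr[0] = low half */
{
    if (++ctr[0] == 0)
        ctr[1]++;
}

int main(void)
{
    uint64_t ctr[2] = { UINT64_MAX, 41 };
    ctr128_increment(ctr);
    printf("%llu %llu\n",                      /* prints "0 42" */
           (unsigned long long)ctr[0], (unsigned long long)ctr[1]);
    return 0;
}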
-
-/*
- * Auxiliary routine to reverse the byte order of a vector, so that
- * the SDCTR IV can be made big-endian for feeding to the cipher.
- */
-static FUNC_ISA inline __m128i aes_ni_sdctr_reverse(__m128i v)
-{
-    v = _mm_shuffle_epi8(
-        v, _mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0));
-    return v;
-}
-
-/*
- * The SSH interface and the cipher modes.
- */
-
-typedef struct aes_ni_context aes_ni_context;
-struct aes_ni_context {
-    __m128i keysched_e[MAXROUNDKEYS], keysched_d[MAXROUNDKEYS], iv;
-
-    void *pointer_to_free;
-    ssh_cipher ciph;
-};
-
-static ssh_cipher *aes_hw_new(const ssh_cipheralg *alg)
-{
-    if (!aes_hw_available_cached())
-        return NULL;
-
-    /*
-     * The __m128i variables in the context structure need to be
-     * 16-byte aligned, but not all malloc implementations that this
-     * code has to work with will guarantee to return a 16-byte
-     * aligned pointer. So we over-allocate, manually realign the
-     * pointer ourselves, and store the original one inside the
-     * context so we know how to free it later.
-     */
-    void *allocation = smalloc(sizeof(aes_ni_context) + 15);
-    uintptr_t alloc_address = (uintptr_t)allocation;
-    uintptr_t aligned_address = (alloc_address + 15) & ~15;
-    aes_ni_context *ctx = (aes_ni_context *)aligned_address;
-
-    ctx->ciph.vt = alg;
-    ctx->pointer_to_free = allocation;
-    return &ctx->ciph;
-}
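
The over-allocate-and-realign idiom used by aes_hw_new, shown in isolation with plain malloc/free standing in for PuTTY's smalloc/sfree; alloc_aligned16 and its result struct are invented names for this sketch.

#include <stdint.h>
#include <stdlib.h>

struct aligned_alloc_result {
    void *aligned;          /* 16-byte aligned block of at least 'size' */
    void *to_free;          /* what to pass to free() later */
};

static struct aligned_alloc_result alloc_aligned16(size_t size)
{
    struct aligned_alloc_result r;
    r.to_free = malloc(size + 15);  /* room to round up to alignment */
    /* Round the address up to the next multiple of 16. If malloc
     * returned NULL, this yields NULL too. */
    r.aligned = (void *)(((uintptr_t)r.to_free + 15) & ~(uintptr_t)15);
    return r;
}

int main(void)
{
    struct aligned_alloc_result r = alloc_aligned16(64);
    if (r.to_free) {
        /* ... use r.aligned here ... */
        free(r.to_free);    /* free() needs the original pointer */
    }
    return 0;
}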
-
-static void aes_hw_free(ssh_cipher *ciph)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-    void *allocation = ctx->pointer_to_free;
-    smemclr(ctx, sizeof(*ctx));
-    sfree(allocation);
-}
-
-static void aes_hw_setkey(ssh_cipher *ciph, const void *vkey)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-    const unsigned char *key = (const unsigned char *)vkey;
-
-    aes_ni_key_expand(key, ctx->ciph.vt->real_keybits / 32,
-                      ctx->keysched_e, ctx->keysched_d);
-}
-
-static FUNC_ISA void aes_hw_setiv_cbc(ssh_cipher *ciph, const void *iv)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-    ctx->iv = _mm_loadu_si128(iv);
-}
-
-static FUNC_ISA void aes_hw_setiv_sdctr(ssh_cipher *ciph, const void *iv)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-    __m128i counter = _mm_loadu_si128(iv);
-    ctx->iv = aes_ni_sdctr_reverse(counter);
-}
-
-typedef __m128i (*aes_ni_fn)(__m128i v, const __m128i *keysched);
-
-static FUNC_ISA inline void aes_cbc_ni_encrypt(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_ni_fn encrypt)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        __m128i plaintext = _mm_loadu_si128((const __m128i *)blk);
-        __m128i cipher_input = _mm_xor_si128(plaintext, ctx->iv);
-        __m128i ciphertext = encrypt(cipher_input, ctx->keysched_e);
-        _mm_storeu_si128((__m128i *)blk, ciphertext);
-        ctx->iv = ciphertext;
-    }
-}
-
-static FUNC_ISA inline void aes_cbc_ni_decrypt(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_ni_fn decrypt)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        __m128i ciphertext = _mm_loadu_si128((const __m128i *)blk);
-        __m128i decrypted = decrypt(ciphertext, ctx->keysched_d);
-        __m128i plaintext = _mm_xor_si128(decrypted, ctx->iv);
-        _mm_storeu_si128((__m128i *)blk, plaintext);
-        ctx->iv = ciphertext;
-    }
-}
-
-static FUNC_ISA inline void aes_sdctr_ni(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_ni_fn encrypt)
-{
-    aes_ni_context *ctx = container_of(ciph, aes_ni_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        __m128i counter = aes_ni_sdctr_reverse(ctx->iv);
-        __m128i keystream = encrypt(counter, ctx->keysched_e);
-        __m128i input = _mm_loadu_si128((const __m128i *)blk);
-        __m128i output = _mm_xor_si128(input, keystream);
-        _mm_storeu_si128((__m128i *)blk, output);
-        ctx->iv = aes_ni_sdctr_increment(ctx->iv);
-    }
-}
-
-#define NI_ENC_DEC(len)                                                 \
-    static FUNC_ISA void aes##len##_cbc_hw_encrypt(                     \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_cbc_ni_encrypt(ciph, vblk, blklen, aes_ni_##len##_e); }       \
-    static FUNC_ISA void aes##len##_cbc_hw_decrypt(                     \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_cbc_ni_decrypt(ciph, vblk, blklen, aes_ni_##len##_d); }       \
-    static FUNC_ISA void aes##len##_sdctr_hw(                           \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_sdctr_ni(ciph, vblk, blklen, aes_ni_##len##_e); }             \
-
-NI_ENC_DEC(128)
-NI_ENC_DEC(192)
-NI_ENC_DEC(256)
-
-/* ----------------------------------------------------------------------
- * Hardware-accelerated implementation of AES using Arm NEON.
- */
-
-#elif HW_AES == HW_AES_NEON
-
-/*
- * Manually set the target architecture, if we decided above that we
- * need to.
- */
-#ifdef USE_CLANG_ATTR_TARGET_AARCH64
-/*
- * A spot of cheating: redefine some ACLE feature macros before
- * including arm_neon.h. Otherwise we won't get the AES intrinsics
- * defined by that header, because it will be looking at the settings
- * for the whole translation unit rather than the ones we're going to
- * put on some particular functions using __attribute__((target)).
- */
-#define __ARM_NEON 1
-#define __ARM_FEATURE_CRYPTO 1
-#define __ARM_FEATURE_AES 1
-#define FUNC_ISA __attribute__ ((target("neon,crypto")))
-#endif /* USE_CLANG_ATTR_TARGET_AARCH64 */
-
-#ifndef FUNC_ISA
-#define FUNC_ISA
-#endif
-
-#ifdef USE_ARM64_NEON_H
-#include <arm64_neon.h>
-#else
-#include <arm_neon.h>
-#endif
-
-static bool aes_hw_available(void)
-{
-    /*
-     * For Arm, we delegate to a per-platform AES detection function,
-     * because it has to be implemented by asking the operating system
-     * rather than directly querying the CPU.
-     *
-     * That's because Arm systems commonly have multiple cores that
-     * are not all alike, so any method of querying whether NEON
-     * crypto instructions work on the _current_ CPU - even one as
-     * crude as just trying one and catching the SIGILL - wouldn't
-     * give an answer that you could still rely on the first time the
-     * OS migrated your process to another CPU.
-     */
-    return platform_aes_hw_available();
-}
-
-/*
- * Core NEON encrypt/decrypt functions, one per length and direction.
- */
-
-#define NEON_CIPHER(len, repmacro)                              \
-    static FUNC_ISA inline uint8x16_t aes_neon_##len##_e(       \
-        uint8x16_t v, const uint8x16_t *keysched)               \
-    {                                                           \
-        repmacro(v = vaesmcq_u8(vaeseq_u8(v, *keysched++)););   \
-        v = vaeseq_u8(v, *keysched++);                          \
-        return veorq_u8(v, *keysched);                          \
-    }                                                           \
-    static FUNC_ISA inline uint8x16_t aes_neon_##len##_d(       \
-        uint8x16_t v, const uint8x16_t *keysched)               \
-    {                                                           \
-        repmacro(v = vaesimcq_u8(vaesdq_u8(v, *keysched++)););  \
-        v = vaesdq_u8(v, *keysched++);                          \
-        return veorq_u8(v, *keysched);                          \
-    }
-
-NEON_CIPHER(128, REP9)
-NEON_CIPHER(192, REP11)
-NEON_CIPHER(256, REP13)
-
-/*
- * The main key expansion.
- */
-static FUNC_ISA void aes_neon_key_expand(
-    const unsigned char *key, size_t key_words,
-    uint8x16_t *keysched_e, uint8x16_t *keysched_d)
-{
-    size_t rounds = key_words + 6;
-    size_t sched_words = (rounds + 1) * 4;
-
-    /*
-     * Store the key schedule as 32-bit integers during expansion, so
-     * that it's easy to refer back to individual previous words. We
-     * collect them into the final uint8x16_t form at the end.
-     */
-    uint32_t sched[MAXROUNDKEYS * 4];
-
-    unsigned rconpos = 0;
-
-    for (size_t i = 0; i < sched_words; i++) {
-        if (i < key_words) {
-            sched[i] = GET_32BIT_LSB_FIRST(key + 4 * i);
-        } else {
-            uint32_t temp = sched[i - 1];
-
-            bool rotate_and_round_constant = (i % key_words == 0);
-            bool sub = rotate_and_round_constant ||
-                (key_words == 8 && i % 8 == 4);
-
-            if (rotate_and_round_constant)
-                temp = (temp << 24) | (temp >> 8);
-
-            if (sub) {
-                uint32x4_t v32 = vdupq_n_u32(temp);
-                uint8x16_t v8 = vreinterpretq_u8_u32(v32);
-                v8 = vaeseq_u8(v8, vdupq_n_u8(0));
-                v32 = vreinterpretq_u32_u8(v8);
-                temp = vget_lane_u32(vget_low_u32(v32), 0);
-            }
-
-            if (rotate_and_round_constant) {
-                assert(rconpos < lenof(key_setup_round_constants));
-                temp ^= key_setup_round_constants[rconpos++];
-            }
-
-            sched[i] = sched[i - key_words] ^ temp;
-        }
-    }
-
-    /*
-     * Combine the key schedule words into uint8x16_t vectors and
-     * store them in the output context.
-     */
-    for (size_t round = 0; round <= rounds; round++)
-        keysched_e[round] = vreinterpretq_u8_u32(vld1q_u32(sched + 4*round));
-
-    smemclr(sched, sizeof(sched));
-
-    /*
-     * Now prepare the modified keys for the inverse cipher.
-     */
-    for (size_t eround = 0; eround <= rounds; eround++) {
-        size_t dround = rounds - eround;
-        uint8x16_t rkey = keysched_e[eround];
-        if (eround && dround)      /* neither first nor last */
-            rkey = vaesimcq_u8(rkey);
-        keysched_d[dround] = rkey;
-    }
-}
-
-/*
- * Auxiliary routine to reverse the byte order of a vector, so that
- * the SDCTR IV can be made big-endian for feeding to the cipher.
- *
- * In fact we don't need to reverse the vector _all_ the way; we leave
- * the two lanes in MSW,LSW order, because that makes no difference to
- * the efficiency of the increment. That way we only have to reverse
- * bytes within each lane in this function.
- */
-static FUNC_ISA inline uint8x16_t aes_neon_sdctr_reverse(uint8x16_t v)
-{
-    return vrev64q_u8(v);
-}
-
-/*
- * Auxiliary routine to increment the 128-bit counter used in SDCTR
- * mode. There's no instruction to treat a 128-bit vector as a single
- * long integer, so instead we have to increment the bottom half
- * unconditionally, and the top half if the bottom half started off as
- * all 1s (in which case there was about to be a carry).
- */
-static FUNC_ISA inline uint8x16_t aes_neon_sdctr_increment(uint8x16_t in)
-{
-#ifdef __aarch64__
-    /* There will be a carry if the low 64 bits are all 1s. */
-    uint64x1_t all1 = vcreate_u64(0xFFFFFFFFFFFFFFFF);
-    uint64x1_t carry = vceq_u64(vget_high_u64(vreinterpretq_u64_u8(in)), all1);
-
-    /* Make a word whose bottom half is unconditionally all 1s, and
-     * the top half is 'carry', i.e. all 0s most of the time but all
-     * 1s if we need to increment the top half. Then that word is what
-     * we need to _subtract_ from the input counter. */
-    uint64x2_t subtrahend = vcombine_u64(carry, all1);
-#else
-    /* AArch32 doesn't have comparisons that operate on a 64-bit lane,
-     * so we start by comparing each 32-bit half of the low 64 bits
-     * _separately_ to all-1s. */
-    uint32x2_t all1 = vdup_n_u32(0xFFFFFFFF);
-    uint32x2_t carry = vceq_u32(
-        vget_high_u32(vreinterpretq_u32_u8(in)), all1);
-
-    /* Swap the 32-bit words of the compare output, and AND with the
-     * unswapped version. Now carry is all 1s iff the bottom half of
-     * the input counter was all 1s, and all 0s otherwise. */
-    carry = vand_u32(carry, vrev64_u32(carry));
-
-    /* Now make the vector to subtract in the same way as above. */
-    uint64x2_t subtrahend = vreinterpretq_u64_u32(vcombine_u32(carry, all1));
-#endif
-
-    return vreinterpretq_u8_u64(
-        vsubq_u64(vreinterpretq_u64_u8(in), subtrahend));
-}
-
-/*
- * The SSH interface and the cipher modes.
- */
-
-typedef struct aes_neon_context aes_neon_context;
-struct aes_neon_context {
-    uint8x16_t keysched_e[MAXROUNDKEYS], keysched_d[MAXROUNDKEYS], iv;
-
-    ssh_cipher ciph;
-};
-
-static ssh_cipher *aes_hw_new(const ssh_cipheralg *alg)
-{
-    if (!aes_hw_available_cached())
-        return NULL;
-
-    aes_neon_context *ctx = snew(aes_neon_context);
-    ctx->ciph.vt = alg;
-    return &ctx->ciph;
-}
-
-static void aes_hw_free(ssh_cipher *ciph)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-    smemclr(ctx, sizeof(*ctx));
-    sfree(ctx);
-}
-
-static void aes_hw_setkey(ssh_cipher *ciph, const void *vkey)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-    const unsigned char *key = (const unsigned char *)vkey;
-
-    aes_neon_key_expand(key, ctx->ciph.vt->real_keybits / 32,
-                      ctx->keysched_e, ctx->keysched_d);
-}
-
-static FUNC_ISA void aes_hw_setiv_cbc(ssh_cipher *ciph, const void *iv)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-    ctx->iv = vld1q_u8(iv);
-}
-
-static FUNC_ISA void aes_hw_setiv_sdctr(ssh_cipher *ciph, const void *iv)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-    uint8x16_t counter = vld1q_u8(iv);
-    ctx->iv = aes_neon_sdctr_reverse(counter);
-}
-
-typedef uint8x16_t (*aes_neon_fn)(uint8x16_t v, const uint8x16_t *keysched);
-
-static FUNC_ISA inline void aes_cbc_neon_encrypt(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_neon_fn encrypt)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        uint8x16_t plaintext = vld1q_u8(blk);
-        uint8x16_t cipher_input = veorq_u8(plaintext, ctx->iv);
-        uint8x16_t ciphertext = encrypt(cipher_input, ctx->keysched_e);
-        vst1q_u8(blk, ciphertext);
-        ctx->iv = ciphertext;
-    }
-}
-
-static FUNC_ISA inline void aes_cbc_neon_decrypt(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_neon_fn decrypt)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        uint8x16_t ciphertext = vld1q_u8(blk);
-        uint8x16_t decrypted = decrypt(ciphertext, ctx->keysched_d);
-        uint8x16_t plaintext = veorq_u8(decrypted, ctx->iv);
-        vst1q_u8(blk, plaintext);
-        ctx->iv = ciphertext;
-    }
-}
-
-static FUNC_ISA inline void aes_sdctr_neon(
-    ssh_cipher *ciph, void *vblk, int blklen, aes_neon_fn encrypt)
-{
-    aes_neon_context *ctx = container_of(ciph, aes_neon_context, ciph);
-
-    for (uint8_t *blk = (uint8_t *)vblk, *finish = blk + blklen;
-         blk < finish; blk += 16) {
-        uint8x16_t counter = aes_neon_sdctr_reverse(ctx->iv);
-        uint8x16_t keystream = encrypt(counter, ctx->keysched_e);
-        uint8x16_t input = vld1q_u8(blk);
-        uint8x16_t output = veorq_u8(input, keystream);
-        vst1q_u8(blk, output);
-        ctx->iv = aes_neon_sdctr_increment(ctx->iv);
-    }
-}
-
-#define NEON_ENC_DEC(len)                                               \
-    static FUNC_ISA void aes##len##_cbc_hw_encrypt(                     \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_cbc_neon_encrypt(ciph, vblk, blklen, aes_neon_##len##_e); }   \
-    static FUNC_ISA void aes##len##_cbc_hw_decrypt(                     \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_cbc_neon_decrypt(ciph, vblk, blklen, aes_neon_##len##_d); }   \
-    static FUNC_ISA void aes##len##_sdctr_hw(                           \
-        ssh_cipher *ciph, void *vblk, int blklen)                       \
-    { aes_sdctr_neon(ciph, vblk, blklen, aes_neon_##len##_e); }         \
-
-NEON_ENC_DEC(128)
-NEON_ENC_DEC(192)
-NEON_ENC_DEC(256)
-
-/* ----------------------------------------------------------------------
- * Stub functions if we have no hardware-accelerated AES. In this
- * case, aes_hw_new returns NULL (though it should also never be
- * selected by aes_select, so the only thing that should even be
- * _able_ to call it is testcrypt). As a result, the remaining vtable
- * functions should never be called at all.
- */
-
-#elif HW_AES == HW_AES_NONE
-
-bool aes_hw_available(void)
-{
-    return false;
-}
-
-static ssh_cipher *aes_hw_new(const ssh_cipheralg *alg)
-{
-    return NULL;
-}
-
-#define STUB_BODY { unreachable("Should never be called"); }
-
-static void aes_hw_free(ssh_cipher *ciph) STUB_BODY
-static void aes_hw_setkey(ssh_cipher *ciph, const void *key) STUB_BODY
-static void aes_hw_setiv_cbc(ssh_cipher *ciph, const void *iv) STUB_BODY
-static void aes_hw_setiv_sdctr(ssh_cipher *ciph, const void *iv) STUB_BODY
-#define STUB_ENC_DEC(len)                                       \
-    static void aes##len##_cbc_hw_encrypt(                      \
-        ssh_cipher *ciph, void *vblk, int blklen) STUB_BODY     \
-    static void aes##len##_cbc_hw_decrypt(                      \
-        ssh_cipher *ciph, void *vblk, int blklen) STUB_BODY     \
-    static void aes##len##_sdctr_hw(                            \
-        ssh_cipher *ciph, void *vblk, int blklen) STUB_BODY
-
-STUB_ENC_DEC(128)
-STUB_ENC_DEC(192)
-STUB_ENC_DEC(256)
-
-#endif /* HW_AES */
+AES_EXTRA(_sw);
+AES_ALL_VTABLES(_sw, "unaccelerated");
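
AES_EXTRA and AES_ALL_VTABLES are defined in the new aes.h, which this diff does not show. Judging purely by the deleted VTABLES_INNER machinery above, they presumably take an implementation suffix plus a display adjective and stamp out the per-mode vtables for that implementation. A toy reconstruction of that shape, with every name invented:

#include <stdbool.h>
#include <stdio.h>

typedef struct demo_alg {
    const char *text_name;
    bool (*available)(void);
} demo_alg;

static bool demo_sw_available(void) { return true; }

/* One macro stamps out a vtable for a given implementation suffix,
 * pairing its availability probe with a decorated display name. */
#define DEMO_ALL_VTABLES(impl, displayname)                     \
    static const demo_alg demo_aes256_cbc##impl = {             \
        "AES-256 CBC (" displayname ")", demo##impl##_available \
    };

DEMO_ALL_VTABLES(_sw, "unaccelerated")

int main(void)
{
    printf("%s: %s\n", demo_aes256_cbc_sw.text_name,
           demo_aes256_cbc_sw.available() ? "usable" : "unavailable");
    return 0;
}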