@@ -12,6 +12,7 @@
 */
#define HW_SHA1_NONE 0
#define HW_SHA1_NI 1
+#define HW_SHA1_NEON 2

#ifdef _FORCE_SHA_NI
# define HW_SHA1 HW_SHA1_NI
@@ -21,8 +22,7 @@
# define HW_SHA1 HW_SHA1_NI
# endif
#elif defined(__GNUC__)
-# if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) && \
-     (defined(__x86_64__) || defined(__i386))
+# if (__GNUC__ >= 5) && (defined(__x86_64__) || defined(__i386))
# define HW_SHA1 HW_SHA1_NI
# endif
#elif defined (_MSC_VER)
@@ -31,6 +31,37 @@
# endif
#endif

+#ifdef _FORCE_SHA_NEON
+# define HW_SHA1 HW_SHA1_NEON
+#elif defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    /* Arm can potentially support both endiannesses, but this code
+     * hasn't been tested on anything but little. If anyone wants to
+     * run big-endian, they'll need to fix it first. */
+#elif defined __ARM_FEATURE_CRYPTO
+    /* If the Arm crypto extension is available already, we can
+     * support NEON SHA without having to enable anything by hand */
+# define HW_SHA1 HW_SHA1_NEON
+#elif defined(__clang__)
+# if __has_attribute(target) && __has_include(<arm_neon.h>) && \
+     (defined(__aarch64__))
+    /* clang can enable the crypto extension in AArch64 using
+     * __attribute__((target)) */
+# define HW_SHA1 HW_SHA1_NEON
+# define USE_CLANG_ATTR_TARGET_AARCH64
+# endif
+#elif defined _MSC_VER
+    /* Visual Studio supports the crypto extension when targeting
+     * AArch64, but as of VS2017, the AArch32 header doesn't quite
+     * manage it (declaring the shae/shad intrinsics without a round
+     * key operand). */
+# if defined _M_ARM64
+# define HW_SHA1 HW_SHA1_NEON
+# if defined _M_ARM64
+# define USE_ARM64_NEON_H /* unusual header name in this case */
+# endif
+# endif
+#endif
+
#if defined _FORCE_SOFTWARE_SHA || !defined HW_SHA1
# undef HW_SHA1
# define HW_SHA1 HW_SHA1_NONE
@@ -67,7 +98,7 @@ static ssh_hash *sha1_select(const ssh_hashalg *alg)

const ssh_hashalg ssh_sha1 = {
    sha1_select, NULL, NULL, NULL,
-    20, 64, "SHA-1",
+    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "dummy selector vtable"),
};

/* ----------------------------------------------------------------------
@@ -278,7 +309,7 @@ static void sha1_sw_final(ssh_hash *hash, uint8_t *digest)

const ssh_hashalg ssh_sha1_sw = {
    sha1_sw_new, sha1_sw_copy, sha1_sw_final, sha1_sw_free,
-    20, 64, "SHA-1",
+    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "unaccelerated"),
};

/* ----------------------------------------------------------------------
@@ -290,13 +321,13 @@ const ssh_hashalg ssh_sha1_sw = {
/*
 * Set target architecture for Clang and GCC
 */
-#if !defined(__clang__) && defined(__GNUC__)
+
+#if defined(__clang__) || defined(__GNUC__)
+# define FUNC_ISA __attribute__ ((target("sse4.1,sha")))
+#if !defined(__clang__)
# pragma GCC target("sha")
# pragma GCC target("sse4.1")
#endif
-
-#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
-# define FUNC_ISA __attribute__ ((target("sse4.1,sha")))
#else
# define FUNC_ISA
#endif
@@ -619,7 +650,223 @@ FUNC_ISA static void sha1_ni_final(ssh_hash *hash, uint8_t *digest)

const ssh_hashalg ssh_sha1_hw = {
    sha1_ni_new, sha1_ni_copy, sha1_ni_final, sha1_ni_free,
-    20, 64, "SHA-1",
+    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "SHA-NI accelerated"),
+};
+
+/* ----------------------------------------------------------------------
+ * Hardware-accelerated implementation of SHA-1 using Arm NEON.
+ */
+
+#elif HW_SHA1 == HW_SHA1_NEON
+
+/*
+ * Manually set the target architecture, if we decided above that we
+ * need to.
+ */
+#ifdef USE_CLANG_ATTR_TARGET_AARCH64
+/*
+ * A spot of cheating: redefine some ACLE feature macros before
+ * including arm_neon.h. Otherwise we won't get the SHA intrinsics
+ * defined by that header, because it will be looking at the settings
+ * for the whole translation unit rather than the ones we're going to
+ * put on some particular functions using __attribute__((target)).
+ */
+#define __ARM_NEON 1
+#define __ARM_FEATURE_CRYPTO 1
+#define FUNC_ISA __attribute__ ((target("neon,crypto")))
+#endif /* USE_CLANG_ATTR_TARGET_AARCH64 */
+
+#ifndef FUNC_ISA
+#define FUNC_ISA
+#endif
+
+#ifdef USE_ARM64_NEON_H
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+static bool sha1_hw_available(void)
+{
+    /*
+     * For Arm, we delegate to a per-platform detection function (see
+     * explanation in sshaes.c).
+     */
+    return platform_sha1_hw_available();
+}
+
+typedef struct sha1_neon_core sha1_neon_core;
+struct sha1_neon_core {
+    uint32x4_t abcd;
+    uint32_t e;
+};
+
+/* ------------- got up to here ----------------------------------------- */
+
+FUNC_ISA
+static inline uint32x4_t sha1_neon_load_input(const uint8_t *p)
+{
+    return vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(p)));
+}
+
+FUNC_ISA
+static inline uint32x4_t sha1_neon_schedule_update(
+    uint32x4_t m4, uint32x4_t m3, uint32x4_t m2, uint32x4_t m1)
+{
+    return vsha1su1q_u32(vsha1su0q_u32(m4, m3, m2), m1);
+}
+
+/*
+ * SHA-1 has three different kinds of round, differing in whether they
+ * use the Ch, Maj or Par functions defined above. Each one uses a
+ * separate NEON instruction, so we define three inline functions for
+ * the different round types using this macro.
+ *
+ * The two batches of Par-type rounds also use a different constant,
+ * but that's passed in as an operand, so we don't need a fourth
+ * inline function just for that.
+ */
+#define SHA1_NEON_ROUND_FN(type) \
+    FUNC_ISA static inline sha1_neon_core sha1_neon_round4_##type( \
+        sha1_neon_core old, uint32x4_t sched, uint32x4_t constant) \
+    { \
+        sha1_neon_core new; \
+        uint32x4_t round_input = vaddq_u32(sched, constant); \
+        new.abcd = vsha1##type##q_u32(old.abcd, old.e, round_input); \
+        new.e = vsha1h_u32(vget_lane_u32(vget_low_u32(old.abcd), 0)); \
+        return new; \
+    }
+SHA1_NEON_ROUND_FN(c)
+SHA1_NEON_ROUND_FN(p)
+SHA1_NEON_ROUND_FN(m)
+
+FUNC_ISA
+static inline void sha1_neon_block(sha1_neon_core *core, const uint8_t *p)
+{
+    uint32x4_t constant, s0, s1, s2, s3;
+    sha1_neon_core cr = *core;
+
+    constant = vdupq_n_u32(SHA1_STAGE0_CONSTANT);
+    s0 = sha1_neon_load_input(p);
+    cr = sha1_neon_round4_c(cr, s0, constant);
+    s1 = sha1_neon_load_input(p + 16);
+    cr = sha1_neon_round4_c(cr, s1, constant);
+    s2 = sha1_neon_load_input(p + 32);
+    cr = sha1_neon_round4_c(cr, s2, constant);
+    s3 = sha1_neon_load_input(p + 48);
+    cr = sha1_neon_round4_c(cr, s3, constant);
+    s0 = sha1_neon_schedule_update(s0, s1, s2, s3);
+    cr = sha1_neon_round4_c(cr, s0, constant);
+
+    constant = vdupq_n_u32(SHA1_STAGE1_CONSTANT);
+    s1 = sha1_neon_schedule_update(s1, s2, s3, s0);
+    cr = sha1_neon_round4_p(cr, s1, constant);
+    s2 = sha1_neon_schedule_update(s2, s3, s0, s1);
+    cr = sha1_neon_round4_p(cr, s2, constant);
+    s3 = sha1_neon_schedule_update(s3, s0, s1, s2);
+    cr = sha1_neon_round4_p(cr, s3, constant);
+    s0 = sha1_neon_schedule_update(s0, s1, s2, s3);
+    cr = sha1_neon_round4_p(cr, s0, constant);
+    s1 = sha1_neon_schedule_update(s1, s2, s3, s0);
+    cr = sha1_neon_round4_p(cr, s1, constant);
+
+    constant = vdupq_n_u32(SHA1_STAGE2_CONSTANT);
+    s2 = sha1_neon_schedule_update(s2, s3, s0, s1);
+    cr = sha1_neon_round4_m(cr, s2, constant);
+    s3 = sha1_neon_schedule_update(s3, s0, s1, s2);
+    cr = sha1_neon_round4_m(cr, s3, constant);
+    s0 = sha1_neon_schedule_update(s0, s1, s2, s3);
+    cr = sha1_neon_round4_m(cr, s0, constant);
+    s1 = sha1_neon_schedule_update(s1, s2, s3, s0);
+    cr = sha1_neon_round4_m(cr, s1, constant);
+    s2 = sha1_neon_schedule_update(s2, s3, s0, s1);
+    cr = sha1_neon_round4_m(cr, s2, constant);
+
+    constant = vdupq_n_u32(SHA1_STAGE3_CONSTANT);
+    s3 = sha1_neon_schedule_update(s3, s0, s1, s2);
+    cr = sha1_neon_round4_p(cr, s3, constant);
+    s0 = sha1_neon_schedule_update(s0, s1, s2, s3);
+    cr = sha1_neon_round4_p(cr, s0, constant);
+    s1 = sha1_neon_schedule_update(s1, s2, s3, s0);
+    cr = sha1_neon_round4_p(cr, s1, constant);
+    s2 = sha1_neon_schedule_update(s2, s3, s0, s1);
+    cr = sha1_neon_round4_p(cr, s2, constant);
+    s3 = sha1_neon_schedule_update(s3, s0, s1, s2);
+    cr = sha1_neon_round4_p(cr, s3, constant);
+
+    core->abcd = vaddq_u32(core->abcd, cr.abcd);
+    core->e += cr.e;
+}
+
+typedef struct sha1_neon {
+    sha1_neon_core core;
+    sha1_block blk;
+    BinarySink_IMPLEMENTATION;
+    ssh_hash hash;
+} sha1_neon;
+
+static void sha1_neon_write(BinarySink *bs, const void *vp, size_t len);
+
+static ssh_hash *sha1_neon_new(const ssh_hashalg *alg)
+{
+    if (!sha1_hw_available_cached())
+        return NULL;
+
+    sha1_neon *s = snew(sha1_neon);
+
+    s->core.abcd = vld1q_u32(sha1_initial_state);
+    s->core.e = sha1_initial_state[4];
+
+    sha1_block_setup(&s->blk);
+
+    s->hash.vt = alg;
+    BinarySink_INIT(s, sha1_neon_write);
+    BinarySink_DELEGATE_INIT(&s->hash, s);
+    return &s->hash;
+}
+
+static ssh_hash *sha1_neon_copy(ssh_hash *hash)
+{
+    sha1_neon *s = container_of(hash, sha1_neon, hash);
+    sha1_neon *copy = snew(sha1_neon);
+
+    *copy = *s; /* structure copy */
+
+    BinarySink_COPIED(copy);
+    BinarySink_DELEGATE_INIT(&copy->hash, copy);
+
+    return &copy->hash;
+}
+
+static void sha1_neon_free(ssh_hash *hash)
+{
+    sha1_neon *s = container_of(hash, sha1_neon, hash);
+    smemclr(s, sizeof(*s));
+    sfree(s);
+}
+
+static void sha1_neon_write(BinarySink *bs, const void *vp, size_t len)
+{
+    sha1_neon *s = BinarySink_DOWNCAST(bs, sha1_neon);
+
+    while (len > 0)
+        if (sha1_block_write(&s->blk, &vp, &len))
+            sha1_neon_block(&s->core, s->blk.block);
+}
+
+static void sha1_neon_final(ssh_hash *hash, uint8_t *digest)
+{
+    sha1_neon *s = container_of(hash, sha1_neon, hash);
+
+    sha1_block_pad(&s->blk, BinarySink_UPCAST(s));
+    vst1q_u8(digest, vrev32q_u8(vreinterpretq_u8_u32(s->core.abcd)));
+    PUT_32BIT_MSB_FIRST(digest + 16, s->core.e);
+    sha1_neon_free(hash);
+}
+
+const ssh_hashalg ssh_sha1_hw = {
+    sha1_neon_new, sha1_neon_copy, sha1_neon_final, sha1_neon_free,
+    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "NEON accelerated"),
};

/* ----------------------------------------------------------------------
@@ -650,7 +897,8 @@ static void sha1_stub_final(ssh_hash *hash, uint8_t *digest) STUB_BODY

const ssh_hashalg ssh_sha1_hw = {
    sha1_stub_new, sha1_stub_copy, sha1_stub_final, sha1_stub_free,
-    20, 64, "SHA-1",
+    20, 64, HASHALG_NAMES_ANNOTATED(
+        "SHA-1", "!NONEXISTENT ACCELERATED VERSION!"),
};

#endif /* HW_SHA1 */