4 years ago · 307619dd88
--- a/source/Putty.cbproj
+++ b/source/Putty.cbproj
@@ -223,12 +223,18 @@
 
				 			<BuildOrder>29</BuildOrder>

			
 
				 			<BuildOrder>21</BuildOrder>

			
 
				 		</CppCompile>

			
 
				+		<CppCompile Include="putty\sshargon2.c">

			
 
				+			<BuildOrder>29</BuildOrder>

			
 
				+		</CppCompile>

			
 
				 		<CppCompile Include="putty\sshauxcrypt.c">

			
 
				 			<BuildOrder>78</BuildOrder>

			
 
				 		</CppCompile>

			
 
				 		<CppCompile Include="putty\sshbcrypt.c">

			
 
				 			<BuildOrder>50</BuildOrder>

			
 
				 		</CppCompile>

			
 
				+		<CppCompile Include="putty\sshblake2.c">

			
 
				+			<BuildOrder>50</BuildOrder>

			
 
				+		</CppCompile>

			
 
				 		<CppCompile Include="putty\sshblowf.c">

			
 
				 			<BuildOrder>34</BuildOrder>

			
 
				 			<BuildOrder>32</BuildOrder>

			
--- a/source/core/PuttyIntf.cpp
+++ b/source/core/PuttyIntf.cpp
@@ -712,10 +712,15 @@ void SaveKey(TKeyType KeyType, const UnicodeString & FileName,
 
				     switch (KeyType)

			
 
				     {

			
 
				       case ktSSH2:

			
 
				-        if (!ssh2_save_userkey(KeyFile, Ssh2Key, PassphrasePtr))

			
 
				         {

			
 
				-          int Error = errno;

			
 
				-          throw EOSExtException(FMTLOAD(KEY_SAVE_ERROR, (FileName)), Error);

			
 
				+          ppk_save_parameters Params = ppk_save_default_parameters;

			
 
				+          // Other parameters are probably not relevant for version 2

			
 
				+          Params.fmt_version = 2;

			
 
				+          if (!ppk_save_f(KeyFile, Ssh2Key, PassphrasePtr, &Params))

			
 
				+          {

			
 
				+            int Error = errno;

			
 
				+            throw EOSExtException(FMTLOAD(KEY_SAVE_ERROR, (FileName)), Error);

			
 
				+          }

			
 
				         }

			
 
				         break;

			
 
				 

			
@@ -901,8 +906,10 @@ UnicodeString __fastcall ParseOpenSshPubLine(const UnicodeString & Line, const s
 
				   char * CommentPtr = NULL;

			
 
				   const char * ErrorStr = NULL;

			
 
				   strbuf * PubBlobBuf = strbuf_new();

			
 
				+  BinarySource Source[1];

			
 
				+  BinarySource_BARE_INIT(Source, UtfLine.c_str(), UtfLine.Length());

			
 
				   UnicodeString Result;

			
 
				-  if (!openssh_loadpub_line(UtfLine.c_str(), &AlgorithmName, BinarySink_UPCAST(PubBlobBuf), &CommentPtr, &ErrorStr))

			
 
				+  if (!openssh_loadpub(Source, &AlgorithmName, BinarySink_UPCAST(PubBlobBuf), &CommentPtr, &ErrorStr))

			
 
				   {

			
 
				     throw Exception(UnicodeString(ErrorStr));

			
 
				   }

			
--- a/source/putty/defs.h
+++ b/source/putty/defs.h
@@ -29,6 +29,8 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base);
 
				 #ifndef WINSCP

			
 
				 // Not needed by the code WinSCP uses

			
 
				 #include <inttypes.h>

			
 
				+#else

			
 
				+#define PRIu32 "u"

			
 
				 #endif

			
 
				 /* Because we still support older MSVC libraries which don't recognise the

			
 
				  * standard C "z" modifier for size_t-sized integers, we must use an

			
@@ -67,6 +69,7 @@ typedef struct FontSpec FontSpec;
 
				 typedef struct bufchain_tag bufchain;

			
 
				 

			
 
				 typedef struct strbuf strbuf;

			
 
				+typedef struct LoadedFile LoadedFile;

			
 
				 

			
 
				 typedef struct RSAKey RSAKey;

			
 
				 

			
--- a/source/putty/marshal.c
+++ b/source/putty/marshal.c
@@ -218,6 +218,55 @@ const char *BinarySource_get_asciz(BinarySource *src)
 
				     return start;

			
 
				 }

			
 
				 

			
 
				+static ptrlen BinarySource_get_chars_internal(

			
 
				+    BinarySource *src, const char *set, bool include)

			
 
				+{

			
 
				+    const char *start = here;

			
 
				+    while (avail(1)) {

			
 
				+        bool present = NULL != strchr(set, *(const char *)consume(0));

			
 
				+        if (present != include)

			
 
				+            break;

			
 
				+        (void) consume(1);

			
 
				+    }

			
 
				+    { // WINSCP

			
 
				+    const char *end = here;

			
 
				+    return make_ptrlen(start, end - start);

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+ptrlen BinarySource_get_chars(BinarySource *src, const char *include_set)

			
 
				+{

			
 
				+    return BinarySource_get_chars_internal(src, include_set, true);

			
 
				+}

			
 
				+

			
 
				+ptrlen BinarySource_get_nonchars(BinarySource *src, const char *exclude_set)

			
 
				+{

			
 
				+    return BinarySource_get_chars_internal(src, exclude_set, false);

			
 
				+}

			
 
				+

			
 
				+ptrlen BinarySource_get_chomped_line(BinarySource *src)

			
 
				+{

			
 
				+    const char *start, *end;

			
 
				+

			
 
				+    if (src->err)

			
 
				+        return make_ptrlen(here, 0);

			
 
				+

			
 
				+    start = here;

			
 
				+    end = memchr(start, '\n', src->len - src->pos);

			
 
				+    if (end)

			
 
				+        advance(end + 1 - start);

			
 
				+    else

			
 
				+        advance(src->len - src->pos);

			
 
				+    end = here;

			
 
				+

			
 
				+    if (end > start && end[-1] == '\n')

			
 
				+        end--;

			
 
				+    if (end > start && end[-1] == '\r')

			
 
				+        end--;

			
 
				+

			
 
				+    return make_ptrlen(start, end - start);

			
 
				+}

			
 
				+

			
 
				 ptrlen BinarySource_get_pstring(BinarySource *src)

			
 
				 {

			
 
				     const unsigned char *ucp;

			
--- a/source/putty/marshal.h
+++ b/source/putty/marshal.h
@@ -276,6 +276,12 @@ static inline void BinarySource_INIT__(BinarySource *src, ptrlen data)
 
				     BinarySource_get_string(BinarySource_UPCAST(src))

			
 
				 #define get_asciz(src) \

			
 
				     BinarySource_get_asciz(BinarySource_UPCAST(src))

			
 
				+#define get_chars(src, include) \

			
 
				+    BinarySource_get_chars(BinarySource_UPCAST(src), include)

			
 
				+#define get_nonchars(src, exclude) \

			
 
				+    BinarySource_get_nonchars(BinarySource_UPCAST(src), exclude)

			
 
				+#define get_chomped_line(src) \

			
 
				+    BinarySource_get_chomped_line(BinarySource_UPCAST(src))

			
 
				 #define get_pstring(src) \

			
 
				     BinarySource_get_pstring(BinarySource_UPCAST(src))

			
 
				 #define get_mp_ssh1(src) \

			
@@ -305,6 +311,9 @@ unsigned long BinarySource_get_uint32(BinarySource *);
 
				 uint64_t BinarySource_get_uint64(BinarySource *);

			
 
				 ptrlen BinarySource_get_string(BinarySource *);

			
 
				 const char *BinarySource_get_asciz(BinarySource *);

			
 
				+ptrlen BinarySource_get_chars(BinarySource *, const char *include_set);

			
 
				+ptrlen BinarySource_get_nonchars(BinarySource *, const char *exclude_set);

			
 
				+ptrlen BinarySource_get_chomped_line(BinarySource *);

			
 
				 ptrlen BinarySource_get_pstring(BinarySource *);

			
 
				 mp_int *BinarySource_get_mp_ssh1(BinarySource *src);

			
 
				 mp_int *BinarySource_get_mp_ssh2(BinarySource *src);

			
--- a/source/putty/misc.h
+++ b/source/putty/misc.h
@@ -189,6 +189,10 @@ int string_length_for_printf(size_t);
 
				  * string. */

			
 
				 #define PTRLEN_LITERAL(stringlit) \

			
 
				     TYPECHECK("" stringlit "", make_ptrlen(stringlit, sizeof(stringlit)-1))

			
 
				+/* Make a ptrlen out of a compile-time string literal in a way that

			
 
				+ * allows you to declare the ptrlen itself as a compile-time initialiser. */

			
 
				+#define PTRLEN_DECL_LITERAL(stringlit) \

			
 
				+    { TYPECHECK("" stringlit "", stringlit), sizeof(stringlit)-1 }

			
 
				 /* Make a ptrlen out of a constant byte array. */

			
 
				 #define PTRLEN_FROM_CONST_BYTES(a) make_ptrlen(a, sizeof(a))

			
 
				 

			
@@ -415,4 +419,28 @@ static inline char *stripctrl_string(StripCtrlChars *sccpub, const char *str)
 
				 #define pinitassert(P) const int __assert_dummy = 1/((int)(P))

			
 
				 #endif

			
 
				 

			
 
				+/*

			
 
				+ * A mechanism for loading a file from disk into a memory buffer where

			
 
				+ * it can be picked apart as a BinarySource.

			
 
				+ */

			
 
				+struct LoadedFile {

			
 
				+    char *data;

			
 
				+    size_t len, max_size;

			
 
				+    BinarySource_IMPLEMENTATION;

			
 
				+};

			
 
				+typedef enum {

			
 
				+    LF_OK,      /* file loaded successfully */

			
 
				+    LF_TOO_BIG, /* file didn't fit in buffer */

			
 
				+    LF_ERROR,   /* error from stdio layer */

			
 
				+} LoadFileStatus;

			
 
				+

			
 
				+/* Set the memory block of 'size' bytes at 'out' to the bitwise XOR of

			
 
				+ * the two blocks of the same size at 'in1' and 'in2'.

			
 
				+ *

			
 
				+ * 'out' may point to exactly the same address as one of the inputs,

			
 
				+ * but if the input and output blocks overlap in any other way, the

			
 
				+ * result of this function is not guaranteed. No memmove-style effort

			
 
				+ * is made to handle difficult overlap cases. */

			
 
				+void memxor(uint8_t *out, const uint8_t *in1, const uint8_t *in2, size_t size);

			
 
				+

			
 
				 #endif

			
--- a/source/putty/ssh.h
+++ b/source/putty/ssh.h
@@ -744,32 +744,43 @@ struct ssh_hash {
 
				 

			
 
				 struct ssh_hashalg {

			
 
				     ssh_hash *(*new)(const ssh_hashalg *alg);

			
 
				-    ssh_hash *(*copy)(ssh_hash *);

			
 
				-    void (*final)(ssh_hash *, unsigned char *); /* ALSO FREES THE ssh_hash! */

			
 
				+    void (*reset)(ssh_hash *);

			
 
				+    void (*copyfrom)(ssh_hash *dest, ssh_hash *src);

			
 
				+    void (*digest)(ssh_hash *, unsigned char *);

			
 
				     void (*free)(ssh_hash *);

			
 
				-    int hlen; /* output length in bytes */

			
 
				-    int blocklen; /* length of the hash's input block, or 0 for N/A */

			
 
				+    size_t hlen; /* output length in bytes */

			
 
				+    size_t blocklen; /* length of the hash's input block, or 0 for N/A */

			
 
				     const char *text_basename;     /* the semantic name of the hash */

			
 
				     const char *annotation;   /* extra info, e.g. which of multiple impls */

			
 
				     const char *text_name;    /* both combined, e.g. "SHA-n (unaccelerated)" */

			
 
				+    const void *extra;        /* private to the hash implementation */

			
 
				 };

			
 
				 

			
 
				 static inline ssh_hash *ssh_hash_new(const ssh_hashalg *alg)

			
 
				-{ return alg->new(alg); }

			
 
				-static inline ssh_hash *ssh_hash_copy(ssh_hash *h)

			
 
				-{ return h->vt->copy(h); }

			
 
				-static inline void ssh_hash_final(ssh_hash *h, unsigned char *out)

			
 
				-{ h->vt->final(h, out); }

			
 
				+{ ssh_hash *h = alg->new(alg); if (h) h->vt->reset(h); return h; }

			
 
				+static inline ssh_hash *ssh_hash_copy(ssh_hash *orig)

			
 
				+{ ssh_hash *h = orig->vt->new(orig->vt); h->vt->copyfrom(h, orig); return h; }

			
 
				+static inline void ssh_hash_digest(ssh_hash *h, unsigned char *out)

			
 
				+{ h->vt->digest(h, out); }

			
 
				 static inline void ssh_hash_free(ssh_hash *h)

			
 
				 { h->vt->free(h); }

			
 
				 static inline const ssh_hashalg *ssh_hash_alg(ssh_hash *h)

			
 
				 { return h->vt; }

			
 
				 

			
 
				+/* The reset and copyfrom vtable methods return void. But for call-site

			
 
				+ * convenience, these wrappers return their input pointer. */

			
 
				+static inline ssh_hash *ssh_hash_reset(ssh_hash *h)

			
 
				+{ h->vt->reset(h); return h; }

			
 
				+

			
 
				+/* ssh_hash_final emits the digest _and_ frees the ssh_hash */

			
 
				+static inline void ssh_hash_final(ssh_hash *h, unsigned char *out)

			
 
				+{ h->vt->digest(h, out); h->vt->free(h); }

			
 
				+

			
 
				 /* Handy macros for defining all those text-name fields at once */

			
 
				 #define HASHALG_NAMES_BARE(base) \

			
 
				-    base, NULL, base

			
 
				-#define HASHALG_NAMES_ANNOTATED(base, annotation) \

			
 
				-    base, annotation, base " (" annotation ")"

			
 
				+    /*.text_basename =*/ base, /*.annotation =*/ NULL, /*.text_name =*/ base

			
 
				+#define HASHALG_NAMES_ANNOTATED(base, ann) \

			
 
				+    /*.text_basename =*/ base, /*.annotation =*/ ann, /*.text_name =*/ base " (" ann ")"

			
 
				 

			
 
				 #ifndef WINSCP_VS

			
 
				 

			
@@ -914,8 +925,20 @@ struct ssh2_userkey {
 
				     char *comment;                     /* the key comment */

			
 
				 };

			
 
				 

			
 
				+/* Argon2 password hashing function */

			
 
				+typedef enum { Argon2d = 0, Argon2i = 1, Argon2id = 2 } Argon2Flavour;

			
 
				+void argon2(Argon2Flavour, uint32_t mem, uint32_t passes,

			
 
				+            uint32_t parallel, uint32_t taglen,

			
 
				+            ptrlen P, ptrlen S, ptrlen K, ptrlen X, strbuf *out);

			
 
				+void argon2_choose_passes(

			
 
				+    Argon2Flavour, uint32_t mem, uint32_t milliseconds, uint32_t *passes,

			
 
				+    uint32_t parallel, uint32_t taglen, ptrlen P, ptrlen S, ptrlen K, ptrlen X,

			
 
				+    strbuf *out);

			
 
				+/* The H' hash defined in Argon2, exposed just for testcrypt */

			
 
				+strbuf *argon2_long_hash(unsigned length, ptrlen data);

			
 
				+

			
 
				 /* The maximum length of any hash algorithm. (bytes) */

			
 
				-#define MAX_HASH_LEN (64)              /* longest is SHA-512 */

			
 
				+#define MAX_HASH_LEN (114) /* longest is SHAKE256 with 114-byte output */

			
 
				 

			
 
				 extern const ssh_cipheralg ssh_3des_ssh1;

			
 
				 extern const ssh_cipheralg ssh_blowfish_ssh1;

			
@@ -960,7 +983,12 @@ extern const ssh_hashalg ssh_sha256;
 
				 extern const ssh_hashalg ssh_sha256_hw;

			
 
				 extern const ssh_hashalg ssh_sha256_sw;

			
 
				 extern const ssh_hashalg ssh_sha384;

			
 
				+extern const ssh_hashalg ssh_sha384_hw;

			
 
				+extern const ssh_hashalg ssh_sha384_sw;

			
 
				 extern const ssh_hashalg ssh_sha512;

			
 
				+extern const ssh_hashalg ssh_sha512_hw;

			
 
				+extern const ssh_hashalg ssh_sha512_sw;

			
 
				+extern const ssh_hashalg ssh_blake2b;

			
 
				 extern const ssh_kexes ssh_diffiehellman_group1;

			
 
				 extern const ssh_kexes ssh_diffiehellman_group14;

			
 
				 extern const ssh_kexes ssh_diffiehellman_gex;

			
@@ -986,6 +1014,10 @@ extern const ssh2_macalg ssh_hmac_sha256;
 
				 extern const ssh2_macalg ssh2_poly1305;

			
 
				 extern const ssh_compression_alg ssh_zlib;

			
 
				 

			
 
				+/* Special constructor: BLAKE2b can be instantiated with any hash

			
 
				+ * length up to 128 bytes */

			
 
				+ssh_hash *blake2b_new_general(unsigned hashlen);

			
 
				+

			
 
				 /*

			
 
				  * On some systems, you have to detect hardware crypto acceleration by

			
 
				  * asking the local OS API rather than OS-agnostically asking the CPU

			
@@ -1159,13 +1191,6 @@ mp_int *dh_create_e(dh_ctx *, int nbits);
 
				 const char *dh_validate_f(dh_ctx *, mp_int *f);

			
 
				 mp_int *dh_find_K(dh_ctx *, mp_int *f);

			
 
				 

			
 
				-bool rsa_ssh1_encrypted(const Filename *filename, char **comment);

			
 
				-int rsa_ssh1_loadpub(const Filename *filename, BinarySink *bs,

			
 
				-                     char **commentptr, const char **errorstr);

			
 
				-int rsa_ssh1_loadkey(const Filename *filename, RSAKey *key,

			
 
				-                     const char *passphrase, const char **errorstr);

			
 
				-bool rsa_ssh1_savekey(const Filename *filename, RSAKey *key, char *passphrase);

			
 
				-

			
 
				 static inline bool is_base64_char(char c)

			
 
				 {

			
 
				     return ((c >= '0' && c <= '9') ||

			
@@ -1184,14 +1209,68 @@ extern void base64_encode(FILE *fp, const unsigned char *data, int datalen,
 
				 extern ssh2_userkey ssh2_wrong_passphrase;

			
 
				 #define SSH2_WRONG_PASSPHRASE (&ssh2_wrong_passphrase)

			
 
				 

			
 
				-bool ssh2_userkey_encrypted(const Filename *filename, char **comment);

			
 
				-ssh2_userkey *ssh2_load_userkey(

			
 
				-    const Filename *filename, const char *passphrase, const char **errorstr);

			
 
				-bool ssh2_userkey_loadpub(

			
 
				-    const Filename *filename, char **algorithm, BinarySink *bs,

			
 
				-    char **commentptr, const char **errorstr);

			
 
				-bool ssh2_save_userkey(

			
 
				-    const Filename *filename, ssh2_userkey *key, char *passphrase);

			
 
				+bool ppk_encrypted_s(BinarySource *src, char **comment);

			
 
				+bool ppk_encrypted_f(const Filename *filename, char **comment);

			
 
				+#define ssh2_userkey_encrypted ppk_encrypted_f

			
 
				+bool rsa1_encrypted_s(BinarySource *src, char **comment);

			
 
				+bool rsa1_encrypted_f(const Filename *filename, char **comment);

			
 
				+#define rsa_ssh1_encrypted rsa1_encrypted_f

			
 
				+

			
 
				+ssh2_userkey *ppk_load_s(BinarySource *src, const char *passphrase,

			
 
				+                         const char **errorstr);

			
 
				+ssh2_userkey *ppk_load_f(const Filename *filename, const char *passphrase,

			
 
				+                         const char **errorstr);

			
 
				+#define ssh2_load_userkey ppk_load_f

			
 
				+int rsa1_load_s(BinarySource *src, RSAKey *key,

			
 
				+                const char *passphrase, const char **errorstr);

			
 
				+int rsa1_load_f(const Filename *filename, RSAKey *key,

			
 
				+                const char *passphrase, const char **errorstr);

			
 
				+#define rsa_ssh1_loadkey rsa1_load_f

			
 
				+

			
 
				+typedef struct ppk_save_parameters {

			
 
				+    unsigned fmt_version;              /* currently 2 or 3 */

			
 
				+

			
 
				+    /*

			
 
				+     * Parameters for fmt_version == 3

			
 
				+     */

			
 
				+    Argon2Flavour argon2_flavour;

			
 
				+    uint32_t argon2_mem;               /* in Kbyte */

			
 
				+    bool argon2_passes_auto;

			
 
				+    union {

			
 
				+        uint32_t argon2_passes;        /* if auto == false */

			
 
				+        uint32_t argon2_milliseconds;  /* if auto == true */

			
 
				+    };

			
 
				+    uint32_t argon2_parallelism;

			
 
				+

			
 
				+    /* The ability to choose a specific salt is only intended for the

			
 
				+     * use of the automated test of PuTTYgen. It's a (mild) security

			
 
				+     * risk to do it with any passphrase you actually care about,

			
 
				+     * because it invalidates the entire point of having a salt in the

			
 
				+     * first place. */

			
 
				+    const uint8_t *salt;

			
 
				+    size_t saltlen;

			
 
				+} ppk_save_parameters;

			
 
				+extern const ppk_save_parameters ppk_save_default_parameters;

			
 
				+

			
 
				+strbuf *ppk_save_sb(ssh2_userkey *key, const char *passphrase,

			
 
				+                    const ppk_save_parameters *params);

			
 
				+bool ppk_save_f(const Filename *filename, ssh2_userkey *key,

			
 
				+                const char *passphrase, const ppk_save_parameters *params);

			
 
				+strbuf *rsa1_save_sb(RSAKey *key, const char *passphrase);

			
 
				+bool rsa1_save_f(const Filename *filename, RSAKey *key,

			
 
				+                 const char *passphrase);

			
 
				+

			
 
				+bool ppk_loadpub_s(BinarySource *src, char **algorithm, BinarySink *bs,

			
 
				+                   char **commentptr, const char **errorstr);

			
 
				+bool ppk_loadpub_f(const Filename *filename, char **algorithm, BinarySink *bs,

			
 
				+                   char **commentptr, const char **errorstr);

			
 
				+#define ssh2_userkey_loadpub ppk_loadpub_f

			
 
				+int rsa1_loadpub_s(BinarySource *src, BinarySink *bs,

			
 
				+                   char **commentptr, const char **errorstr);

			
 
				+int rsa1_loadpub_f(const Filename *filename, BinarySink *bs,

			
 
				+                   char **commentptr, const char **errorstr);

			
 
				+#define rsa_ssh1_loadpub rsa1_loadpub_f

			
 
				+

			
 
				 const ssh_keyalg *find_pubkey_alg(const char *name);

			
 
				 const ssh_keyalg *find_pubkey_alg_len(ptrlen name);

			
 
				 

			
@@ -1238,6 +1317,15 @@ enum {
 
				     SSH_KEYTYPE_SSH2_PUBLIC_RFC4716,

			
 
				     SSH_KEYTYPE_SSH2_PUBLIC_OPENSSH

			
 
				 };

			
 
				+

			
 
				+typedef enum {

			
 
				+    SSH_FPTYPE_MD5,

			
 
				+    SSH_FPTYPE_SHA256,

			
 
				+} FingerprintType;

			
 
				+

			
 
				+#define SSH_FPTYPE_DEFAULT SSH_FPTYPE_SHA256

			
 
				+#define SSH_N_FPTYPES (SSH_FPTYPE_SHA256 + 1)

			
 
				+

			
 
				 char *ssh1_pubkey_str(RSAKey *ssh1key);

			
 
				 void ssh1_write_pubkey(FILE *fp, RSAKey *ssh1key);

			
 
				 char *ssh2_pubkey_openssh_str(ssh2_userkey *key);

			
@@ -1247,10 +1335,11 @@ void ssh2_write_pubkey(FILE *fp, const char *comment,
 
				 char *ssh2_fingerprint_blob(ptrlen);

			
 
				 char *ssh2_fingerprint(ssh_key *key);

			
 
				 int key_type(const Filename *filename);

			
 
				+int key_type_s(BinarySource *src);

			
 
				 const char *key_type_to_str(int type);

			
 
				-bool openssh_loadpub_line(char * line, char **algorithm, // WINSCP

			
 
				-                         BinarySink *bs,

			
 
				-                         char **commentptr, const char **errorstr);

			
 
				+bool openssh_loadpub(BinarySource *src, char **algorithm, // WINSCP

			
 
				+                     BinarySink *bs,

			
 
				+                     char **commentptr, const char **errorstr);

			
 
				 

			
 
				 bool import_possible(int type);

			
 
				 int import_target_type(int type);

			
@@ -1270,8 +1359,10 @@ void des3_decrypt_pubkey_ossh(const void *key, const void *iv,
 
				                               void *blk, int len);

			
 
				 void des3_encrypt_pubkey_ossh(const void *key, const void *iv,

			
 
				                               void *blk, int len);

			
 
				-void aes256_encrypt_pubkey(const void *key, void *blk, int len);

			
 
				-void aes256_decrypt_pubkey(const void *key, void *blk, int len);

			
 
				+void aes256_encrypt_pubkey(const void *key, const void *iv,

			
 
				+                           void *blk, int len);

			
 
				+void aes256_decrypt_pubkey(const void *key, const void *iv,

			
 
				+                           void *blk, int len);

			
 
				 

			
 
				 void des_encrypt_xdmauth(const void *key, void *blk, int len);

			
 
				 void des_decrypt_xdmauth(const void *key, void *blk, int len);

			
--- a/source/putty/sshargon2.c
+++ b/source/putty/sshargon2.c
@@ -0,0 +1,593 @@
 
				+/*

			
 
				+ * Implementation of the Argon2 password hash function.

			
 
				+ *

			
 
				+ * My sources for the algorithm description and test vectors (the latter in

			
 
				+ * test/cryptsuite.py) were the reference implementation on Github, and also

			
 
				+ * the Internet-Draft description:

			
 
				+ *

			
 
				+ *   https://github.com/P-H-C/phc-winner-argon2

			
 
				+ *   https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-argon2-13

			
 
				+ */

			
 
				+

			
 
				+#include <assert.h>

			
 
				+

			
 
				+#include "putty.h"

			
 
				+#include "ssh.h"

			
 
				+#include "marshal.h"

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * Argon2 uses data marshalling rules similar to SSH but with 32-bit integers

			
 
				+ * stored little-endian. Start with some local BinarySink routines for storing

			
 
				+ * a uint32 and a string in that fashion.

			
 
				+ */

			
 
				+

			
 
				+static void BinarySink_put_uint32_le(BinarySink *bs, unsigned long val)

			
 
				+{

			
 
				+    unsigned char data[4];

			
 
				+    PUT_32BIT_LSB_FIRST(data, val);

			
 
				+    bs->write(bs, data, sizeof(data));

			
 
				+}

			
 
				+

			
 
				+static void BinarySink_put_stringpl_le(BinarySink *bs, ptrlen pl)

			
 
				+{

			
 
				+    /* Check that the string length fits in a uint32, without doing a

			
 
				+     * potentially implementation-defined shift of more than 31 bits */

			
 
				+    assert((pl.len >> 31) < 2);

			
 
				+

			
 
				+    BinarySink_put_uint32_le(bs, pl.len);

			
 
				+    bs->write(bs, pl.ptr, pl.len);

			
 
				+}

			
 
				+

			
 
				+#define put_uint32_le(bs, val) \

			
 
				+    BinarySink_put_uint32_le(BinarySink_UPCAST(bs), val)

			
 
				+#define put_stringpl_le(bs, val) \

			
 
				+    BinarySink_put_stringpl_le(BinarySink_UPCAST(bs), val)

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * Argon2 defines a hash-function family that's an extension of BLAKE2b to

			
 
				+ * generate longer output digests, by repeatedly outputting half of a BLAKE2

			
 
				+ * hash output and then re-hashing the whole thing until there are 64 or fewer

			
 
				+ * bytes left to output. The spec calls this H' (a variant of the original

			
 
				+ * hash it calls H, which is the unmodified BLAKE2b).

			
 
				+ */

			
 
				+

			
 
				+static ssh_hash *hprime_new(unsigned length)

			
 
				+{

			
 
				+    ssh_hash *h = blake2b_new_general(length > 64 ? 64 : length);

			
 
				+    put_uint32_le(h, length);

			
 
				+    return h;

			
 
				+}

			
 
				+

			
 
				+static void hprime_final(ssh_hash *h, unsigned length, void *vout)

			
 
				+{

			
 
				+    uint8_t *out = (uint8_t *)vout;

			
 
				+

			
 
				+    while (length > 64) {

			
 
				+        uint8_t hashbuf[64];

			
 
				+        ssh_hash_final(h, hashbuf);

			
 
				+

			
 
				+        memcpy(out, hashbuf, 32);

			
 
				+        out += 32;

			
 
				+        length -= 32;

			
 
				+

			
 
				+        h = blake2b_new_general(length > 64 ? 64 : length);

			
 
				+        put_data(h, hashbuf, 64);

			
 
				+

			
 
				+        smemclr(hashbuf, sizeof(hashbuf));

			
 
				+    }

			
 
				+

			
 
				+    ssh_hash_final(h, out);

			
 
				+}

			
 
				+

			
 
				+/* Externally visible entry point for the long hash function. This is only

			
 
				+ * used by testcrypt, so it would be overkill to set it up like a proper

			
 
				+ * ssh_hash. */

			
 
				+strbuf *argon2_long_hash(unsigned length, ptrlen data)

			
 
				+{

			
 
				+    ssh_hash *h = hprime_new(length);

			
 
				+    put_datapl(h, data);

			
 
				+    { // WINSCP

			
 
				+    strbuf *out = strbuf_new();

			
 
				+    hprime_final(h, length, strbuf_append(out, length));

			
 
				+    return out;

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * Argon2's own mixing function G, which operates on 1Kb blocks of data.

			
 
				+ *

			
 
				+ * The definition of G in the spec takes two 1Kb blocks as input and produces

			
 
				+ * a 1Kb output block. The first thing that happens to the input blocks is

			
 
				+ * that they get XORed together, and then only the XOR output is used, so you

			
 
				+ * could perfectly well regard G as a 1Kb->1Kb function.

			
 
				+ */

			
 
				+

			
 
				+static inline uint64_t ror(uint64_t x, unsigned rotation)

			
 
				+{

			
 
				+#pragma option push -w-ngu // WINSCP

			
 
				+    unsigned lshift = 63 & -rotation, rshift = 63 & rotation;

			
 
				+#pragma option pop // WINSCP

			
 
				+    return (x << lshift) | (x >> rshift);

			
 
				+}

			
 
				+

			
 
				+static inline uint64_t trunc32(uint64_t x)

			
 
				+{

			
 
				+    return x & 0xFFFFFFFF;

			
 
				+}

			
 
				+

			
 
				+/* Internal function similar to the BLAKE2b round, which mixes up four 64-bit

			
 
				+ * words */

			
 
				+static inline void GB(uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d)

			
 
				+{

			
 
				+    *a += *b + 2 * trunc32(*a) * trunc32(*b);

			
 
				+    *d = ror(*d ^ *a, 32);

			
 
				+    *c += *d + 2 * trunc32(*c) * trunc32(*d);

			
 
				+    *b = ror(*b ^ *c, 24);

			
 
				+    *a += *b + 2 * trunc32(*a) * trunc32(*b);

			
 
				+    *d = ror(*d ^ *a, 16);

			
 
				+    *c += *d + 2 * trunc32(*c) * trunc32(*d);

			
 
				+    *b = ror(*b ^ *c, 63);

			
 
				+}

			
 
				+

			
 
				+/* Higher-level internal function which mixes up sixteen 64-bit words. This is

			
 
				+ * applied to different subsets of the 128 words in a kilobyte block, and the

			
 
				+ * API here is designed to make it easy to apply in the circumstances the spec

			
 
				+ * requires. In every call, the sixteen words form eight pairs adjacent in

			
 
				+ * memory, whose addresses are in arithmetic progression. So the 16 input

			
 
				+ * words are in[0], in[1], in[instep], in[instep+1], ..., in[7*instep],

			
 
				+ * in[7*instep+1], and the 16 output words similarly. */

			
 
				+static inline void P(uint64_t *out, unsigned outstep,

			
 
				+                     uint64_t *in, unsigned instep)

			
 
				+{

			
 
				+    unsigned i; // WINSCP

			
 
				+    for (i = 0; i < 8; i++) {

			
 
				+        out[i*outstep] = in[i*instep];

			
 
				+        out[i*outstep+1] = in[i*instep+1];

			
 
				+    }

			
 
				+

			
 
				+    GB(out+0*outstep+0, out+2*outstep+0, out+4*outstep+0, out+6*outstep+0);

			
 
				+    GB(out+0*outstep+1, out+2*outstep+1, out+4*outstep+1, out+6*outstep+1);

			
 
				+    GB(out+1*outstep+0, out+3*outstep+0, out+5*outstep+0, out+7*outstep+0);

			
 
				+    GB(out+1*outstep+1, out+3*outstep+1, out+5*outstep+1, out+7*outstep+1);

			
 
				+

			
 
				+    GB(out+0*outstep+0, out+2*outstep+1, out+5*outstep+0, out+7*outstep+1);

			
 
				+    GB(out+0*outstep+1, out+3*outstep+0, out+5*outstep+1, out+6*outstep+0);

			
 
				+    GB(out+1*outstep+0, out+3*outstep+1, out+4*outstep+0, out+6*outstep+1);

			
 
				+    GB(out+1*outstep+1, out+2*outstep+0, out+4*outstep+1, out+7*outstep+0);

			
 
				+}

			
 
				+

			
 
				+/* The full G function, taking input blocks X and Y. The result of G is most

			
 
				+ * often XORed into an existing output block, so this API is designed with

			
 
				+ * that in mind: the mixing function's output is always XORed into whatever

			
 
				+ * 1Kb of data is already at 'out'. */

			
 
				+static void G_xor(uint8_t *out, const uint8_t *X, const uint8_t *Y)

			
 
				+{

			
 
				+    uint64_t R[128], Q[128], Z[128];

			
 
				+

			
 
				+    unsigned i; // WINSCP

			
 
				+    for (i = 0; i < 128; i++)

			
 
				+        R[i] = GET_64BIT_LSB_FIRST(X + 8*i) ^ GET_64BIT_LSB_FIRST(Y + 8*i);

			
 
				+

			
 
				+    for (i = 0; i < 8; i++) // WINSCP

			
 
				+        P(Q+16*i, 2, R+16*i, 2);

			
 
				+

			
 
				+    for (i = 0; i < 8; i++) // WINSCP

			
 
				+        P(Z+2*i, 16, Q+2*i, 16);

			
 
				+

			
 
				+    for (i = 0; i < 128; i++) // WINSCP

			
 
				+        PUT_64BIT_LSB_FIRST(out + 8*i,

			
 
				+                            GET_64BIT_LSB_FIRST(out + 8*i) ^ R[i] ^ Z[i]);

			
 
				+

			
 
				+    smemclr(R, sizeof(R));

			
 
				+    smemclr(Q, sizeof(Q));

			
 
				+    smemclr(Z, sizeof(Z));

			
 
				+}

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * The main Argon2 function.

			
 
				+ */

			
 
				+

			
 
				+static void argon2_internal(uint32_t p, uint32_t T, uint32_t m, uint32_t t,

			
 
				+                            uint32_t y, ptrlen P, ptrlen S, ptrlen K, ptrlen X,

			
 
				+                            uint8_t *out)

			
 
				+{

			
 
				+    /*

			
 
				+     * Start by hashing all the input data together: the four string arguments

			
 
				+     * (password P, salt S, optional secret key K, optional associated data

			
 
				+     * X), plus all the parameters for the function's memory and time usage.

			
 
				+     *

			
 
				+     * The output of this hash is the sole input to the subsequent mixing

			
 
				+     * step: Argon2 does not preserve any more entropy from the inputs, it

			
 
				+     * just makes it extra painful to get the final answer.

			
 
				+     */

			
 
				+    uint8_t h0[64];

			
 
				+    {

			
 
				+        ssh_hash *h = blake2b_new_general(64);

			
 
				+        put_uint32_le(h, p);

			
 
				+        put_uint32_le(h, T);

			
 
				+        put_uint32_le(h, m);

			
 
				+        put_uint32_le(h, t);

			
 
				+        put_uint32_le(h, 0x13);        /* hash function version number */

			
 
				+        put_uint32_le(h, y);

			
 
				+        put_stringpl_le(h, P);

			
 
				+        put_stringpl_le(h, S);

			
 
				+        put_stringpl_le(h, K);

			
 
				+        put_stringpl_le(h, X);

			
 
				+        ssh_hash_final(h, h0);

			
 
				+    }

			
 
				+

			
 
				+    { // WINSCP

			
 
				+    struct blk { uint8_t data[1024]; };

			
 
				+

			
 
				+    /*

			
 
				+     * Array of 1Kb blocks. The total size is (approximately) m, the

			
 
				+     * caller-specified parameter for how much memory to use; the blocks are

			
 
				+     * regarded as a rectangular array of p rows ('lanes') by q columns, where

			
 
				+     * p is the 'parallelism' input parameter (the lanes can be processed

			
 
				+     * concurrently up to a point) and q is whatever makes the product pq come

			
 
				+     * to m.

			
 
				+     *

			
 
				+     * Additionally, each row is divided into four equal 'segments', which are

			
 
				+     * important to the way the algorithm decides which blocks to use as input

			
 
				+     * to each step of the function.

			
 
				+     *

			
 
				+     * The term 'slice' refers to a whole set of vertically aligned segments,

			
 
				+     * i.e. slice 0 is the whole left quarter of the array, and slice 3 the

			
 
				+     * whole right quarter.

			
 
				+     */

			
 
				+    size_t SL = m / (4*p); /* segment length: # of 1Kb blocks in a segment */

			
 
				+    size_t q = 4 * SL;     /* width of the array: 4 segments times SL */

			
 
				+    size_t mprime = q * p; /* total size of the array, approximately m */

			
 
				+

			
 
				+    /* Allocate the memory. */

			
 
				+    struct blk *B = snewn(mprime, struct blk);

			
 
				+    memset(B, 0, mprime * sizeof(struct blk));

			
 
				+

			
 
				+    /*

			
 
				+     * Initial setup: fill the first two full columns of the array with data

			
 
				+     * expanded from the starting hash h0. Each block is the result of using

			
 
				+     * the long-output hash function H' to hash h0 itself plus the block's

			
 
				+     * coordinates in the array.

			
 
				+     */

			
 
				+    { // WINSCP

			
 
				+    size_t i; // WINSCP

			
 
				+    for (i = 0; i < p; i++) {

			
 
				+        ssh_hash *h = hprime_new(1024);

			
 
				+        put_data(h, h0, 64);

			
 
				+        put_uint32_le(h, 0);

			
 
				+        put_uint32_le(h, i);

			
 
				+        hprime_final(h, 1024, B[i].data);

			
 
				+    }

			
 
				+    for (i = 0; i < p; i++) { // WINSCP

			
 
				+        ssh_hash *h = hprime_new(1024);

			
 
				+        put_data(h, h0, 64);

			
 
				+        put_uint32_le(h, 1);

			
 
				+        put_uint32_le(h, i);

			
 
				+        hprime_final(h, 1024, B[i+p].data);

			
 
				+    }

			
 
				+

			
 
				+    /*

			
 
				+     * Declarations for the main loop.

			
 
				+     *

			
 
				+     * The basic structure of the main loop is going to involve processing the

			
 
				+     * array one whole slice (vertically divided quarter) at a time. Usually

			
 
				+     * we'll write a new value into every single block in the slice, except

			
 
				+     * that in the initial slice on the first pass, we've already written

			
 
				+     * values into the first two columns during the initial setup above. So

			
 
				+     * 'jstart' indicates the starting index in each segment we process; it

			
 
				+     * starts off as 2 so that we don't overwrite the inital setup, and then

			
 
				+     * after the first slice is done, we set it to 0, and it stays there.

			
 
				+     *

			
 
				+     * d_mode indicates whether we're being data-dependent (true) or

			
 
				+     * data-independent (false). In the hybrid Argon2id mode, we start off

			
 
				+     * independent, and then once we've mixed things up enough, switch over to

			
 
				+     * dependent mode to force long serial chains of computation.

			
 
				+     */

			
 
				+    { // WINSCP

			
 
				+    size_t jstart = 2;

			
 
				+    bool d_mode = (y == 0);

			
 
				+    struct blk out2i, tmp2i, in2i;

			
 
				+

			
 
				+    /* Outermost loop: t whole passes from left to right over the array */

			
 
				+    size_t pass; // WINSCP

			
 
				+    for (pass = 0; pass < t; pass++) {

			
 
				+

			
 
				+        /* Within that, we process the array in its four main slices */

			
 
				+        unsigned slice; // WINSCP

			
 
				+        for (slice = 0; slice < 4; slice++) {

			
 
				+

			
 
				+            /* In Argon2id mode, if we're half way through the first pass,

			
 
				+             * this is the moment to switch d_mode from false to true */

			
 
				+            if (pass == 0 && slice == 2 && y == 2)

			
 
				+                d_mode = true;

			
 
				+

			
 
				+            /* Loop over every segment in the slice (i.e. every row). So i is

			
 
				+             * the y-coordinate of each block we process. */

			
 
				+            { // WINSCP

			
 
				+            size_t i; // WINSCP

			
 
				+            for (i = 0; i < p; i++) {

			
 
				+

			
 
				+                /* And within that segment, process the blocks from left to

			
 
				+                 * right, starting at 'jstart' (usually 0, but 2 in the first

			
 
				+                 * slice). */

			
 
				+                size_t jpre; // WINSCP

			
 
				+                for (jpre = jstart; jpre < SL; jpre++) {

			
 
				+

			
 
				+                    /* j is the x-coordinate of each block we process, made up

			
 
				+                     * of the slice number and the index 'jpre' within the

			
 
				+                     * segment. */

			
 
				+                    size_t j = slice * SL + jpre;

			
 
				+

			
 
				+                    /* jm1 is j-1 (mod q) */

			
 
				+                    uint32_t jm1 = (j == 0 ? q-1 : j-1);

			
 
				+

			
 
				+                    /*

			
 
				+                     * Construct two 32-bit pseudorandom integers J1 and J2.

			
 
				+                     * This is the part of the algorithm that varies between

			
 
				+                     * the data-dependent and independent modes.

			
 
				+                     */

			
 
				+                    uint32_t J1, J2;

			
 
				+                    if (d_mode) {

			
 
				+                        /*

			
 
				+                         * Data-dependent: grab the first 64 bits of the block

			
 
				+                         * to the left of this one.

			
 
				+                         */

			
 
				+                        J1 = GET_32BIT_LSB_FIRST(B[i + p * jm1].data);

			
 
				+                        J2 = GET_32BIT_LSB_FIRST(B[i + p * jm1].data + 4);

			
 
				+                    } else {

			
 
				+                        /*

			
 
				+                         * Data-independent: generate pseudorandom data by

			
 
				+                         * hashing a sequence of preimage blocks that include

			
 
				+                         * all our input parameters, plus the coordinates of

			
 
				+                         * this point in the algorithm (array position and

			
 
				+                         * pass number) to make all the hash outputs distinct.

			
 
				+                         *

			
 
				+                         * The hash we use is G itself, applied twice. So we

			
 
				+                         * generate 1Kb of data at a time, which is enough for

			
 
				+                         * 128 (J1,J2) pairs. Hence we only need to do the

			
 
				+                         * hashing if our index within the segment is a

			
 
				+                         * multiple of 128, or if we're at the very start of

			
 
				+                         * the algorithm (in which case we started at 2 rather

			
 
				+                         * than 0). After that we can just keep picking data

			
 
				+                         * out of our most recent hash output.

			
 
				+                         */

			
 
				+                        if (jpre == jstart || jpre % 128 == 0) {

			
 
				+                            /*

			
 
				+                             * Hash preimage is mostly zeroes, with a

			
 
				+                             * collection of assorted integer values we had

			
 
				+                             * anyway.

			
 
				+                             */

			
 
				+                            memset(in2i.data, 0, sizeof(in2i.data));

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data +  0, pass);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data +  8, i);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data + 16, slice);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data + 24, mprime);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data + 32, t);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data + 40, y);

			
 
				+                            PUT_64BIT_LSB_FIRST(in2i.data + 48, jpre / 128 + 1);

			
 
				+

			
 
				+                            /*

			
 
				+                             * Now apply G twice to generate the hash output

			
 
				+                             * in out2i.

			
 
				+                             */

			
 
				+                            memset(tmp2i.data, 0, sizeof(tmp2i.data));

			
 
				+                            G_xor(tmp2i.data, tmp2i.data, in2i.data);

			
 
				+                            memset(out2i.data, 0, sizeof(out2i.data));

			
 
				+                            G_xor(out2i.data, out2i.data, tmp2i.data);

			
 
				+                        }

			
 
				+

			
 
				+                        /*

			
 
				+                         * Extract J1 and J2 from the most recent hash output

			
 
				+                         * (whether we've just computed it or not).

			
 
				+                         */

			
 
				+                        J1 = GET_32BIT_LSB_FIRST(

			
 
				+                            out2i.data + 8 * (jpre % 128));

			
 
				+                        J2 = GET_32BIT_LSB_FIRST(

			
 
				+                            out2i.data + 8 * (jpre % 128) + 4);

			
 
				+                    }

			
 
				+

			
 
				+                    /*

			
 
				+                     * Now convert J1 and J2 into the index of an existing

			
 
				+                     * block of the array to use as input to this step. This

			
 
				+                     * is fairly fiddly.

			
 
				+                     *

			
 
				+                     * The easy part: the y-coordinate of the input block is

			
 
				+                     * obtained by reducing J2 mod p, except that at the very

			
 
				+                     * start of the algorithm (processing the first slice on

			
 
				+                     * the first pass) we simply use the same y-coordinate as

			
 
				+                     * our output block.

			
 
				+                     *

			
 
				+                     * Note that it's safe to use the ordinary % operator

			
 
				+                     * here, without any concern for timing side channels: in

			
 
				+                     * data-independent mode J2 is not correlated to any

			
 
				+                     * secrets, and in data-dependent mode we're going to be

			
 
				+                     * giving away side-channel data _anyway_ when we use it

			
 
				+                     * as an array index (and by assumption we don't care,

			
 
				+                     * because it's already massively randomised from the real

			
 
				+                     * inputs).

			
 
				+                     */

			
 
				+                    { // WINSCP

			
 
				+                    uint32_t index_l = (pass == 0 && slice == 0) ? i : J2 % p;

			
 
				+

			
 
				+                    /*

			
 
				+                     * The hard part: which block in this array row do we use?

			
 
				+                     *

			
 
				+                     * First, we decide what the possible candidates are. This

			
 
				+                     * requires some case analysis, and depends on whether the

			
 
				+                     * array row is the same one we're writing into or not.

			
 
				+                     *

			
 
				+                     * If it's not the same row: we can't use any block from

			
 
				+                     * the current slice (because the segments within a slice

			
 
				+                     * have to be processable in parallel, so in a concurrent

			
 
				+                     * implementation those blocks are potentially in the

			
 
				+                     * process of being overwritten by other threads). But the

			
 
				+                     * other three slices are fair game, except that in the

			
 
				+                     * first pass, slices to the right of us won't have had

			
 
				+                     * any values written into them yet at all.

			
 
				+                     *

			
 
				+                     * If it is the same row, we _are_ allowed to use blocks

			
 
				+                     * from the current slice, but only the ones before our

			
 
				+                     * current position.

			
 
				+                     *

			
 
				+                     * In both cases, we also exclude the individual _column_

			
 
				+                     * just to the left of the current one. (The block

			
 
				+                     * immediately to our left is going to be the _other_

			
 
				+                     * input to G, but the spec also says that we avoid that

			
 
				+                     * column even in a different row.)

			
 
				+                     *

			
 
				+                     * All of this means that we end up choosing from a

			
 
				+                     * cyclically contiguous interval of blocks within this

			
 
				+                     * lane, but the start and end points require some thought

			
 
				+                     * to get them right.

			
 
				+                     */

			
 
				+

			
 
				+                    /* Start position is the beginning of the _next_ slice

			
 
				+                     * (containing data from the previous pass), unless we're

			
 
				+                     * on pass 0, where the start position has to be 0. */

			
 
				+                    uint32_t Wstart = (pass == 0 ? 0 : (slice + 1) % 4 * SL);

			
 
				+

			
 
				+                    /* End position splits up by cases. */

			
 
				+                    uint32_t Wend;

			
 
				+                    if (index_l == i) {

			
 
				+                        /* Same lane as output: we can use anything up to (but

			
 
				+                         * not including) the block immediately left of us. */

			
 
				+                        Wend = jm1;

			
 
				+                    } else {

			
 
				+                        /* Different lane from output: we can use anything up

			
 
				+                         * to the previous slice boundary, or one less than

			
 
				+                         * that if we're at the very left edge of our slice

			
 
				+                         * right now. */

			
 
				+                        Wend = SL * slice;

			
 
				+                        if (jpre == 0)

			
 
				+                            Wend = (Wend + q-1) % q;

			
 
				+                    }

			
 
				+

			
 
				+                    /* Total number of blocks available to choose from */

			
 
				+                    { // WINSCP

			
 
				+                    uint32_t Wsize = (Wend + q - Wstart) % q;

			
 
				+

			
 
				+                    /* Fiddly computation from the spec that chooses from the

			
 
				+                     * available blocks, in a deliberately non-uniform

			
 
				+                     * fashion, using J1 as pseudorandom input data. Output is

			
 
				+                     * zz which is the index within our contiguous interval. */

			
 
				+                    uint32_t x = ((uint64_t)J1 * J1) >> 32;

			
 
				+                    uint32_t y = ((uint64_t)Wsize * x) >> 32;

			
 
				+                    uint32_t zz = Wsize - 1 - y;

			
 
				+

			
 
				+                    /* And index_z is the actual x coordinate of the block we

			
 
				+                     * want. */

			
 
				+                    uint32_t index_z = (Wstart + zz) % q;

			
 
				+

			
 
				+                    /* Phew! Combine that block with the one immediately to

			
 
				+                     * our left, and XOR over the top of whatever is already

			
 
				+                     * in our current output block. */

			
 
				+                    G_xor(B[i + p * j].data, B[i + p * jm1].data,

			
 
				+                          B[index_l + p * index_z].data);

			
 
				+                    } // WINSCP

			
 
				+                    } // WINSCP

			
 
				+                }

			
 
				+            }

			
 
				+

			
 
				+            /* We've finished processing a slice. Reset jstart to 0. It will

			
 
				+             * onily _not_ have been 0 if this was pass 0 slice 0, in which

			
 
				+             * case it still had its initial value of 2 to avoid the starting

			
 
				+             * data. */

			
 
				+            jstart = 0;

			
 
				+            } // WINSCP

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    /*

			
 
				+     * The main output is all done. Final output works by taking the XOR of

			
 
				+     * all the blocks in the rightmost column of the array, and then using

			
 
				+     * that as input to our long hash H'. The output of _that_ is what we

			
 
				+     * deliver to the caller.

			
 
				+     */

			
 
				+

			
 
				+    { // WINSCP

			
 
				+    struct blk C = B[p * (q-1)];

			
 
				+    size_t i; // WINSCP

			
 
				+    for (i = 1; i < p; i++)

			
 
				+        memxor(C.data, C.data, B[i + p * (q-1)].data, 1024);

			
 
				+

			
 
				+    {

			
 
				+        ssh_hash *h = hprime_new(T);

			
 
				+        put_data(h, C.data, 1024);

			
 
				+        hprime_final(h, T, out);

			
 
				+    }

			
 
				+

			
 
				+    /*

			
 
				+     * Clean up.

			
 
				+     */

			
 
				+    smemclr(out2i.data, sizeof(out2i.data));

			
 
				+    smemclr(tmp2i.data, sizeof(tmp2i.data));

			
 
				+    smemclr(in2i.data, sizeof(in2i.data));

			
 
				+    smemclr(C.data, sizeof(C.data));

			
 
				+    smemclr(B, mprime * sizeof(struct blk));

			
 
				+    sfree(B);

			
 
				+    } // WINSCP

			
 
				+    } // WINSCP

			
 
				+    } // WINSCP

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+/*

			
 
				+ * Wrapper function that appends to a strbuf (which sshpubk.c will want).

			
 
				+ */

			
 
				+void argon2(Argon2Flavour flavour, uint32_t mem, uint32_t passes,

			
 
				+            uint32_t parallel, uint32_t taglen,

			
 
				+            ptrlen P, ptrlen S, ptrlen K, ptrlen X, strbuf *out)

			
 
				+{

			
 
				+    argon2_internal(parallel, taglen, mem, passes, flavour,

			
 
				+                    P, S, K, X, strbuf_append(out, taglen));

			
 
				+}

			
 
				+

			
 
				+/*

			
 
				+ * Wrapper function which dynamically chooses the number of passes to run in

			
 
				+ * order to hit an approximate total amount of CPU time. Writes the result

			
 
				+ * into 'passes'.

			
 
				+ */

			
 
				+void argon2_choose_passes(

			
 
				+    Argon2Flavour flavour, uint32_t mem,

			
 
				+    uint32_t milliseconds, uint32_t *passes,

			
 
				+    uint32_t parallel, uint32_t taglen,

			
 
				+    ptrlen P, ptrlen S, ptrlen K, ptrlen X,

			
 
				+    strbuf *out)

			
 
				+{

			
 
				+    unsigned long desired_time = (TICKSPERSEC * milliseconds) / 1000;

			
 
				+

			
 
				+    /*

			
 
				+     * We only need the time taken to be approximately right, so we

			
 
				+     * scale up the number of passes geometrically, which avoids

			
 
				+     * taking O(t^2) time to find a pass count taking time t.

			
 
				+     *

			
 
				+     * Using the Fibonacci numbers is slightly nicer than the obvious

			
 
				+     * approach of powers of 2, because it's still very easy to

			
 
				+     * compute, and grows less fast (powers of 1.6 instead of 2), so

			
 
				+     * you get just a touch more precision.

			
 
				+     */

			
 
				+    uint32_t a = 1, b = 1;

			
 
				+

			
 
				+    while (true) {

			
 
				+        unsigned long start_time = GETTICKCOUNT();

			
 
				+        argon2(flavour, mem, b, parallel, taglen, P, S, K, X, out);

			
 
				+        { // WINSCP

			
 
				+        unsigned long ticks = GETTICKCOUNT() - start_time;

			
 
				+

			
 
				+        /* But just in case computers get _too_ fast, we have to cap

			
 
				+         * the growth before it gets past the uint32_t upper bound! So

			
 
				+         * if computing a+b would overflow, stop here. */

			
 
				+

			
 
				+        if (ticks >= desired_time || a > (uint32_t)~b) {

			
 
				+            *passes = b;

			
 
				+            return;

			
 
				+        } else {

			
 
				+            strbuf_clear(out);

			
 
				+

			
 
				+            /* Next Fibonacci number: replace (a, b) with (b, a+b) */

			
 
				+            b += a;

			
 
				+            a = b - a;

			
 
				+        }

			
 
				+        } // WINSCP

			
 
				+    }

			
 
				+}

			
--- a/source/putty/sshauxcrypt.c
+++ b/source/putty/sshauxcrypt.c
@@ -11,14 +11,12 @@
 
				 

			
 
				 #include "ssh.h"

			
 
				 

			
 
				-static ssh_cipher *aes256_pubkey_cipher(const void *key)

			
 
				+static ssh_cipher *aes256_pubkey_cipher(const void *key, const void *iv)

			
 
				 {

			
 
				     /*

			
 
				      * PuTTY's own .PPK format for SSH-2 private key files is

			
 
				      * encrypted with 256-bit AES in CBC mode.

			
 
				      */

			
 
				-    char iv[16];

			
 
				-    memset(iv, 0, 16);

			
 
				     { // WINSCP

			
 
				     ssh_cipher *cipher = ssh_cipher_new(&ssh_aes256_cbc);

			
 
				     ssh_cipher_setkey(cipher, key);

			
@@ -27,16 +25,16 @@ static ssh_cipher *aes256_pubkey_cipher(const void *key)
 
				     } // WINSCP

			
 
				 }

			
 
				 

			
 
				-void aes256_encrypt_pubkey(const void *key, void *blk, int len)

			
 
				+void aes256_encrypt_pubkey(const void *key, const void *iv, void *blk, int len)

			
 
				 {

			
 
				-    ssh_cipher *c = aes256_pubkey_cipher(key);

			
 
				+    ssh_cipher *c = aes256_pubkey_cipher(key, iv);

			
 
				     ssh_cipher_encrypt(c, blk, len);

			
 
				     ssh_cipher_free(c);

			
 
				 }

			
 
				 

			
 
				-void aes256_decrypt_pubkey(const void *key, void *blk, int len)

			
 
				+void aes256_decrypt_pubkey(const void *key, const void *iv, void *blk, int len)

			
 
				 {

			
 
				-    ssh_cipher *c = aes256_pubkey_cipher(key);

			
 
				+    ssh_cipher *c = aes256_pubkey_cipher(key, iv);

			
 
				     ssh_cipher_decrypt(c, blk, len);

			
 
				     ssh_cipher_free(c);

			
 
				 }

			
--- a/source/putty/sshblake2.c
+++ b/source/putty/sshblake2.c
@@ -0,0 +1,242 @@
 
				+/*

			
 
				+ * BLAKE2 (RFC 7693) implementation for PuTTY.

			
 
				+ *

			
 
				+ * The BLAKE2 hash family includes BLAKE2s, in which the hash state is

			
 
				+ * operated on as a collection of 32-bit integers, and BLAKE2b, based

			
 
				+ * on 64-bit integers. At present this code implements BLAKE2b only.

			
 
				+ */

			
 
				+

			
 
				+#include <assert.h>

			
 
				+#include "ssh.h"

			
 
				+

			
 
				+static inline uint64_t ror(uint64_t x, unsigned rotation)

			
 
				+{

			
 
				+#pragma option push -w-ngu // WINSCP

			
 
				+    unsigned lshift = 63 & -rotation, rshift = 63 & rotation;

			
 
				+#pragma option pop // WINSCP

			
 
				+    return (x << lshift) | (x >> rshift);

			
 
				+}

			
 
				+

			
 
				+/* RFC 7963 section 2.1 */

			
 
				+enum { R1 = 32, R2 = 24, R3 = 16, R4 = 63 };

			
 
				+

			
 
				+/* RFC 7693 section 2.6 */

			
 
				+static const uint64_t iv[] = {

			
 
				+    // WINSCP (ULL)

			
 
				+    0x6a09e667f3bcc908ULL,                /* floor(2^64 * frac(sqrt(2)))  */

			
 
				+    0xbb67ae8584caa73bULL,                /* floor(2^64 * frac(sqrt(3)))  */

			
 
				+    0x3c6ef372fe94f82bULL,                /* floor(2^64 * frac(sqrt(5)))  */

			
 
				+    0xa54ff53a5f1d36f1ULL,                /* floor(2^64 * frac(sqrt(7)))  */

			
 
				+    0x510e527fade682d1ULL,                /* floor(2^64 * frac(sqrt(11))) */

			
 
				+    0x9b05688c2b3e6c1fULL,                /* floor(2^64 * frac(sqrt(13))) */

			
 
				+    0x1f83d9abfb41bd6bULL,                /* floor(2^64 * frac(sqrt(17))) */

			
 
				+    0x5be0cd19137e2179ULL,                /* floor(2^64 * frac(sqrt(19))) */

			
 
				+};

			
 
				+

			
 
				+/* RFC 7693 section 2.7 */

			
 
				+static const unsigned char sigma[][16] = {

			
 
				+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},

			
 
				+    {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},

			
 
				+    {11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4},

			
 
				+    { 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8},

			
 
				+    { 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13},

			
 
				+    { 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9},

			
 
				+    {12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11},

			
 
				+    {13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10},

			
 
				+    { 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5},

			
 
				+    {10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0},

			
 
				+    /* This array recycles if you have more than 10 rounds. BLAKE2b

			
 
				+     * has 12, so we repeat the first two rows again. */

			
 
				+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},

			
 
				+    {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},

			
 
				+};

			
 
				+

			
 
				+static inline void g_half(uint64_t v[16], unsigned a, unsigned b, unsigned c,

			
 
				+                          unsigned d, uint64_t x, unsigned r1, unsigned r2)

			
 
				+{

			
 
				+    v[a] += v[b] + x;

			
 
				+    v[d] ^= v[a];

			
 
				+    v[d] = ror(v[d], r1);

			
 
				+    v[c] += v[d];

			
 
				+    v[b] ^= v[c];

			
 
				+    v[b] = ror(v[b], r2);

			
 
				+}

			
 
				+

			
 
				+static inline void g(uint64_t v[16], unsigned a, unsigned b, unsigned c,

			
 
				+                     unsigned d, uint64_t x, uint64_t y)

			
 
				+{

			
 
				+    g_half(v, a, b, c, d, x, R1, R2);

			
 
				+    g_half(v, a, b, c, d, y, R3, R4);

			
 
				+}

			
 
				+

			
 
				+static inline void f(uint64_t h[8], uint64_t m[16], uint64_t offset_hi,

			
 
				+                     uint64_t offset_lo, unsigned final)

			
 
				+{

			
 
				+    uint64_t v[16];

			
 
				+    memcpy(v, h, 8 * sizeof(*v));

			
 
				+    memcpy(v + 8, iv, 8 * sizeof(*v));

			
 
				+    v[12] ^= offset_lo;

			
 
				+    v[13] ^= offset_hi;

			
 
				+    v[14] ^= -(uint64_t)final;

			
 
				+    { // WINSCP

			
 
				+    unsigned round; // WINSCP

			
 
				+    for (round = 0; round < 12; round++) {

			
 
				+        const unsigned char *s = sigma[round];

			
 
				+        g(v,  0,  4,  8, 12, m[s[ 0]], m[s[ 1]]);

			
 
				+        g(v,  1,  5,  9, 13, m[s[ 2]], m[s[ 3]]);

			
 
				+        g(v,  2,  6, 10, 14, m[s[ 4]], m[s[ 5]]);

			
 
				+        g(v,  3,  7, 11, 15, m[s[ 6]], m[s[ 7]]);

			
 
				+        g(v,  0,  5, 10, 15, m[s[ 8]], m[s[ 9]]);

			
 
				+        g(v,  1,  6, 11, 12, m[s[10]], m[s[11]]);

			
 
				+        g(v,  2,  7,  8, 13, m[s[12]], m[s[13]]);

			
 
				+        g(v,  3,  4,  9, 14, m[s[14]], m[s[15]]);

			
 
				+    }

			
 
				+    { // WINSCP

			
 
				+    unsigned i; // WINSCP

			
 
				+    for (i = 0; i < 8; i++)

			
 
				+        h[i] ^= v[i] ^ v[i+8];

			
 
				+    smemclr(v, sizeof(v));

			
 
				+    } // WINSCP

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+static inline void f_outer(uint64_t h[8], uint8_t blk[128], uint64_t offset_hi,

			
 
				+                           uint64_t offset_lo, unsigned final)

			
 
				+{

			
 
				+    uint64_t m[16];

			
 
				+    unsigned i; // WINSCP

			
 
				+    for (i = 0; i < 16; i++)

			
 
				+        m[i] = GET_64BIT_LSB_FIRST(blk + 8*i);

			
 
				+    f(h, m, offset_hi, offset_lo, final);

			
 
				+    smemclr(m, sizeof(m));

			
 
				+}

			
 
				+

			
 
				+typedef struct blake2b {

			
 
				+    uint64_t h[8];

			
 
				+    unsigned hashlen;

			
 
				+

			
 
				+    uint8_t block[128];

			
 
				+    size_t used;

			
 
				+    uint64_t lenhi, lenlo;

			
 
				+

			
 
				+    BinarySink_IMPLEMENTATION;

			
 
				+    ssh_hash hash;

			
 
				+} blake2b;

			
 
				+

			
 
				+static void blake2b_write(BinarySink *bs, const void *vp, size_t len);

			
 
				+

			
 
				+static ssh_hash *blake2b_new_inner(unsigned hashlen)

			
 
				+{

			
 
				+    assert(hashlen <= ssh_blake2b.hlen);

			
 
				+

			
 
				+    { // WINSCP

			
 
				+    blake2b *s = snew(blake2b);

			
 
				+    s->hash.vt = &ssh_blake2b;

			
 
				+    s->hashlen = hashlen;

			
 
				+    BinarySink_INIT(s, blake2b_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+    return &s->hash;

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+static ssh_hash *blake2b_new(const ssh_hashalg *alg)

			
 
				+{

			
 
				+    return blake2b_new_inner(alg->hlen);

			
 
				+}

			
 
				+

			
 
				+ssh_hash *blake2b_new_general(unsigned hashlen)

			
 
				+{

			
 
				+    ssh_hash *h = blake2b_new_inner(hashlen);

			
 
				+    ssh_hash_reset(h);

			
 
				+    return h;

			
 
				+}

			
 
				+

			
 
				+static void blake2b_reset(ssh_hash *hash)

			
 
				+{

			
 
				+    blake2b *s = container_of(hash, blake2b, hash);

			
 
				+

			
 
				+    /* Initialise the hash to the standard IV */

			
 
				+    memcpy(s->h, iv, sizeof(s->h));

			
 
				+

			
 
				+    /* XOR in the parameters: secret key length (here always 0) in

			
 
				+     * byte 1, and hash length in byte 0. */

			
 
				+    s->h[0] ^= 0x01010000 ^ s->hashlen;

			
 
				+

			
 
				+    s->used = 0;

			
 
				+    s->lenhi = s->lenlo = 0;

			
 
				+}

			
 
				+

			
 
				+static void blake2b_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    blake2b *copy = container_of(hcopy, blake2b, hash);

			
 
				+    blake2b *orig = container_of(horig, blake2b, hash);

			
 
				+

			
 
				+    memcpy(copy, orig, sizeof(*copy));

			
 
				+    BinarySink_COPIED(copy);

			
 
				+    BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				+}

			
 
				+

			
 
				+static void blake2b_free(ssh_hash *hash)

			
 
				+{

			
 
				+    blake2b *s = container_of(hash, blake2b, hash);

			
 
				+

			
 
				+    smemclr(s, sizeof(*s));

			
 
				+    sfree(s);

			
 
				+}

			
 
				+

			
 
				+static void blake2b_write(BinarySink *bs, const void *vp, size_t len)

			
 
				+{

			
 
				+    blake2b *s = BinarySink_DOWNCAST(bs, blake2b);

			
 
				+    const uint8_t *p = vp;

			
 
				+

			
 
				+    while (len > 0) {

			
 
				+        if (s->used == sizeof(s->block)) {

			
 
				+            f_outer(s->h, s->block, s->lenhi, s->lenlo, 0);

			
 
				+            s->used = 0;

			
 
				+        }

			
 
				+

			
 
				+        { // WINSCP

			
 
				+        size_t chunk = sizeof(s->block) - s->used;

			
 
				+        if (chunk > len)

			
 
				+            chunk = len;

			
 
				+

			
 
				+        memcpy(s->block + s->used, p, chunk);

			
 
				+        s->used += chunk;

			
 
				+        p += chunk;

			
 
				+        len -= chunk;

			
 
				+

			
 
				+        s->lenlo += chunk;

			
 
				+        s->lenhi += (s->lenlo < chunk);

			
 
				+        } // WINSCP

			
 
				+    }

			
 
				+}

			
 
				+

			
 
				+static void blake2b_digest(ssh_hash *hash, uint8_t *digest)

			
 
				+{

			
 
				+    blake2b *s = container_of(hash, blake2b, hash);

			
 
				+

			
 
				+    memset(s->block + s->used, 0, sizeof(s->block) - s->used);

			
 
				+    f_outer(s->h, s->block, s->lenhi, s->lenlo, 1);

			
 
				+

			
 
				+    { // WINSCP

			
 
				+    uint8_t hash_pre[128];

			
 
				+    unsigned i; // WINSCP

			
 
				+    for (i = 0; i < 8; i++)

			
 
				+        PUT_64BIT_LSB_FIRST(hash_pre + 8*i, s->h[i]);

			
 
				+    memcpy(digest, hash_pre, s->hashlen);

			
 
				+    smemclr(hash_pre, sizeof(hash_pre));

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+const ssh_hashalg ssh_blake2b = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ blake2b_new,

			
 
				+    /*.reset =*/ blake2b_reset,

			
 
				+    /*.copyfrom =*/ blake2b_copyfrom,

			
 
				+    /*.digest =*/ blake2b_digest,

			
 
				+    /*.free =*/ blake2b_free,

			
 
				+    /*.hlen =*/ 64,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_BARE("BLAKE2b-64"),

			
 
				+    NULL, // WINSCP

			
 
				+};

			
--- a/source/putty/sshmd5.c
+++ b/source/putty/sshmd5.c
@@ -1,278 +1,256 @@
 
				-#include <assert.h>

			
 
				-#include "ssh.h"

			
 
				-

			
 
				 /*

			
 
				  * MD5 implementation for PuTTY. Written directly from the spec by

			
 
				  * Simon Tatham.

			
 
				  */

			
 
				 

			
 
				-typedef struct {

			
 
				-    uint32_t h[4];

			
 
				-} MD5_Core_State;

			
 
				+#include <assert.h>

			
 
				+#include "ssh.h"

			
 
				 

			
 
				-struct MD5Context {

			
 
				-    MD5_Core_State core;

			
 
				-    unsigned char block[64];

			
 
				-    int blkused;

			
 
				-    uint64_t len;

			
 
				-    BinarySink_IMPLEMENTATION;

			
 
				+static const uint32_t md5_initial_state[] = {

			
 
				+    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,

			
 
				 };

			
 
				 

			
 
				-/* ----------------------------------------------------------------------

			
 
				- * Core MD5 algorithm: processes 16-word blocks into a message digest.

			
 
				- */

			
 
				-

			
 
				-#define F(x,y,z) ( ((x) & (y)) | ((~(x)) & (z)) )

			
 
				-#define G(x,y,z) ( ((x) & (z)) | ((~(z)) & (y)) )

			
 
				-#define H(x,y,z) ( (x) ^ (y) ^ (z) )

			
 
				-#define I(x,y,z) ( (y) ^ ( (x) | ~(z) ) )

			
 
				-

			
 
				-#define rol(x,y) ( ((x) << (y)) | (((uint32_t)x) >> (32-y)) )

			
 
				+static const struct md5_round_constant {

			
 
				+    uint32_t addition, rotation, msg_index;

			
 
				+} md5_round_constants[] = {

			
 
				+    { 0xd76aa478,  7,  0 }, { 0xe8c7b756, 12,  1 },

			
 
				+    { 0x242070db, 17,  2 }, { 0xc1bdceee, 22,  3 },

			
 
				+    { 0xf57c0faf,  7,  4 }, { 0x4787c62a, 12,  5 },

			
 
				+    { 0xa8304613, 17,  6 }, { 0xfd469501, 22,  7 },

			
 
				+    { 0x698098d8,  7,  8 }, { 0x8b44f7af, 12,  9 },

			
 
				+    { 0xffff5bb1, 17, 10 }, { 0x895cd7be, 22, 11 },

			
 
				+    { 0x6b901122,  7, 12 }, { 0xfd987193, 12, 13 },

			
 
				+    { 0xa679438e, 17, 14 }, { 0x49b40821, 22, 15 },

			
 
				+    { 0xf61e2562,  5,  1 }, { 0xc040b340,  9,  6 },

			
 
				+    { 0x265e5a51, 14, 11 }, { 0xe9b6c7aa, 20,  0 },

			
 
				+    { 0xd62f105d,  5,  5 }, { 0x02441453,  9, 10 },

			
 
				+    { 0xd8a1e681, 14, 15 }, { 0xe7d3fbc8, 20,  4 },

			
 
				+    { 0x21e1cde6,  5,  9 }, { 0xc33707d6,  9, 14 },

			
 
				+    { 0xf4d50d87, 14,  3 }, { 0x455a14ed, 20,  8 },

			
 
				+    { 0xa9e3e905,  5, 13 }, { 0xfcefa3f8,  9,  2 },

			
 
				+    { 0x676f02d9, 14,  7 }, { 0x8d2a4c8a, 20, 12 },

			
 
				+    { 0xfffa3942,  4,  5 }, { 0x8771f681, 11,  8 },

			
 
				+    { 0x6d9d6122, 16, 11 }, { 0xfde5380c, 23, 14 },

			
 
				+    { 0xa4beea44,  4,  1 }, { 0x4bdecfa9, 11,  4 },

			
 
				+    { 0xf6bb4b60, 16,  7 }, { 0xbebfbc70, 23, 10 },

			
 
				+    { 0x289b7ec6,  4, 13 }, { 0xeaa127fa, 11,  0 },

			
 
				+    { 0xd4ef3085, 16,  3 }, { 0x04881d05, 23,  6 },

			
 
				+    { 0xd9d4d039,  4,  9 }, { 0xe6db99e5, 11, 12 },

			
 
				+    { 0x1fa27cf8, 16, 15 }, { 0xc4ac5665, 23,  2 },

			
 
				+    { 0xf4292244,  6,  0 }, { 0x432aff97, 10,  7 },

			
 
				+    { 0xab9423a7, 15, 14 }, { 0xfc93a039, 21,  5 },

			
 
				+    { 0x655b59c3,  6, 12 }, { 0x8f0ccc92, 10,  3 },

			
 
				+    { 0xffeff47d, 15, 10 }, { 0x85845dd1, 21,  1 },

			
 
				+    { 0x6fa87e4f,  6,  8 }, { 0xfe2ce6e0, 10, 15 },

			
 
				+    { 0xa3014314, 15,  6 }, { 0x4e0811a1, 21, 13 },

			
 
				+    { 0xf7537e82,  6,  4 }, { 0xbd3af235, 10, 11 },

			
 
				+    { 0x2ad7d2bb, 15,  2 }, { 0xeb86d391, 21,  9 },

			
 
				+};

			
 
				 

			
 
				-#define subround(f,w,x,y,z,k,s,ti) \

			
 
				-       w = x + rol(w + f(x,y,z) + block[k] + ti, s)

			
 
				+typedef struct md5_block md5_block;

			
 
				+struct md5_block {

			
 
				+    uint8_t block[64];

			
 
				+    size_t used;

			
 
				+    uint64_t len;

			
 
				+};

			
 
				 

			
 
				-static void MD5_Core_Init(MD5_Core_State * s)

			
 
				+static inline void md5_block_setup(md5_block *blk)

			
 
				 {

			
 
				-    s->h[0] = 0x67452301;

			
 
				-    s->h[1] = 0xefcdab89;

			
 
				-    s->h[2] = 0x98badcfe;

			
 
				-    s->h[3] = 0x10325476;

			
 
				+    blk->used = 0;

			
 
				+    blk->len = 0;

			
 
				 }

			
 
				 

			
 
				-static void MD5_Block(MD5_Core_State *s, uint32_t *block)

			
 
				+static inline bool md5_block_write(

			
 
				+    md5_block *blk, const void **vdata, size_t *len)

			
 
				 {

			
 
				-    uint32_t a, b, c, d;

			
 
				-

			
 
				-    a = s->h[0];

			
 
				-    b = s->h[1];

			
 
				-    c = s->h[2];

			
 
				-    d = s->h[3];

			
 
				-

			
 
				-    subround(F, a, b, c, d, 0, 7, 0xd76aa478);

			
 
				-    subround(F, d, a, b, c, 1, 12, 0xe8c7b756);

			
 
				-    subround(F, c, d, a, b, 2, 17, 0x242070db);

			
 
				-    subround(F, b, c, d, a, 3, 22, 0xc1bdceee);

			
 
				-    subround(F, a, b, c, d, 4, 7, 0xf57c0faf);

			
 
				-    subround(F, d, a, b, c, 5, 12, 0x4787c62a);

			
 
				-    subround(F, c, d, a, b, 6, 17, 0xa8304613);

			
 
				-    subround(F, b, c, d, a, 7, 22, 0xfd469501);

			
 
				-    subround(F, a, b, c, d, 8, 7, 0x698098d8);

			
 
				-    subround(F, d, a, b, c, 9, 12, 0x8b44f7af);

			
 
				-    subround(F, c, d, a, b, 10, 17, 0xffff5bb1);

			
 
				-    subround(F, b, c, d, a, 11, 22, 0x895cd7be);

			
 
				-    subround(F, a, b, c, d, 12, 7, 0x6b901122);

			
 
				-    subround(F, d, a, b, c, 13, 12, 0xfd987193);

			
 
				-    subround(F, c, d, a, b, 14, 17, 0xa679438e);

			
 
				-    subround(F, b, c, d, a, 15, 22, 0x49b40821);

			
 
				-    subround(G, a, b, c, d, 1, 5, 0xf61e2562);

			
 
				-    subround(G, d, a, b, c, 6, 9, 0xc040b340);

			
 
				-    subround(G, c, d, a, b, 11, 14, 0x265e5a51);

			
 
				-    subround(G, b, c, d, a, 0, 20, 0xe9b6c7aa);

			
 
				-    subround(G, a, b, c, d, 5, 5, 0xd62f105d);

			
 
				-    subround(G, d, a, b, c, 10, 9, 0x02441453);

			
 
				-    subround(G, c, d, a, b, 15, 14, 0xd8a1e681);

			
 
				-    subround(G, b, c, d, a, 4, 20, 0xe7d3fbc8);

			
 
				-    subround(G, a, b, c, d, 9, 5, 0x21e1cde6);

			
 
				-    subround(G, d, a, b, c, 14, 9, 0xc33707d6);

			
 
				-    subround(G, c, d, a, b, 3, 14, 0xf4d50d87);

			
 
				-    subround(G, b, c, d, a, 8, 20, 0x455a14ed);

			
 
				-    subround(G, a, b, c, d, 13, 5, 0xa9e3e905);

			
 
				-    subround(G, d, a, b, c, 2, 9, 0xfcefa3f8);

			
 
				-    subround(G, c, d, a, b, 7, 14, 0x676f02d9);

			
 
				-    subround(G, b, c, d, a, 12, 20, 0x8d2a4c8a);

			
 
				-    subround(H, a, b, c, d, 5, 4, 0xfffa3942);

			
 
				-    subround(H, d, a, b, c, 8, 11, 0x8771f681);

			
 
				-    subround(H, c, d, a, b, 11, 16, 0x6d9d6122);

			
 
				-    subround(H, b, c, d, a, 14, 23, 0xfde5380c);

			
 
				-    subround(H, a, b, c, d, 1, 4, 0xa4beea44);

			
 
				-    subround(H, d, a, b, c, 4, 11, 0x4bdecfa9);

			
 
				-    subround(H, c, d, a, b, 7, 16, 0xf6bb4b60);

			
 
				-    subround(H, b, c, d, a, 10, 23, 0xbebfbc70);

			
 
				-    subround(H, a, b, c, d, 13, 4, 0x289b7ec6);

			
 
				-    subround(H, d, a, b, c, 0, 11, 0xeaa127fa);

			
 
				-    subround(H, c, d, a, b, 3, 16, 0xd4ef3085);

			
 
				-    subround(H, b, c, d, a, 6, 23, 0x04881d05);

			
 
				-    subround(H, a, b, c, d, 9, 4, 0xd9d4d039);

			
 
				-    subround(H, d, a, b, c, 12, 11, 0xe6db99e5);

			
 
				-    subround(H, c, d, a, b, 15, 16, 0x1fa27cf8);

			
 
				-    subround(H, b, c, d, a, 2, 23, 0xc4ac5665);

			
 
				-    subround(I, a, b, c, d, 0, 6, 0xf4292244);

			
 
				-    subround(I, d, a, b, c, 7, 10, 0x432aff97);

			
 
				-    subround(I, c, d, a, b, 14, 15, 0xab9423a7);

			
 
				-    subround(I, b, c, d, a, 5, 21, 0xfc93a039);

			
 
				-    subround(I, a, b, c, d, 12, 6, 0x655b59c3);

			
 
				-    subround(I, d, a, b, c, 3, 10, 0x8f0ccc92);

			
 
				-    subround(I, c, d, a, b, 10, 15, 0xffeff47d);

			
 
				-    subround(I, b, c, d, a, 1, 21, 0x85845dd1);

			
 
				-    subround(I, a, b, c, d, 8, 6, 0x6fa87e4f);

			
 
				-    subround(I, d, a, b, c, 15, 10, 0xfe2ce6e0);

			
 
				-    subround(I, c, d, a, b, 6, 15, 0xa3014314);

			
 
				-    subround(I, b, c, d, a, 13, 21, 0x4e0811a1);

			
 
				-    subround(I, a, b, c, d, 4, 6, 0xf7537e82);

			
 
				-    subround(I, d, a, b, c, 11, 10, 0xbd3af235);

			
 
				-    subround(I, c, d, a, b, 2, 15, 0x2ad7d2bb);

			
 
				-    subround(I, b, c, d, a, 9, 21, 0xeb86d391);

			
 
				-

			
 
				-    s->h[0] += a;

			
 
				-    s->h[1] += b;

			
 
				-    s->h[2] += c;

			
 
				-    s->h[3] += d;

			
 
				+    size_t blkleft = sizeof(blk->block) - blk->used;

			
 
				+    size_t chunk = *len < blkleft ? *len : blkleft;

			
 
				+

			
 
				+    const uint8_t *p = *vdata;

			
 
				+    memcpy(blk->block + blk->used, p, chunk);

			
 
				+    *vdata = p + chunk;

			
 
				+    *len -= chunk;

			
 
				+    blk->used += chunk;

			
 
				+    blk->len += chunk;

			
 
				+

			
 
				+    if (blk->used == sizeof(blk->block)) {

			
 
				+        blk->used = 0;

			
 
				+        return true;

			
 
				+    }

			
 
				+

			
 
				+    return false;

			
 
				 }

			
 
				 

			
 
				-/* ----------------------------------------------------------------------

			
 
				- * Outer MD5 algorithm: take an arbitrary length byte string,

			
 
				- * convert it into 16-word blocks with the prescribed padding at

			
 
				- * the end, and pass those blocks to the core MD5 algorithm.

			
 
				- */

			
 
				+static inline void md5_block_pad(md5_block *blk, BinarySink *bs)

			
 
				+{

			
 
				+    uint64_t final_len = blk->len << 3;

			
 
				+    size_t pad = 63 & (55 - blk->used);

			
 
				 

			
 
				-#define BLKSIZE 64

			
 
				+    put_byte(bs, 0x80);

			
 
				+    put_padding(bs, pad, 0);

			
 
				 

			
 
				-static void MD5_BinarySink_write(BinarySink *bs, const void *data, size_t len);

			
 
				+    { // WINSCP

			
 
				+    unsigned char buf[8];

			
 
				+    PUT_64BIT_LSB_FIRST(buf, final_len);

			
 
				+    put_data(bs, buf, 8);

			
 
				+    smemclr(buf, 8);

			
 
				+    } // WINSCP

			
 
				 

			
 
				-void MD5Init(struct MD5Context *s)

			
 
				-{

			
 
				-    MD5_Core_Init(&s->core);

			
 
				-    s->blkused = 0;

			
 
				-    s->len = 0;

			
 
				-    BinarySink_INIT(s, MD5_BinarySink_write);

			
 
				+    assert(blk->used == 0 && "Should have exactly hit a block boundary");

			
 
				 }

			
 
				 

			
 
				-static void MD5_BinarySink_write(BinarySink *bs, const void *data, size_t len)

			
 
				+static inline uint32_t rol(uint32_t x, unsigned y)

			
 
				 {

			
 
				-    struct MD5Context *s = BinarySink_DOWNCAST(bs, struct MD5Context);

			
 
				-    const unsigned char *q = (const unsigned char *)data;

			
 
				-    uint32_t wordblock[16];

			
 
				-    uint32_t lenw = len;

			
 
				-    int i;

			
 
				-

			
 
				-    assert(lenw == len);

			
 
				-

			
 
				-    /*

			
 
				-     * Update the length field.

			
 
				-     */

			
 
				-    s->len += lenw;

			
 
				-

			
 
				-    if (s->blkused + len < BLKSIZE) {

			
 
				-        /*

			
 
				-         * Trivial case: just add to the block.

			
 
				-         */

			
 
				-        memcpy(s->block + s->blkused, q, len);

			
 
				-        s->blkused += len;

			
 
				-    } else {

			
 
				-        /*

			
 
				-         * We must complete and process at least one block.

			
 
				-         */

			
 
				-        while (s->blkused + len >= BLKSIZE) {

			
 
				-            memcpy(s->block + s->blkused, q, BLKSIZE - s->blkused);

			
 
				-            q += BLKSIZE - s->blkused;

			
 
				-            len -= BLKSIZE - s->blkused;

			
 
				-            /* Now process the block. Gather bytes little-endian into words */

			
 
				-            for (i = 0; i < 16; i++) {

			
 
				-                wordblock[i] =

			
 
				-                    (((uint32_t) s->block[i * 4 + 3]) << 24) |

			
 
				-                    (((uint32_t) s->block[i * 4 + 2]) << 16) |

			
 
				-                    (((uint32_t) s->block[i * 4 + 1]) << 8) |

			
 
				-                    (((uint32_t) s->block[i * 4 + 0]) << 0);

			
 
				-            }

			
 
				-            MD5_Block(&s->core, wordblock);

			
 
				-            s->blkused = 0;

			
 
				-        }

			
 
				-#ifdef MPEXT

			
 
				-	if (len > 0)

			
 
				-#endif

			
 
				-        memcpy(s->block, q, len);

			
 
				-        s->blkused = len;

			
 
				-    }

			
 
				+#pragma option push -w-ngu // WINSCP

			
 
				+    return (x << (31 & y)) | (x >> (31 & -y));

			
 
				+#pragma option pop // WINSCP

			
 
				 }

			
 
				 

			
 
				-void MD5Final(unsigned char output[16], struct MD5Context *s)

			
 
				+static inline uint32_t Ch(uint32_t ctrl, uint32_t if1, uint32_t if0)

			
 
				 {

			
 
				-    int i;

			
 
				-    unsigned pad;

			
 
				-    unsigned char c[64];

			
 
				-    uint64_t len;

			
 
				-

			
 
				-    if (s->blkused >= 56)

			
 
				-        pad = 56 + 64 - s->blkused;

			
 
				-    else

			
 
				-        pad = 56 - s->blkused;

			
 
				-

			
 
				-    len = (s->len << 3);

			
 
				-

			
 
				-    memset(c, 0, pad);

			
 
				-    c[0] = 0x80;

			
 
				-    put_data(s, c, pad);

			
 
				-

			
 
				-    PUT_64BIT_LSB_FIRST(c, len);

			
 
				+    return if0 ^ (ctrl & (if1 ^ if0));

			
 
				+}

			
 
				 

			
 
				-    put_data(s, c, 8);

			
 
				+/* Parameter functions for the four MD5 round types */

			
 
				+static inline uint32_t F(uint32_t x, uint32_t y, uint32_t z)

			
 
				+{ return Ch(x, y, z); }

			
 
				+static inline uint32_t G(uint32_t x, uint32_t y, uint32_t z)

			
 
				+{ return Ch(z, x, y); }

			
 
				+static inline uint32_t H(uint32_t x, uint32_t y, uint32_t z)

			
 
				+{ return x ^ y ^ z; }

			
 
				+static inline uint32_t I(uint32_t x, uint32_t y, uint32_t z)

			
 
				+{ return y ^ (x | ~z); }

			
 
				+

			
 
				+static inline void md5_round(

			
 
				+    unsigned round_index, const uint32_t *message,

			
 
				+    uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,

			
 
				+    uint32_t (*f)(uint32_t, uint32_t, uint32_t))

			
 
				+{

			
 
				+    struct md5_round_constant rc = md5_round_constants[round_index];

			
 
				 

			
 
				-    for (i = 0; i < 4; i++) {

			
 
				-        output[4 * i + 3] = (s->core.h[i] >> 24) & 0xFF;

			
 
				-        output[4 * i + 2] = (s->core.h[i] >> 16) & 0xFF;

			
 
				-        output[4 * i + 1] = (s->core.h[i] >> 8) & 0xFF;

			
 
				-        output[4 * i + 0] = (s->core.h[i] >> 0) & 0xFF;

			
 
				-    }

			
 
				+    *a = *b + rol(*a + f(*b, *c, *d) + message[rc.msg_index] + rc.addition,

			
 
				+                  rc.rotation);

			
 
				 }

			
 
				 

			
 
				-void MD5Simple(void const *p, unsigned len, unsigned char output[16])

			
 
				+static void md5_do_block(uint32_t *core, const uint8_t *block)

			
 
				 {

			
 
				-    struct MD5Context s;

			
 
				+    uint32_t message_words[16];

			
 
				+    size_t i; // WINSCP

			
 
				+    for (i = 0; i < 16; i++)

			
 
				+        message_words[i] = GET_32BIT_LSB_FIRST(block + 4*i);

			
 
				+

			
 
				+    { // WINSCP

			
 
				+    uint32_t a = core[0], b = core[1], c = core[2], d = core[3];

			
 
				+

			
 
				+    size_t t = 0;

			
 
				+    size_t u; // WINSCP

			
 
				+    for (u = 0; u < 4; u++) {

			
 
				+        md5_round(t++, message_words, &a, &b, &c, &d, F);

			
 
				+        md5_round(t++, message_words, &d, &a, &b, &c, F);

			
 
				+        md5_round(t++, message_words, &c, &d, &a, &b, F);

			
 
				+        md5_round(t++, message_words, &b, &c, &d, &a, F);

			
 
				+    }

			
 
				+    for (u = 0; u < 4; u++) {

			
 
				+        md5_round(t++, message_words, &a, &b, &c, &d, G);

			
 
				+        md5_round(t++, message_words, &d, &a, &b, &c, G);

			
 
				+        md5_round(t++, message_words, &c, &d, &a, &b, G);

			
 
				+        md5_round(t++, message_words, &b, &c, &d, &a, G);

			
 
				+    }

			
 
				+    for (u = 0; u < 4; u++) {

			
 
				+        md5_round(t++, message_words, &a, &b, &c, &d, H);

			
 
				+        md5_round(t++, message_words, &d, &a, &b, &c, H);

			
 
				+        md5_round(t++, message_words, &c, &d, &a, &b, H);

			
 
				+        md5_round(t++, message_words, &b, &c, &d, &a, H);

			
 
				+    }

			
 
				+    for (u = 0; u < 4; u++) {

			
 
				+        md5_round(t++, message_words, &a, &b, &c, &d, I);

			
 
				+        md5_round(t++, message_words, &d, &a, &b, &c, I);

			
 
				+        md5_round(t++, message_words, &c, &d, &a, &b, I);

			
 
				+        md5_round(t++, message_words, &b, &c, &d, &a, I);

			
 
				+    }

			
 
				 

			
 
				-    MD5Init(&s);

			
 
				-    put_data(&s, (unsigned char const *)p, len);

			
 
				-    MD5Final(output, &s);

			
 
				-    smemclr(&s, sizeof(s));

			
 
				-}

			
 
				+    core[0] += a;

			
 
				+    core[1] += b;

			
 
				+    core[2] += c;

			
 
				+    core[3] += d;

			
 
				+    } // WINSCP

			
 
				 

			
 
				-/* ----------------------------------------------------------------------

			
 
				- * Thin abstraction for things where hashes are pluggable.

			
 
				- */

			
 
				+    smemclr(message_words, sizeof(message_words));

			
 
				+}

			
 
				 

			
 
				-struct md5_hash {

			
 
				-    struct MD5Context state;

			
 
				+typedef struct md5 {

			
 
				+    uint32_t core[4];

			
 
				+    md5_block blk;

			
 
				+    BinarySink_IMPLEMENTATION;

			
 
				     ssh_hash hash;

			
 
				-};

			
 
				+} md5;

			
 
				+

			
 
				+static void md5_write(BinarySink *bs, const void *vp, size_t len);

			
 
				 

			
 
				 static ssh_hash *md5_new(const ssh_hashalg *alg)

			
 
				 {

			
 
				-    struct md5_hash *h = snew(struct md5_hash);

			
 
				-    MD5Init(&h->state);

			
 
				-    h->hash.vt = alg;

			
 
				-    BinarySink_DELEGATE_INIT(&h->hash, &h->state);

			
 
				-    return &h->hash;

			
 
				+    md5 *s = snew(md5);

			
 
				+

			
 
				+    s->hash.vt = alg;

			
 
				+    BinarySink_INIT(s, md5_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+    return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *md5_copy(ssh_hash *hashold)

			
 
				+static void md5_reset(ssh_hash *hash)

			
 
				 {

			
 
				-    struct md5_hash *hold, *hnew;

			
 
				-    ssh_hash *hashnew = md5_new(hashold->vt);

			
 
				+    md5 *s = container_of(hash, md5, hash);

			
 
				 

			
 
				-    hold = container_of(hashold, struct md5_hash, hash);

			
 
				-    hnew = container_of(hashnew, struct md5_hash, hash);

			
 
				+    memcpy(s->core, md5_initial_state, sizeof(s->core));

			
 
				+    md5_block_setup(&s->blk);

			
 
				+}

			
 
				 

			
 
				-    hnew->state = hold->state;

			
 
				-    BinarySink_COPIED(&hnew->state);

			
 
				+static void md5_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    md5 *copy = container_of(hcopy, md5, hash);

			
 
				+    md5 *orig = container_of(horig, md5, hash);

			
 
				 

			
 
				-    return hashnew;

			
 
				+    memcpy(copy, orig, sizeof(*copy));

			
 
				+    BinarySink_COPIED(copy);

			
 
				+    BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				 }

			
 
				 

			
 
				 static void md5_free(ssh_hash *hash)

			
 
				 {

			
 
				-    struct md5_hash *h = container_of(hash, struct md5_hash, hash);

			
 
				+    md5 *s = container_of(hash, md5, hash);

			
 
				 

			
 
				-    smemclr(h, sizeof(*h));

			
 
				-    sfree(h);

			
 
				+    smemclr(s, sizeof(*s));

			
 
				+    sfree(s);

			
 
				 }

			
 
				 

			
 
				-static void md5_final(ssh_hash *hash, unsigned char *output)

			
 
				+static void md5_write(BinarySink *bs, const void *vp, size_t len)

			
 
				 {

			
 
				-    struct md5_hash *h = container_of(hash, struct md5_hash, hash);

			
 
				-    MD5Final(output, &h->state);

			
 
				-    md5_free(hash);

			
 
				+    md5 *s = BinarySink_DOWNCAST(bs, md5);

			
 
				+

			
 
				+    while (len > 0)

			
 
				+        if (md5_block_write(&s->blk, &vp, &len))

			
 
				+            md5_do_block(s->core, s->blk.block);

			
 
				+}

			
 
				+

			
 
				+static void md5_digest(ssh_hash *hash, uint8_t *digest)

			
 
				+{

			
 
				+    md5 *s = container_of(hash, md5, hash);

			
 
				+

			
 
				+    size_t i; // WINSCP

			
 
				+    md5_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				+    for (i = 0; i < 4; i++)

			
 
				+        PUT_32BIT_LSB_FIRST(digest + 4*i, s->core[i]);

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_md5 = {

			
 
				-    md5_new, md5_copy, md5_final, md5_free, 16, 64, HASHALG_NAMES_BARE("MD5"),

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ md5_new,

			
 
				+    /*.reset =*/ md5_reset,

			
 
				+    /*.copyfrom =*/ md5_copyfrom,

			
 
				+    /*.digest =*/ md5_digest,

			
 
				+    /*.free =*/ md5_free,

			
 
				+    /*.hlen =*/ 16,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_BARE("MD5"),

			
 
				+    NULL, // WINSCP

			
 
				 };

			
--- a/source/putty/sshpubk.c
+++ b/source/putty/sshpubk.c
--- a/source/putty/sshsh256.c
+++ b/source/putty/sshsh256.c
@@ -105,8 +105,16 @@ static ssh_hash *sha256_select(const ssh_hashalg *alg)
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha256 = {

			
 
				-    sha256_select, NULL, NULL, NULL,

			
 
				-    32, 64, HASHALG_NAMES_ANNOTATED("SHA-256", "dummy selector vtable"),

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha256_select,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    /*.hlen =*/ 32,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-256", "dummy selector vtable"),

			
 
				+    NULL,

			
 
				 };

			
 
				 

			
 
				 #else

			
@@ -300,26 +308,28 @@ static ssh_hash *sha256_sw_new(const ssh_hashalg *alg)
 
				 {

			
 
				     sha256_sw *s = snew(sha256_sw);

			
 
				 

			
 
				-    memcpy(s->core, sha256_initial_state, sizeof(s->core));

			
 
				-

			
 
				-    sha256_block_setup(&s->blk);

			
 
				-

			
 
				     s->hash.vt = alg;

			
 
				     BinarySink_INIT(s, sha256_sw_write);

			
 
				     BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				     return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha256_sw_copy(ssh_hash *hash)

			
 
				+static void sha256_sw_reset(ssh_hash *hash)

			
 
				 {

			
 
				     sha256_sw *s = container_of(hash, sha256_sw, hash);

			
 
				-    sha256_sw *copy = snew(sha256_sw);

			
 
				 

			
 
				-    memcpy(copy, s, sizeof(*copy));

			
 
				+    memcpy(s->core, sha256_initial_state, sizeof(s->core));

			
 
				+    sha256_block_setup(&s->blk);

			
 
				+}

			
 
				+

			
 
				+static void sha256_sw_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha256_sw *copy = container_of(hcopy, sha256_sw, hash);

			
 
				+    sha256_sw *orig = container_of(horig, sha256_sw, hash);

			
 
				+

			
 
				+    memcpy(copy, orig, sizeof(*copy));

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 static void sha256_sw_free(ssh_hash *hash)

			
@@ -339,7 +349,7 @@ static void sha256_sw_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha256_sw_block(s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-static void sha256_sw_final(ssh_hash *hash, uint8_t *digest)

			
 
				+static void sha256_sw_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha256_sw *s = container_of(hash, sha256_sw, hash);

			
 
				 

			
@@ -348,13 +358,20 @@ static void sha256_sw_final(ssh_hash *hash, uint8_t *digest)
 
				     size_t i; // WINSCP

			
 
				     for (i = 0; i < 8; i++)

			
 
				         PUT_32BIT_MSB_FIRST(digest + 4*i, s->core[i]);

			
 
				-    sha256_sw_free(hash);

			
 
				     } // WINSCP

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha256_sw = {

			
 
				-    sha256_sw_new, sha256_sw_copy, sha256_sw_final, sha256_sw_free,

			
 
				-    32, 64, HASHALG_NAMES_BARE("SHA-256"), // WINSCP (removed "unaccelerated" annotation)

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha256_sw_new,

			
 
				+    /*.reset =*/ sha256_sw_reset,

			
 
				+    /*.copyfrom =*/ sha256_sw_copyfrom,

			
 
				+    /*.digest =*/ sha256_sw_digest,

			
 
				+    /*.free =*/ sha256_sw_free,

			
 
				+    /*.hlen =*/ 32,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_BARE("SHA-256"), // WINSCP (removed "unaccelerated" annotation)

			
 
				+    NULL,

			
 
				 };

			
 
				 #endif // !WINSCP_VS

			
 
				 

			
@@ -632,13 +649,24 @@ static sha256_ni *sha256_ni_alloc(void)
 
				     return s;

			
 
				 }

			
 
				 

			
 
				-FUNC_ISA /*WINSCP static*/ ssh_hash *sha256_ni_new(const ssh_hashalg *alg)

			
 
				+/*WINSCP static*/ ssh_hash *sha256_ni_new(const ssh_hashalg *alg)

			
 
				 {

			
 
				     if (!sha256_hw_available_cached())

			
 
				         return NULL;

			
 
				 

			
 
				     sha256_ni *s = sha256_ni_alloc();

			
 
				 

			
 
				+    s->hash.vt = alg;

			
 
				+    BinarySink_INIT(s, sha256_ni_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+

			
 
				+    return &s->hash;

			
 
				+}

			
 
				+

			
 
				+FUNC_ISA static void sha256_ni_reset(ssh_hash *hash)

			
 
				+{

			
 
				+    sha256_ni *s = container_of(hash, sha256_ni, hash);

			
 
				+

			
 
				     /* Initialise the core vectors in their storage order */

			
 
				     s->core[0] = _mm_set_epi64x(

			
 
				         0x6a09e667bb67ae85ULL, 0x510e527f9b05688cULL);

			
@@ -646,26 +674,19 @@ FUNC_ISA /*WINSCP static*/ ssh_hash *sha256_ni_new(const ssh_hashalg *alg)
 
				         0x3c6ef372a54ff53aULL, 0x1f83d9ab5be0cd19ULL);

			
 
				 

			
 
				     sha256_block_setup(&s->blk);

			
 
				-

			
 
				-    s->hash.vt = alg;

			
 
				-    BinarySink_INIT(s, sha256_ni_write);

			
 
				-    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				-    return &s->hash;

			
 
				 }

			
 
				 

			
 
				-/*WINSCP static*/ ssh_hash *sha256_ni_copy(ssh_hash *hash)

			
 
				+/*WINSCP static*/ void sha256_ni_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				 {

			
 
				-    sha256_ni *s = container_of(hash, sha256_ni, hash);

			
 
				-    sha256_ni *copy = sha256_ni_alloc();

			
 
				+    sha256_ni *copy = container_of(hcopy, sha256_ni, hash);

			
 
				+    sha256_ni *orig = container_of(horig, sha256_ni, hash);

			
 
				 

			
 
				     void *ptf_save = copy->pointer_to_free;

			
 
				-    *copy = *s; /* structure copy */

			
 
				+    *copy = *orig; /* structure copy */

			
 
				     copy->pointer_to_free = ptf_save;

			
 
				 

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 /*WINSCP static*/ void sha256_ni_free(ssh_hash *hash)

			
@@ -686,7 +707,7 @@ static void sha256_ni_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha256_ni_block(s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-FUNC_ISA /*WINSCP static*/ void sha256_ni_final(ssh_hash *hash, uint8_t *digest)

			
 
				+FUNC_ISA /*WINSCP static*/ void sha256_ni_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha256_ni *s = container_of(hash, sha256_ni, hash);

			
 
				 

			
@@ -707,8 +728,6 @@ FUNC_ISA /*WINSCP static*/ void sha256_ni_final(ssh_hash *hash, uint8_t *digest)
 
				     __m128i *output = (__m128i *)digest;

			
 
				     _mm_storeu_si128(output, dcba);

			
 
				     _mm_storeu_si128(output+1, hgfe);

			
 
				-

			
 
				-    sha256_ni_free(hash);

			
 
				 }

			
 
				 

			
 
				 #endif // WINSCP_VS

			
@@ -721,8 +740,14 @@ void sha256_ni_final(ssh_hash *hash, uint8_t *digest);
 
				 void sha256_ni_free(ssh_hash *hash);

			
 
				 

			
 
				 const ssh_hashalg ssh_sha256_hw = {

			
 
				-    sha256_ni_new, sha256_ni_copy, sha256_ni_final, sha256_ni_free,

			
 
				-    32, 64, HASHALG_NAMES_ANNOTATED("SHA-256", "SHA-NI accelerated"),

			
 
				+    .new = sha256_ni_new,

			
 
				+    .reset = sha256_ni_reset,

			
 
				+    .copyfrom = sha256_ni_copyfrom,

			
 
				+    .digest = sha256_ni_digest,

			
 
				+    .free = sha256_ni_free,

			
 
				+    .hlen = 32,

			
 
				+    .blocklen = 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-256", "SHA-NI accelerated"),

			
 
				 };

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
@@ -857,28 +882,31 @@ static ssh_hash *sha256_neon_new(const ssh_hashalg *alg)
 
				 

			
 
				     sha256_neon *s = snew(sha256_neon);

			
 
				 

			
 
				-    s->core.abcd = vld1q_u32(sha256_initial_state);

			
 
				-    s->core.efgh = vld1q_u32(sha256_initial_state + 4);

			
 
				-

			
 
				-    sha256_block_setup(&s->blk);

			
 
				-

			
 
				     s->hash.vt = alg;

			
 
				     BinarySink_INIT(s, sha256_neon_write);

			
 
				     BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				     return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha256_neon_copy(ssh_hash *hash)

			
 
				+static void sha256_neon_reset(ssh_hash *hash)

			
 
				 {

			
 
				     sha256_neon *s = container_of(hash, sha256_neon, hash);

			
 
				-    sha256_neon *copy = snew(sha256_neon);

			
 
				 

			
 
				-    *copy = *s; /* structure copy */

			
 
				+    s->core.abcd = vld1q_u32(sha256_initial_state);

			
 
				+    s->core.efgh = vld1q_u32(sha256_initial_state + 4);

			
 
				+

			
 
				+    sha256_block_setup(&s->blk);

			
 
				+}

			
 
				+

			
 
				+static void sha256_neon_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha256_neon *copy = container_of(hcopy, sha256_neon, hash);

			
 
				+    sha256_neon *orig = container_of(horig, sha256_neon, hash);

			
 
				+

			
 
				+    *copy = *orig; /* structure copy */

			
 
				 

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 static void sha256_neon_free(ssh_hash *hash)

			
@@ -897,19 +925,24 @@ static void sha256_neon_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha256_neon_block(&s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-static void sha256_neon_final(ssh_hash *hash, uint8_t *digest)

			
 
				+static void sha256_neon_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha256_neon *s = container_of(hash, sha256_neon, hash);

			
 
				 

			
 
				     sha256_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				     vst1q_u8(digest,      vrev32q_u8(vreinterpretq_u8_u32(s->core.abcd)));

			
 
				     vst1q_u8(digest + 16, vrev32q_u8(vreinterpretq_u8_u32(s->core.efgh)));

			
 
				-    sha256_neon_free(hash);

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha256_hw = {

			
 
				-    sha256_neon_new, sha256_neon_copy, sha256_neon_final, sha256_neon_free,

			
 
				-    32, 64, HASHALG_NAMES_ANNOTATED("SHA-256", "NEON accelerated"),

			
 
				+    .new = sha256_neon_new,

			
 
				+    .reset = sha256_neon_reset,

			
 
				+    .copyfrom = sha256_neon_copyfrom,

			
 
				+    .digest = sha256_neon_digest,

			
 
				+    .free = sha256_neon_free,

			
 
				+    .hlen = 32,

			
 
				+    .blocklen = 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-256", "NEON accelerated"),

			
 
				 };

			
 
				 

			
 
				 #endif

			
@@ -938,14 +971,22 @@ static ssh_hash *sha256_stub_new(const ssh_hashalg *alg)
 
				 

			
 
				 #define STUB_BODY { unreachable("Should never be called"); }

			
 
				 

			
 
				-static ssh_hash *sha256_stub_copy(ssh_hash *hash) { STUB_BODY; return NULL; }

			
 
				+static void sha256_stub_reset(ssh_hash *hash) STUB_BODY

			
 
				+static void sha256_stub_copyfrom(ssh_hash *hash, ssh_hash *orig) STUB_BODY

			
 
				 static void sha256_stub_free(ssh_hash *hash) STUB_BODY

			
 
				-static void sha256_stub_final(ssh_hash *hash, uint8_t *digest) STUB_BODY

			
 
				+static void sha256_stub_digest(ssh_hash *hash, uint8_t *digest) STUB_BODY

			
 
				 

			
 
				 const ssh_hashalg ssh_sha256_hw = {

			
 
				-    sha256_stub_new, sha256_stub_copy, sha256_stub_final, sha256_stub_free,

			
 
				-    32, 64, HASHALG_NAMES_ANNOTATED(

			
 
				-        "SHA-256", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha256_stub_new,

			
 
				+    /*.reset =*/ sha256_stub_reset,

			
 
				+    /*.copyfrom =*/ sha256_stub_copyfrom,

			
 
				+    /*.digest =*/ sha256_stub_digest,

			
 
				+    /*.free =*/ sha256_stub_free,

			
 
				+    /*.hlen =*/ 32,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-256", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    NULL,

			
 
				 };

			
 
				 

			
 
				 #endif // !WINSCP_VS

			
--- a/source/putty/sshsh512.c
+++ b/source/putty/sshsh512.c
@@ -9,361 +9,845 @@
 
				 #include <assert.h>

			
 
				 #include "ssh.h"

			
 
				 

			
 
				-#define BLKSIZE 128

			
 
				+/*

			
 
				+ * Start by deciding whether we can support hardware SHA at all.

			
 
				+ */

			
 
				+#define HW_SHA512_NONE 0

			
 
				+#define HW_SHA512_NEON 1

			
 
				+

			
 
				+#ifdef _FORCE_SHA512_NEON

			
 
				+#   define HW_SHA512 HW_SHA512_NEON

			
 
				+#elif defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

			
 
				+    /* Arm can potentially support both endiannesses, but this code

			
 
				+     * hasn't been tested on anything but little. If anyone wants to

			
 
				+     * run big-endian, they'll need to fix it first. */

			
 
				+#elif defined __ARM_FEATURE_SHA512

			
 
				+    /* If the Arm SHA-512 extension is available already, we can

			
 
				+     * support NEON SHA without having to enable anything by hand */

			
 
				+#   define HW_SHA512 HW_SHA512_NEON

			
 
				+#elif defined(__clang__)

			
 
				+#   if __has_attribute(target) && __has_include(<arm_neon.h>) &&       \

			
 
				+    (defined(__aarch64__))

			
 
				+        /* clang can enable the crypto extension in AArch64 using

			
 
				+         * __attribute__((target)) */

			
 
				+#       define HW_SHA512 HW_SHA512_NEON

			
 
				+#       define USE_CLANG_ATTR_TARGET_AARCH64

			
 
				+#   endif

			
 
				+#endif

			
 
				+

			
 
				+#if defined _FORCE_SOFTWARE_SHA || !defined HW_SHA512

			
 
				+#   undef HW_SHA512

			
 
				+#   define HW_SHA512 HW_SHA512_NONE

			
 
				+#endif

			
 
				 

			
 
				-typedef struct {

			
 
				-    uint64_t h[8];

			
 
				-    unsigned char block[BLKSIZE];

			
 
				-    int blkused;

			
 
				-    uint64_t lenhi, lenlo;

			
 
				-    BinarySink_IMPLEMENTATION;

			
 
				-} SHA512_State;

			
 
				+/*

			
 
				+ * The actual query function that asks if hardware acceleration is

			
 
				+ * available.

			
 
				+ */

			
 
				+static bool sha512_hw_available(void);

			
 
				 

			
 
				 /*

			
 
				- * Arithmetic implementations. Note that AND, XOR and NOT can

			
 
				- * overlap destination with one source, but the others can't.

			
 
				+ * The top-level selection function, caching the results of

			
 
				+ * sha512_hw_available() so it only has to run once.

			
 
				  */

			
 
				-#define add(r,x,y) ( r = (x) + (y) )

			
 
				-#define rorB(r,x,y) ( r = ((x) >> (y)) | ((x) << (64-(y))) )

			
 
				-#define rorL(r,x,y) ( r = ((x) >> (y)) | ((x) << (64-(y))) )

			
 
				-#define shrB(r,x,y) ( r = (x) >> (y) )

			
 
				-#define shrL(r,x,y) ( r = (x) >> (y) )

			
 
				-#define and(r,x,y) ( r = (x) & (y) )

			
 
				-#define xor(r,x,y) ( r = (x) ^ (y) )

			
 
				-#define not(r,x) ( r = ~(x) )

			
 
				-#define INIT(h,l) ((((uint64_t)(h)) << 32) | (l))

			
 
				-#define BUILD(r,h,l) ( r = ((((uint64_t)(h)) << 32) | (l)) )

			
 
				-#define EXTRACT(h,l,r) ( h = (r) >> 32, l = (r) & 0xFFFFFFFFU )

			
 
				+static bool sha512_hw_available_cached(void)

			
 
				+{

			
 
				+    static bool initialised = false;

			
 
				+    static bool hw_available;

			
 
				+    if (!initialised) {

			
 
				+        hw_available = sha512_hw_available();

			
 
				+        initialised = true;

			
 
				+    }

			
 
				+    return hw_available;

			
 
				+}

			
 
				+

			
 
				+struct sha512_select_options {

			
 
				+    const ssh_hashalg *hw, *sw;

			
 
				+};

			
 
				+

			
 
				+static ssh_hash *sha512_select(const ssh_hashalg *alg)

			
 
				+{

			
 
				+    const struct sha512_select_options *options =

			
 
				+        (const struct sha512_select_options *)alg->extra;

			
 
				+

			
 
				+    const ssh_hashalg *real_alg =

			
 
				+        sha512_hw_available_cached() ? options->hw : options->sw;

			
 
				+

			
 
				+    return ssh_hash_new(real_alg);

			
 
				+}

			
 
				+

			
 
				+const struct sha512_select_options ssh_sha512_select_options = {

			
 
				+    &ssh_sha512_hw, &ssh_sha512_sw,

			
 
				+};

			
 
				+const struct sha512_select_options ssh_sha384_select_options = {

			
 
				+    &ssh_sha384_hw, &ssh_sha384_sw,

			
 
				+};

			
 
				+

			
 
				+const ssh_hashalg ssh_sha512 = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_select,

			
 
				+    NULL, NULL, NULL, NULL, // WINSCP

			
 
				+    /*.hlen =*/ 64,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-512", "dummy selector vtable"),

			
 
				+    /*.extra =*/ &ssh_sha512_select_options,

			
 
				+};

			
 
				+

			
 
				+const ssh_hashalg ssh_sha384 = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_select,

			
 
				+    NULL, NULL, NULL, NULL, // WINSCP

			
 
				+    /*.hlen =*/ 48,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-384", "dummy selector vtable"),

			
 
				+    /*.extra =*/ &ssh_sha384_select_options,

			
 
				+};

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
 
				- * Core SHA512 algorithm: processes 16-doubleword blocks into a

			
 
				- * message digest.

			
 
				+ * Definitions likely to be helpful to multiple implementations.

			
 
				  */

			
 
				 

			
 
				-#define Ch(r,t,x,y,z) ( not(t,x), and(r,t,z), and(t,x,y), xor(r,r,t) )

			
 
				-#define Maj(r,t,x,y,z) ( and(r,x,y), and(t,x,z), xor(r,r,t), \

			
 
				-                         and(t,y,z), xor(r,r,t) )

			
 
				-#define bigsigma0(r,t,x) ( rorL(r,x,28), rorB(t,x,34), xor(r,r,t), \

			
 
				-                           rorB(t,x,39), xor(r,r,t) )

			
 
				-#define bigsigma1(r,t,x) ( rorL(r,x,14), rorL(t,x,18), xor(r,r,t), \

			
 
				-                           rorB(t,x,41), xor(r,r,t) )

			
 
				-#define smallsigma0(r,t,x) ( rorL(r,x,1), rorL(t,x,8), xor(r,r,t), \

			
 
				-                             shrL(t,x,7), xor(r,r,t) )

			
 
				-#define smallsigma1(r,t,x) ( rorL(r,x,19), rorB(t,x,61), xor(r,r,t), \

			
 
				-                             shrL(t,x,6), xor(r,r,t) )

			
 
				-

			
 
				-static void SHA512_Core_Init(SHA512_State *s) {

			
 
				-    static const uint64_t iv[] = {

			
 
				-        INIT(0x6a09e667, 0xf3bcc908),

			
 
				-        INIT(0xbb67ae85, 0x84caa73b),

			
 
				-        INIT(0x3c6ef372, 0xfe94f82b),

			
 
				-        INIT(0xa54ff53a, 0x5f1d36f1),

			
 
				-        INIT(0x510e527f, 0xade682d1),

			
 
				-        INIT(0x9b05688c, 0x2b3e6c1f),

			
 
				-        INIT(0x1f83d9ab, 0xfb41bd6b),

			
 
				-        INIT(0x5be0cd19, 0x137e2179),

			
 
				-    };

			
 
				-    int i;

			
 
				-    for (i = 0; i < 8; i++)

			
 
				-        s->h[i] = iv[i];

			
 
				-}

			
 
				-

			
 
				-static void SHA384_Core_Init(SHA512_State *s) {

			
 
				-    static const uint64_t iv[] = {

			
 
				-        INIT(0xcbbb9d5d, 0xc1059ed8),

			
 
				-        INIT(0x629a292a, 0x367cd507),

			
 
				-        INIT(0x9159015a, 0x3070dd17),

			
 
				-        INIT(0x152fecd8, 0xf70e5939),

			
 
				-        INIT(0x67332667, 0xffc00b31),

			
 
				-        INIT(0x8eb44a87, 0x68581511),

			
 
				-        INIT(0xdb0c2e0d, 0x64f98fa7),

			
 
				-        INIT(0x47b5481d, 0xbefa4fa4),

			
 
				-    };

			
 
				-    int i;

			
 
				-    for (i = 0; i < 8; i++)

			
 
				-        s->h[i] = iv[i];

			
 
				-}

			
 
				-

			
 
				-static void SHA512_Block(SHA512_State *s, uint64_t *block) {

			
 
				-    uint64_t w[80];

			
 
				-    uint64_t a,b,c,d,e,f,g,h;

			
 
				-    static const uint64_t k[] = {

			
 
				-        INIT(0x428a2f98, 0xd728ae22), INIT(0x71374491, 0x23ef65cd),

			
 
				-        INIT(0xb5c0fbcf, 0xec4d3b2f), INIT(0xe9b5dba5, 0x8189dbbc),

			
 
				-        INIT(0x3956c25b, 0xf348b538), INIT(0x59f111f1, 0xb605d019),

			
 
				-        INIT(0x923f82a4, 0xaf194f9b), INIT(0xab1c5ed5, 0xda6d8118),

			
 
				-        INIT(0xd807aa98, 0xa3030242), INIT(0x12835b01, 0x45706fbe),

			
 
				-        INIT(0x243185be, 0x4ee4b28c), INIT(0x550c7dc3, 0xd5ffb4e2),

			
 
				-        INIT(0x72be5d74, 0xf27b896f), INIT(0x80deb1fe, 0x3b1696b1),

			
 
				-        INIT(0x9bdc06a7, 0x25c71235), INIT(0xc19bf174, 0xcf692694),

			
 
				-        INIT(0xe49b69c1, 0x9ef14ad2), INIT(0xefbe4786, 0x384f25e3),

			
 
				-        INIT(0x0fc19dc6, 0x8b8cd5b5), INIT(0x240ca1cc, 0x77ac9c65),

			
 
				-        INIT(0x2de92c6f, 0x592b0275), INIT(0x4a7484aa, 0x6ea6e483),

			
 
				-        INIT(0x5cb0a9dc, 0xbd41fbd4), INIT(0x76f988da, 0x831153b5),

			
 
				-        INIT(0x983e5152, 0xee66dfab), INIT(0xa831c66d, 0x2db43210),

			
 
				-        INIT(0xb00327c8, 0x98fb213f), INIT(0xbf597fc7, 0xbeef0ee4),

			
 
				-        INIT(0xc6e00bf3, 0x3da88fc2), INIT(0xd5a79147, 0x930aa725),

			
 
				-        INIT(0x06ca6351, 0xe003826f), INIT(0x14292967, 0x0a0e6e70),

			
 
				-        INIT(0x27b70a85, 0x46d22ffc), INIT(0x2e1b2138, 0x5c26c926),

			
 
				-        INIT(0x4d2c6dfc, 0x5ac42aed), INIT(0x53380d13, 0x9d95b3df),

			
 
				-        INIT(0x650a7354, 0x8baf63de), INIT(0x766a0abb, 0x3c77b2a8),

			
 
				-        INIT(0x81c2c92e, 0x47edaee6), INIT(0x92722c85, 0x1482353b),

			
 
				-        INIT(0xa2bfe8a1, 0x4cf10364), INIT(0xa81a664b, 0xbc423001),

			
 
				-        INIT(0xc24b8b70, 0xd0f89791), INIT(0xc76c51a3, 0x0654be30),

			
 
				-        INIT(0xd192e819, 0xd6ef5218), INIT(0xd6990624, 0x5565a910),

			
 
				-        INIT(0xf40e3585, 0x5771202a), INIT(0x106aa070, 0x32bbd1b8),

			
 
				-        INIT(0x19a4c116, 0xb8d2d0c8), INIT(0x1e376c08, 0x5141ab53),

			
 
				-        INIT(0x2748774c, 0xdf8eeb99), INIT(0x34b0bcb5, 0xe19b48a8),

			
 
				-        INIT(0x391c0cb3, 0xc5c95a63), INIT(0x4ed8aa4a, 0xe3418acb),

			
 
				-        INIT(0x5b9cca4f, 0x7763e373), INIT(0x682e6ff3, 0xd6b2b8a3),

			
 
				-        INIT(0x748f82ee, 0x5defb2fc), INIT(0x78a5636f, 0x43172f60),

			
 
				-        INIT(0x84c87814, 0xa1f0ab72), INIT(0x8cc70208, 0x1a6439ec),

			
 
				-        INIT(0x90befffa, 0x23631e28), INIT(0xa4506ceb, 0xde82bde9),

			
 
				-        INIT(0xbef9a3f7, 0xb2c67915), INIT(0xc67178f2, 0xe372532b),

			
 
				-        INIT(0xca273ece, 0xea26619c), INIT(0xd186b8c7, 0x21c0c207),

			
 
				-        INIT(0xeada7dd6, 0xcde0eb1e), INIT(0xf57d4f7f, 0xee6ed178),

			
 
				-        INIT(0x06f067aa, 0x72176fba), INIT(0x0a637dc5, 0xa2c898a6),

			
 
				-        INIT(0x113f9804, 0xbef90dae), INIT(0x1b710b35, 0x131c471b),

			
 
				-        INIT(0x28db77f5, 0x23047d84), INIT(0x32caab7b, 0x40c72493),

			
 
				-        INIT(0x3c9ebe0a, 0x15c9bebc), INIT(0x431d67c4, 0x9c100d4c),

			
 
				-        INIT(0x4cc5d4be, 0xcb3e42b6), INIT(0x597f299c, 0xfc657e2a),

			
 
				-        INIT(0x5fcb6fab, 0x3ad6faec), INIT(0x6c44198c, 0x4a475817),

			
 
				-    };

			
 
				+static const uint64_t sha512_initial_state[] = {

			
 
				+    0x6a09e667f3bcc908ULL,

			
 
				+    0xbb67ae8584caa73bULL,

			
 
				+    0x3c6ef372fe94f82bULL,

			
 
				+    0xa54ff53a5f1d36f1ULL,

			
 
				+    0x510e527fade682d1ULL,

			
 
				+    0x9b05688c2b3e6c1fULL,

			
 
				+    0x1f83d9abfb41bd6bULL,

			
 
				+    0x5be0cd19137e2179ULL,

			
 
				+};

			
 
				 

			
 
				-    int t;

			
 
				+static const uint64_t sha384_initial_state[] = {

			
 
				+    0xcbbb9d5dc1059ed8ULL,

			
 
				+    0x629a292a367cd507ULL,

			
 
				+    0x9159015a3070dd17ULL,

			
 
				+    0x152fecd8f70e5939ULL,

			
 
				+    0x67332667ffc00b31ULL,

			
 
				+    0x8eb44a8768581511ULL,

			
 
				+    0xdb0c2e0d64f98fa7ULL,

			
 
				+    0x47b5481dbefa4fa4ULL,

			
 
				+};

			
 
				 

			
 
				-    for (t = 0; t < 16; t++)

			
 
				-        w[t] = block[t];

			
 
				-

			
 
				-    for (t = 16; t < 80; t++) {

			
 
				-        uint64_t p, q, r, tmp;

			
 
				-        smallsigma1(p, tmp, w[t-2]);

			
 
				-        smallsigma0(q, tmp, w[t-15]);

			
 
				-        add(r, p, q);

			
 
				-        add(p, r, w[t-7]);

			
 
				-        add(w[t], p, w[t-16]);

			
 
				-    }

			
 
				+static const uint64_t sha512_round_constants[] = {

			
 
				+    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,

			
 
				+    0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,

			
 
				+    0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,

			
 
				+    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,

			
 
				+    0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,

			
 
				+    0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,

			
 
				+    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,

			
 
				+    0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,

			
 
				+    0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,

			
 
				+    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,

			
 
				+    0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,

			
 
				+    0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,

			
 
				+    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,

			
 
				+    0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,

			
 
				+    0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,

			
 
				+    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,

			
 
				+    0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,

			
 
				+    0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,

			
 
				+    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,

			
 
				+    0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,

			
 
				+    0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,

			
 
				+    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,

			
 
				+    0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,

			
 
				+    0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,

			
 
				+    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,

			
 
				+    0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,

			
 
				+    0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,

			
 
				+    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,

			
 
				+    0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,

			
 
				+    0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,

			
 
				+    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,

			
 
				+    0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,

			
 
				+    0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,

			
 
				+    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,

			
 
				+    0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,

			
 
				+    0x113f9804bef90daeULL, 0x1b710b35131c471bULL,

			
 
				+    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,

			
 
				+    0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,

			
 
				+    0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,

			
 
				+    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,

			
 
				+};

			
 
				 

			
 
				-    a = s->h[0]; b = s->h[1]; c = s->h[2]; d = s->h[3];

			
 
				-    e = s->h[4]; f = s->h[5]; g = s->h[6]; h = s->h[7];

			
 
				-

			
 
				-    for (t = 0; t < 80; t+=8) {

			
 
				-        uint64_t tmp, p, q, r;

			
 
				-

			
 
				-#define ROUND(j,a,b,c,d,e,f,g,h) \

			
 
				-        bigsigma1(p, tmp, e); \

			
 
				-        Ch(q, tmp, e, f, g); \

			
 
				-        add(r, p, q); \

			
 
				-        add(p, r, k[j]) ; \

			
 
				-        add(q, p, w[j]); \

			
 
				-        add(r, q, h); \

			
 
				-        bigsigma0(p, tmp, a); \

			
 
				-        Maj(tmp, q, a, b, c); \

			
 
				-        add(q, tmp, p); \

			
 
				-        add(p, r, d); \

			
 
				-        d = p; \

			
 
				-        add(h, q, r);

			
 
				-

			
 
				-        ROUND(t+0, a,b,c,d,e,f,g,h);

			
 
				-        ROUND(t+1, h,a,b,c,d,e,f,g);

			
 
				-        ROUND(t+2, g,h,a,b,c,d,e,f);

			
 
				-        ROUND(t+3, f,g,h,a,b,c,d,e);

			
 
				-        ROUND(t+4, e,f,g,h,a,b,c,d);

			
 
				-        ROUND(t+5, d,e,f,g,h,a,b,c);

			
 
				-        ROUND(t+6, c,d,e,f,g,h,a,b);

			
 
				-        ROUND(t+7, b,c,d,e,f,g,h,a);

			
 
				-    }

			
 
				+#define SHA512_ROUNDS 80

			
 
				+

			
 
				+typedef struct sha512_block sha512_block;

			
 
				+struct sha512_block {

			
 
				+    uint8_t block[128];

			
 
				+    size_t used;

			
 
				+    uint64_t lenhi, lenlo;

			
 
				+};

			
 
				+

			
 
				+static inline void sha512_block_setup(sha512_block *blk)

			
 
				+{

			
 
				+    blk->used = 0;

			
 
				+    blk->lenhi = blk->lenlo = 0;

			
 
				+}

			
 
				+

			
 
				+static inline bool sha512_block_write(

			
 
				+    sha512_block *blk, const void **vdata, size_t *len)

			
 
				+{

			
 
				+    size_t blkleft = sizeof(blk->block) - blk->used;

			
 
				+    size_t chunk = *len < blkleft ? *len : blkleft;

			
 
				+

			
 
				+    const uint8_t *p = *vdata;

			
 
				+    memcpy(blk->block + blk->used, p, chunk);

			
 
				+    *vdata = p + chunk;

			
 
				+    *len -= chunk;

			
 
				+    blk->used += chunk;

			
 
				 

			
 
				-    {

			
 
				-        uint64_t tmp;

			
 
				-#define UPDATE(state, local) ( tmp = state, add(state, tmp, local) )

			
 
				-        UPDATE(s->h[0], a); UPDATE(s->h[1], b);

			
 
				-        UPDATE(s->h[2], c); UPDATE(s->h[3], d);

			
 
				-        UPDATE(s->h[4], e); UPDATE(s->h[5], f);

			
 
				-        UPDATE(s->h[6], g); UPDATE(s->h[7], h);

			
 
				+    { // WINSCP

			
 
				+    size_t chunkbits = chunk << 3;

			
 
				+

			
 
				+    blk->lenlo += chunkbits;

			
 
				+    blk->lenhi += (blk->lenlo < chunkbits);

			
 
				+

			
 
				+    if (blk->used == sizeof(blk->block)) {

			
 
				+        blk->used = 0;

			
 
				+        return true;

			
 
				     }

			
 
				+

			
 
				+    return false;

			
 
				+    } // WINSCP

			
 
				+}

			
 
				+

			
 
				+static inline void sha512_block_pad(sha512_block *blk, BinarySink *bs)

			
 
				+{

			
 
				+    uint64_t final_lenhi = blk->lenhi;

			
 
				+    uint64_t final_lenlo = blk->lenlo;

			
 
				+    size_t pad = 127 & (111 - blk->used);

			
 
				+

			
 
				+    put_byte(bs, 0x80);

			
 
				+    put_padding(bs, pad, 0);

			
 
				+    put_uint64(bs, final_lenhi);

			
 
				+    put_uint64(bs, final_lenlo);

			
 
				+

			
 
				+    assert(blk->used == 0 && "Should have exactly hit a block boundary");

			
 
				 }

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
 
				- * Outer SHA512 algorithm: take an arbitrary length byte string,

			
 
				- * convert it into 16-doubleword blocks with the prescribed padding

			
 
				- * at the end, and pass those blocks to the core SHA512 algorithm.

			
 
				+ * Software implementation of SHA-512.

			
 
				  */

			
 
				 

			
 
				-static void SHA512_BinarySink_write(BinarySink *bs,

			
 
				-                                    const void *p, size_t len);

			
 
				+static inline uint64_t ror(uint64_t x, unsigned y)

			
 
				+{

			
 
				+#pragma option push -w-ngu // WINSCP

			
 
				+    return (x << (63 & -y)) | (x >> (63 & y));

			
 
				+#pragma option pop // WINSCP

			
 
				+}

			
 
				 

			
 
				-void SHA512_Init(SHA512_State *s) {

			
 
				-    SHA512_Core_Init(s);

			
 
				-    s->blkused = 0;

			
 
				-    s->lenhi = s->lenlo = 0;

			
 
				-    BinarySink_INIT(s, SHA512_BinarySink_write);

			
 
				+static inline uint64_t Ch(uint64_t ctrl, uint64_t if1, uint64_t if0)

			
 
				+{

			
 
				+    return if0 ^ (ctrl & (if1 ^ if0));

			
 
				 }

			
 
				 

			
 
				-void SHA384_Init(SHA512_State *s) {

			
 
				-    SHA384_Core_Init(s);

			
 
				-    s->blkused = 0;

			
 
				-    s->lenhi = s->lenlo = 0;

			
 
				-    BinarySink_INIT(s, SHA512_BinarySink_write);

			
 
				+static inline uint64_t Maj(uint64_t x, uint64_t y, uint64_t z)

			
 
				+{

			
 
				+    return (x & y) | (z & (x | y));

			
 
				 }

			
 
				 

			
 
				-static void SHA512_BinarySink_write(BinarySink *bs,

			
 
				-                                    const void *p, size_t len)

			
 
				+static inline uint64_t Sigma_0(uint64_t x)

			
 
				 {

			
 
				-    SHA512_State *s = BinarySink_DOWNCAST(bs, SHA512_State);

			
 
				-    unsigned char *q = (unsigned char *)p;

			
 
				-    uint64_t wordblock[16];

			
 
				-    int i;

			
 
				+    return ror(x,28) ^ ror(x,34) ^ ror(x,39);

			
 
				+}

			
 
				 

			
 
				-    /*

			
 
				-     * Update the length field.

			
 
				-     */

			
 
				-    s->lenlo += len;

			
 
				-    s->lenhi += (s->lenlo < len);

			
 
				-

			
 
				-    if (s->blkused && s->blkused+len < BLKSIZE) {

			
 
				-        /*

			
 
				-         * Trivial case: just add to the block.

			
 
				-         */

			
 
				-        memcpy(s->block + s->blkused, q, len);

			
 
				-        s->blkused += len;

			
 
				-    } else {

			
 
				-        /*

			
 
				-         * We must complete and process at least one block.

			
 
				-         */

			
 
				-        while (s->blkused + len >= BLKSIZE) {

			
 
				-            memcpy(s->block + s->blkused, q, BLKSIZE - s->blkused);

			
 
				-            q += BLKSIZE - s->blkused;

			
 
				-            len -= BLKSIZE - s->blkused;

			
 
				-            /* Now process the block. Gather bytes big-endian into words */

			
 
				-            for (i = 0; i < 16; i++)

			
 
				-                wordblock[i] = GET_64BIT_MSB_FIRST(s->block + i*8);

			
 
				-            SHA512_Block(s, wordblock);

			
 
				-            s->blkused = 0;

			
 
				-        }

			
 
				-        memcpy(s->block, q, len);

			
 
				-        s->blkused = len;

			
 
				+static inline uint64_t Sigma_1(uint64_t x)

			
 
				+{

			
 
				+    return ror(x,14) ^ ror(x,18) ^ ror(x,41);

			
 
				+}

			
 
				+

			
 
				+static inline uint64_t sigma_0(uint64_t x)

			
 
				+{

			
 
				+    return ror(x,1) ^ ror(x,8) ^ (x >> 7);

			
 
				+}

			
 
				+

			
 
				+static inline uint64_t sigma_1(uint64_t x)

			
 
				+{

			
 
				+    return ror(x,19) ^ ror(x,61) ^ (x >> 6);

			
 
				+}

			
 
				+

			
 
				+static inline void sha512_sw_round(

			
 
				+    unsigned round_index, const uint64_t *schedule,

			
 
				+    uint64_t *a, uint64_t *b, uint64_t *c, uint64_t *d,

			
 
				+    uint64_t *e, uint64_t *f, uint64_t *g, uint64_t *h)

			
 
				+{

			
 
				+    uint64_t t1 = *h + Sigma_1(*e) + Ch(*e,*f,*g) +

			
 
				+        sha512_round_constants[round_index] + schedule[round_index];

			
 
				+

			
 
				+    uint64_t t2 = Sigma_0(*a) + Maj(*a,*b,*c);

			
 
				+

			
 
				+    *d += t1;

			
 
				+    *h = t1 + t2;

			
 
				+}

			
 
				+

			
 
				+static void sha512_sw_block(uint64_t *core, const uint8_t *block)

			
 
				+{

			
 
				+    uint64_t w[SHA512_ROUNDS];

			
 
				+    uint64_t a,b,c,d,e,f,g,h;

			
 
				+

			
 
				+    int t;

			
 
				+

			
 
				+    for (t = 0; t < 16; t++)

			
 
				+        w[t] = GET_64BIT_MSB_FIRST(block + 8*t);

			
 
				+

			
 
				+    for (t = 16; t < SHA512_ROUNDS; t++)

			
 
				+        w[t] = w[t-16] + w[t-7] + sigma_0(w[t-15]) + sigma_1(w[t-2]);

			
 
				+

			
 
				+    a = core[0]; b = core[1]; c = core[2]; d = core[3];

			
 
				+    e = core[4]; f = core[5]; g = core[6]; h = core[7];

			
 
				+

			
 
				+    for (t = 0; t < SHA512_ROUNDS; t+=8) {

			
 
				+        sha512_sw_round(t+0, w, &a,&b,&c,&d,&e,&f,&g,&h);

			
 
				+        sha512_sw_round(t+1, w, &h,&a,&b,&c,&d,&e,&f,&g);

			
 
				+        sha512_sw_round(t+2, w, &g,&h,&a,&b,&c,&d,&e,&f);

			
 
				+        sha512_sw_round(t+3, w, &f,&g,&h,&a,&b,&c,&d,&e);

			
 
				+        sha512_sw_round(t+4, w, &e,&f,&g,&h,&a,&b,&c,&d);

			
 
				+        sha512_sw_round(t+5, w, &d,&e,&f,&g,&h,&a,&b,&c);

			
 
				+        sha512_sw_round(t+6, w, &c,&d,&e,&f,&g,&h,&a,&b);

			
 
				+        sha512_sw_round(t+7, w, &b,&c,&d,&e,&f,&g,&h,&a);

			
 
				     }

			
 
				+

			
 
				+    core[0] += a; core[1] += b; core[2] += c; core[3] += d;

			
 
				+    core[4] += e; core[5] += f; core[6] += g; core[7] += h;

			
 
				+

			
 
				+    smemclr(w, sizeof(w));

			
 
				 }

			
 
				 

			
 
				-void SHA512_Final(SHA512_State *s, unsigned char *digest) {

			
 
				-    int i;

			
 
				-    int pad;

			
 
				-    unsigned char c[BLKSIZE];

			
 
				-    uint64_t lenhi, lenlo;

			
 
				+typedef struct sha512_sw {

			
 
				+    uint64_t core[8];

			
 
				+    sha512_block blk;

			
 
				+    BinarySink_IMPLEMENTATION;

			
 
				+    ssh_hash hash;

			
 
				+} sha512_sw;

			
 
				 

			
 
				-    if (s->blkused >= BLKSIZE-16)

			
 
				-        pad = (BLKSIZE-16) + BLKSIZE - s->blkused;

			
 
				-    else

			
 
				-        pad = (BLKSIZE-16) - s->blkused;

			
 
				+static void sha512_sw_write(BinarySink *bs, const void *vp, size_t len);

			
 
				 

			
 
				-    lenhi = (s->lenhi << 3) | (s->lenlo >> (32-3));

			
 
				-    lenlo = (s->lenlo << 3);

			
 
				+static ssh_hash *sha512_sw_new(const ssh_hashalg *alg)

			
 
				+{

			
 
				+    sha512_sw *s = snew(sha512_sw);

			
 
				 

			
 
				-    memset(c, 0, pad);

			
 
				-    c[0] = 0x80;

			
 
				-    put_data(s, &c, pad);

			
 
				+    s->hash.vt = alg;

			
 
				+    BinarySink_INIT(s, sha512_sw_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+    return &s->hash;

			
 
				+}

			
 
				 

			
 
				-    put_uint64(s, lenhi);

			
 
				-    put_uint64(s, lenlo);

			
 
				+static void sha512_sw_reset(ssh_hash *hash)

			
 
				+{

			
 
				+    sha512_sw *s = container_of(hash, sha512_sw, hash);

			
 
				 

			
 
				-    for (i = 0; i < 8; i++)

			
 
				-        PUT_64BIT_MSB_FIRST(digest + i*8, s->h[i]);

			
 
				+    /* The 'extra' field in the ssh_hashalg indicates which

			
 
				+     * initialisation vector we're using */

			
 
				+    memcpy(s->core, hash->vt->extra, sizeof(s->core));

			
 
				+    sha512_block_setup(&s->blk);

			
 
				 }

			
 
				 

			
 
				-void SHA384_Final(SHA512_State *s, unsigned char *digest) {

			
 
				-    unsigned char biggerDigest[512 / 8];

			
 
				-    SHA512_Final(s, biggerDigest);

			
 
				-    memcpy(digest, biggerDigest, 384 / 8);

			
 
				+static void sha512_sw_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha512_sw *copy = container_of(hcopy, sha512_sw, hash);

			
 
				+    sha512_sw *orig = container_of(horig, sha512_sw, hash);

			
 
				+

			
 
				+    memcpy(copy, orig, sizeof(*copy));

			
 
				+    BinarySink_COPIED(copy);

			
 
				+    BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				 }

			
 
				 

			
 
				-void SHA512_Simple(const void *p, int len, unsigned char *output) {

			
 
				-    SHA512_State s;

			
 
				+static void sha512_sw_free(ssh_hash *hash)

			
 
				+{

			
 
				+    sha512_sw *s = container_of(hash, sha512_sw, hash);

			
 
				 

			
 
				-    SHA512_Init(&s);

			
 
				-    put_data(&s, p, len);

			
 
				-    SHA512_Final(&s, output);

			
 
				-    smemclr(&s, sizeof(s));

			
 
				+    smemclr(s, sizeof(*s));

			
 
				+    sfree(s);

			
 
				 }

			
 
				 

			
 
				-void SHA384_Simple(const void *p, int len, unsigned char *output) {

			
 
				-    SHA512_State s;

			
 
				+static void sha512_sw_write(BinarySink *bs, const void *vp, size_t len)

			
 
				+{

			
 
				+    sha512_sw *s = BinarySink_DOWNCAST(bs, sha512_sw);

			
 
				 

			
 
				-    SHA384_Init(&s);

			
 
				-    put_data(&s, p, len);

			
 
				-    SHA384_Final(&s, output);

			
 
				-    smemclr(&s, sizeof(s));

			
 
				+    while (len > 0)

			
 
				+        if (sha512_block_write(&s->blk, &vp, &len))

			
 
				+            sha512_sw_block(s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				+static void sha512_sw_digest(ssh_hash *hash, uint8_t *digest)

			
 
				+{

			
 
				+    sha512_sw *s = container_of(hash, sha512_sw, hash);

			
 
				+

			
 
				+    sha512_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				+    { // WINSCP

			
 
				+    size_t i; // WINSCP

			
 
				+    for (i = 0; i < hash->vt->hlen / 8; i++)

			
 
				+        PUT_64BIT_MSB_FIRST(digest + 8*i, s->core[i]);

			
 
				+    }  // WINSCP

			
 
				+}

			
 
				+

			
 
				+const ssh_hashalg ssh_sha512_sw = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_sw_new,

			
 
				+    /*.reset =*/ sha512_sw_reset,

			
 
				+    /*.copyfrom =*/ sha512_sw_copyfrom,

			
 
				+    /*.digest =*/ sha512_sw_digest,

			
 
				+    /*.free =*/ sha512_sw_free,

			
 
				+    /*.hlen =*/ 64,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-512", "unaccelerated"),

			
 
				+    /*.extra =*/ sha512_initial_state,

			
 
				+};

			
 
				+

			
 
				+const ssh_hashalg ssh_sha384_sw = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_sw_new,

			
 
				+    /*.reset =*/ sha512_sw_reset,

			
 
				+    /*.copyfrom =*/ sha512_sw_copyfrom,

			
 
				+    /*.digest =*/ sha512_sw_digest,

			
 
				+    /*.free =*/ sha512_sw_free,

			
 
				+    /*.hlen =*/ 48,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-384", "unaccelerated"),

			
 
				+    /*.extra =*/ sha384_initial_state,

			
 
				+};

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * Hardware-accelerated implementation of SHA-512 using Arm NEON.

			
 
				+ */

			
 
				+

			
 
				+#if HW_SHA512 == HW_SHA512_NEON

			
 
				+

			
 
				 /*

			
 
				- * Thin abstraction for things where hashes are pluggable.

			
 
				+ * Manually set the target architecture, if we decided above that we

			
 
				+ * need to.

			
 
				  */

			
 
				+#ifdef USE_CLANG_ATTR_TARGET_AARCH64

			
 
				+/*

			
 
				+ * A spot of cheating: redefine some ACLE feature macros before

			
 
				+ * including arm_neon.h. Otherwise we won't get the SHA intrinsics

			
 
				+ * defined by that header, because it will be looking at the settings

			
 
				+ * for the whole translation unit rather than the ones we're going to

			
 
				+ * put on some particular functions using __attribute__((target)).

			
 
				+ */

			
 
				+#define __ARM_NEON 1

			
 
				+#define __ARM_FEATURE_CRYPTO 1

			
 
				+#define FUNC_ISA __attribute__ ((target("neon,sha3")))

			
 
				+#endif /* USE_CLANG_ATTR_TARGET_AARCH64 */

			
 
				+

			
 
				+#ifndef FUNC_ISA

			
 
				+#define FUNC_ISA

			
 
				+#endif

			
 
				+

			
 
				+#ifdef USE_ARM64_NEON_H

			
 
				+#include <arm64_neon.h>

			
 
				+#else

			
 
				+#include <arm_neon.h>

			
 
				+#endif

			
 
				+

			
 
				+static bool sha512_hw_available(void)

			
 
				+{

			
 
				+    /*

			
 
				+     * For Arm, we delegate to a per-platform detection function (see

			
 
				+     * explanation in sshaes.c).

			
 
				+     */

			
 
				+    return platform_sha512_hw_available();

			
 
				+}

			
 
				 

			
 
				-struct sha512_hash {

			
 
				-    SHA512_State state;

			
 
				-    ssh_hash hash;

			
 
				+#if defined __clang__

			
 
				+/*

			
 
				+ * As of 2020-12-24, I've found that clang doesn't provide the SHA-512

			
 
				+ * NEON intrinsics. So I define my own set using inline assembler, and

			
 
				+ * use #define to effectively rename them over the top of the standard

			
 
				+ * names.

			
 
				+ *

			
 
				+ * The aim of that #define technique is that it should avoid a build

			
 
				+ * failure if these intrinsics _are_ defined in <arm_neon.h>.

			
 
				+ * Obviously it would be better in that situation to switch back to

			
 
				+ * using the real intrinsics, but until I see a version of clang that

			
 
				+ * supports them, I won't know what version number to test in the

			
 
				+ * ifdef.

			
 
				+ */

			
 
				+static inline FUNC_ISA

			
 
				+uint64x2_t vsha512su0q_u64_asm(uint64x2_t x, uint64x2_t y) {

			
 
				+    __asm__("sha512su0 %0.2D,%1.2D" : "+w" (x) : "w" (y));

			
 
				+    return x;

			
 
				+}

			
 
				+static inline FUNC_ISA

			
 
				+uint64x2_t vsha512su1q_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) {

			
 
				+    __asm__("sha512su1 %0.2D,%1.2D,%2.2D" : "+w" (x) : "w" (y), "w" (z));

			
 
				+    return x;

			
 
				+}

			
 
				+static inline FUNC_ISA

			
 
				+uint64x2_t vsha512hq_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) {

			
 
				+    __asm__("sha512h %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z));

			
 
				+    return x;

			
 
				+}

			
 
				+static inline FUNC_ISA

			
 
				+uint64x2_t vsha512h2q_u64_asm(uint64x2_t x, uint64x2_t y, uint64x2_t z) {

			
 
				+    __asm__("sha512h2 %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z));

			
 
				+    return x;

			
 
				+}

			
 
				+#undef vsha512su0q_u64

			
 
				+#define vsha512su0q_u64 vsha512su0q_u64_asm

			
 
				+#undef vsha512su1q_u64

			
 
				+#define vsha512su1q_u64 vsha512su1q_u64_asm

			
 
				+#undef vsha512hq_u64

			
 
				+#define vsha512hq_u64 vsha512hq_u64_asm

			
 
				+#undef vsha512h2q_u64

			
 
				+#define vsha512h2q_u64 vsha512h2q_u64_asm

			
 
				+#endif /* defined __clang__ */

			
 
				+

			
 
				+typedef struct sha512_neon_core sha512_neon_core;

			
 
				+struct sha512_neon_core {

			
 
				+    uint64x2_t ab, cd, ef, gh;

			
 
				 };

			
 
				 

			
 
				-static ssh_hash *sha512_new(const ssh_hashalg *alg)

			
 
				+FUNC_ISA

			
 
				+static inline uint64x2_t sha512_neon_load_input(const uint8_t *p)

			
 
				 {

			
 
				-    struct sha512_hash *h = snew(struct sha512_hash);

			
 
				-    SHA512_Init(&h->state);

			
 
				-    h->hash.vt = alg;

			
 
				-    BinarySink_DELEGATE_INIT(&h->hash, &h->state);

			
 
				-    return &h->hash;

			
 
				+    return vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(p)));

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha512_copy(ssh_hash *hashold)

			
 
				+FUNC_ISA

			
 
				+static inline uint64x2_t sha512_neon_schedule_update(

			
 
				+    uint64x2_t m8, uint64x2_t m7, uint64x2_t m4, uint64x2_t m3, uint64x2_t m1)

			
 
				 {

			
 
				-    struct sha512_hash *hold, *hnew;

			
 
				-    ssh_hash *hashnew = sha512_new(hashold->vt);

			
 
				+    /*

			
 
				+     * vsha512su0q_u64() takes words from a long way back in the

			
 
				+     * schedule and performs the sigma_0 half of the computation of

			
 
				+     * the next two 64-bit message-schedule words.

			
 
				+     *

			
 
				+     * vsha512su1q_u64() combines the result of that with the sigma_1

			
 
				+     * steps, to output the finished version of those two words. The

			
 
				+     * total amount of input data it requires fits nicely into three

			
 
				+     * 128-bit vector registers, but one of those registers is

			
 
				+     * misaligned compared to the 128-bit chunks that the message

			
 
				+     * schedule is stored in. So we use vextq_u64 to make one of its

			
 
				+     * input words out of the second half of m4 and the first half of

			
 
				+     * m3.

			
 
				+     */

			
 
				+    return vsha512su1q_u64(vsha512su0q_u64(m8, m7), m1, vextq_u64(m4, m3, 1));

			
 
				+}

			
 
				+

			
 
				+FUNC_ISA

			
 
				+static inline void sha512_neon_round2(

			
 
				+    unsigned round_index, uint64x2_t schedule_words,

			
 
				+    uint64x2_t *ab, uint64x2_t *cd, uint64x2_t *ef, uint64x2_t *gh)

			
 
				+{

			
 
				+    /*

			
 
				+     * vsha512hq_u64 performs the Sigma_1 and Ch half of the

			
 
				+     * computation of two rounds of SHA-512 (including feeding back

			
 
				+     * one of the outputs from the first of those half-rounds into the

			
 
				+     * second one).

			
 
				+     *

			
 
				+     * vsha512h2q_u64 combines the result of that with the Sigma_0 and

			
 
				+     * Maj steps, and outputs one 128-bit vector that replaces the gh

			
 
				+     * piece of the input hash state, and a second that updates cd by

			
 
				+     * addition.

			
 
				+     *

			
 
				+     * Similarly to vsha512su1q_u64 above, some of the input registers

			
 
				+     * expected by these instructions are misaligned by 64 bits

			
 
				+     * relative to the chunks we've divided the hash state into, so we

			
 
				+     * have to start by making 'de' and 'fg' words out of our input

			
 
				+     * cd,ef,gh, using vextq_u64.

			
 
				+     *

			
 
				+     * Also, one of the inputs to vsha512hq_u64 is expected to contain

			
 
				+     * the results of summing gh + two round constants + two words of

			
 
				+     * message schedule, but the two words of the message schedule

			
 
				+     * have to be the opposite way round in the vector register from

			
 
				+     * the way that vsha512su1q_u64 output them. Hence, there's

			
 
				+     * another vextq_u64 in here that swaps the two halves of the

			
 
				+     * initial_sum vector register.

			
 
				+     *

			
 
				+     * (This also means that I don't have to prepare a specially

			
 
				+     * reordered version of the sha512_round_constants[] array: as

			
 
				+     * long as I'm unavoidably doing a swap at run time _anyway_, I

			
 
				+     * can load from the normally ordered version of that array, and

			
 
				+     * just take care to fold in that data _before_ the swap rather

			
 
				+     * than after.)

			
 
				+     */

			
 
				 

			
 
				-    hold = container_of(hashold, struct sha512_hash, hash);

			
 
				-    hnew = container_of(hashnew, struct sha512_hash, hash);

			
 
				+    /* Load two round constants, with the first one in the low half */

			
 
				+    uint64x2_t round_constants = vld1q_u64(

			
 
				+        sha512_round_constants + round_index);

			
 
				 

			
 
				-    hnew->state = hold->state;

			
 
				-    BinarySink_COPIED(&hnew->state);

			
 
				+    /* Add schedule words to round constants */

			
 
				+    uint64x2_t initial_sum = vaddq_u64(schedule_words, round_constants);

			
 
				 

			
 
				-    return hashnew;

			
 
				+    /* Swap that sum around so the word used in the first of the two

			
 
				+     * rounds is in the _high_ half of the vector, matching where h

			
 
				+     * lives in the gh vector */

			
 
				+    uint64x2_t swapped_initial_sum = vextq_u64(initial_sum, initial_sum, 1);

			
 
				+

			
 
				+    /* Add gh to that, now that they're matching ways round */

			
 
				+    uint64x2_t sum = vaddq_u64(swapped_initial_sum, *gh);

			
 
				+

			
 
				+    /* Make the misaligned de and fg words */

			
 
				+    uint64x2_t de = vextq_u64(*cd, *ef, 1);

			
 
				+    uint64x2_t fg = vextq_u64(*ef, *gh, 1);

			
 
				+

			
 
				+    /* Now we're ready to put all the pieces together. The output from

			
 
				+     * vsha512h2q_u64 can be used directly as the new gh, and the

			
 
				+     * output from vsha512hq_u64 is simultaneously the intermediate

			
 
				+     * value passed to h2 and the thing you have to add on to cd. */

			
 
				+    uint64x2_t intermed = vsha512hq_u64(sum, fg, de);

			
 
				+    *gh = vsha512h2q_u64(intermed, *cd, *ab);

			
 
				+    *cd = vaddq_u64(*cd, intermed);

			
 
				 }

			
 
				 

			
 
				-static void sha512_free(ssh_hash *hash)

			
 
				+FUNC_ISA

			
 
				+static inline void sha512_neon_block(sha512_neon_core *core, const uint8_t *p)

			
 
				 {

			
 
				-    struct sha512_hash *h = container_of(hash, struct sha512_hash, hash);

			
 
				+    uint64x2_t s0, s1, s2, s3, s4, s5, s6, s7;

			
 
				+

			
 
				+    uint64x2_t ab = core->ab, cd = core->cd, ef = core->ef, gh = core->gh;

			
 
				+

			
 
				+    s0 = sha512_neon_load_input(p + 16*0);

			
 
				+    sha512_neon_round2(0, s0, &ab, &cd, &ef, &gh);

			
 
				+    s1 = sha512_neon_load_input(p + 16*1);

			
 
				+    sha512_neon_round2(2, s1, &gh, &ab, &cd, &ef);

			
 
				+    s2 = sha512_neon_load_input(p + 16*2);

			
 
				+    sha512_neon_round2(4, s2, &ef, &gh, &ab, &cd);

			
 
				+    s3 = sha512_neon_load_input(p + 16*3);

			
 
				+    sha512_neon_round2(6, s3, &cd, &ef, &gh, &ab);

			
 
				+    s4 = sha512_neon_load_input(p + 16*4);

			
 
				+    sha512_neon_round2(8, s4, &ab, &cd, &ef, &gh);

			
 
				+    s5 = sha512_neon_load_input(p + 16*5);

			
 
				+    sha512_neon_round2(10, s5, &gh, &ab, &cd, &ef);

			
 
				+    s6 = sha512_neon_load_input(p + 16*6);

			
 
				+    sha512_neon_round2(12, s6, &ef, &gh, &ab, &cd);

			
 
				+    s7 = sha512_neon_load_input(p + 16*7);

			
 
				+    sha512_neon_round2(14, s7, &cd, &ef, &gh, &ab);

			
 
				+    s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7);

			
 
				+    sha512_neon_round2(16, s0, &ab, &cd, &ef, &gh);

			
 
				+    s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0);

			
 
				+    sha512_neon_round2(18, s1, &gh, &ab, &cd, &ef);

			
 
				+    s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1);

			
 
				+    sha512_neon_round2(20, s2, &ef, &gh, &ab, &cd);

			
 
				+    s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2);

			
 
				+    sha512_neon_round2(22, s3, &cd, &ef, &gh, &ab);

			
 
				+    s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3);

			
 
				+    sha512_neon_round2(24, s4, &ab, &cd, &ef, &gh);

			
 
				+    s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4);

			
 
				+    sha512_neon_round2(26, s5, &gh, &ab, &cd, &ef);

			
 
				+    s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5);

			
 
				+    sha512_neon_round2(28, s6, &ef, &gh, &ab, &cd);

			
 
				+    s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6);

			
 
				+    sha512_neon_round2(30, s7, &cd, &ef, &gh, &ab);

			
 
				+    s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7);

			
 
				+    sha512_neon_round2(32, s0, &ab, &cd, &ef, &gh);

			
 
				+    s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0);

			
 
				+    sha512_neon_round2(34, s1, &gh, &ab, &cd, &ef);

			
 
				+    s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1);

			
 
				+    sha512_neon_round2(36, s2, &ef, &gh, &ab, &cd);

			
 
				+    s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2);

			
 
				+    sha512_neon_round2(38, s3, &cd, &ef, &gh, &ab);

			
 
				+    s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3);

			
 
				+    sha512_neon_round2(40, s4, &ab, &cd, &ef, &gh);

			
 
				+    s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4);

			
 
				+    sha512_neon_round2(42, s5, &gh, &ab, &cd, &ef);

			
 
				+    s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5);

			
 
				+    sha512_neon_round2(44, s6, &ef, &gh, &ab, &cd);

			
 
				+    s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6);

			
 
				+    sha512_neon_round2(46, s7, &cd, &ef, &gh, &ab);

			
 
				+    s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7);

			
 
				+    sha512_neon_round2(48, s0, &ab, &cd, &ef, &gh);

			
 
				+    s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0);

			
 
				+    sha512_neon_round2(50, s1, &gh, &ab, &cd, &ef);

			
 
				+    s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1);

			
 
				+    sha512_neon_round2(52, s2, &ef, &gh, &ab, &cd);

			
 
				+    s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2);

			
 
				+    sha512_neon_round2(54, s3, &cd, &ef, &gh, &ab);

			
 
				+    s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3);

			
 
				+    sha512_neon_round2(56, s4, &ab, &cd, &ef, &gh);

			
 
				+    s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4);

			
 
				+    sha512_neon_round2(58, s5, &gh, &ab, &cd, &ef);

			
 
				+    s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5);

			
 
				+    sha512_neon_round2(60, s6, &ef, &gh, &ab, &cd);

			
 
				+    s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6);

			
 
				+    sha512_neon_round2(62, s7, &cd, &ef, &gh, &ab);

			
 
				+    s0 = sha512_neon_schedule_update(s0, s1, s4, s5, s7);

			
 
				+    sha512_neon_round2(64, s0, &ab, &cd, &ef, &gh);

			
 
				+    s1 = sha512_neon_schedule_update(s1, s2, s5, s6, s0);

			
 
				+    sha512_neon_round2(66, s1, &gh, &ab, &cd, &ef);

			
 
				+    s2 = sha512_neon_schedule_update(s2, s3, s6, s7, s1);

			
 
				+    sha512_neon_round2(68, s2, &ef, &gh, &ab, &cd);

			
 
				+    s3 = sha512_neon_schedule_update(s3, s4, s7, s0, s2);

			
 
				+    sha512_neon_round2(70, s3, &cd, &ef, &gh, &ab);

			
 
				+    s4 = sha512_neon_schedule_update(s4, s5, s0, s1, s3);

			
 
				+    sha512_neon_round2(72, s4, &ab, &cd, &ef, &gh);

			
 
				+    s5 = sha512_neon_schedule_update(s5, s6, s1, s2, s4);

			
 
				+    sha512_neon_round2(74, s5, &gh, &ab, &cd, &ef);

			
 
				+    s6 = sha512_neon_schedule_update(s6, s7, s2, s3, s5);

			
 
				+    sha512_neon_round2(76, s6, &ef, &gh, &ab, &cd);

			
 
				+    s7 = sha512_neon_schedule_update(s7, s0, s3, s4, s6);

			
 
				+    sha512_neon_round2(78, s7, &cd, &ef, &gh, &ab);

			
 
				+

			
 
				+    core->ab = vaddq_u64(core->ab, ab);

			
 
				+    core->cd = vaddq_u64(core->cd, cd);

			
 
				+    core->ef = vaddq_u64(core->ef, ef);

			
 
				+    core->gh = vaddq_u64(core->gh, gh);

			
 
				+}

			
 
				 

			
 
				-    smemclr(h, sizeof(*h));

			
 
				-    sfree(h);

			
 
				+typedef struct sha512_neon {

			
 
				+    sha512_neon_core core;

			
 
				+    sha512_block blk;

			
 
				+    BinarySink_IMPLEMENTATION;

			
 
				+    ssh_hash hash;

			
 
				+} sha512_neon;

			
 
				+

			
 
				+static void sha512_neon_write(BinarySink *bs, const void *vp, size_t len);

			
 
				+

			
 
				+static ssh_hash *sha512_neon_new(const ssh_hashalg *alg)

			
 
				+{

			
 
				+    if (!sha512_hw_available_cached())

			
 
				+        return NULL;

			
 
				+

			
 
				+    sha512_neon *s = snew(sha512_neon);

			
 
				+

			
 
				+    s->hash.vt = alg;

			
 
				+    BinarySink_INIT(s, sha512_neon_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+    return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static void sha512_final(ssh_hash *hash, unsigned char *output)

			
 
				+static void sha512_neon_reset(ssh_hash *hash)

			
 
				 {

			
 
				-    struct sha512_hash *h = container_of(hash, struct sha512_hash, hash);

			
 
				-    SHA512_Final(&h->state, output);

			
 
				-    sha512_free(hash);

			
 
				+    sha512_neon *s = container_of(hash, sha512_neon, hash);

			
 
				+    const uint64_t *iv = (const uint64_t *)hash->vt->extra;

			
 
				+

			
 
				+    s->core.ab = vld1q_u64(iv);

			
 
				+    s->core.cd = vld1q_u64(iv+2);

			
 
				+    s->core.ef = vld1q_u64(iv+4);

			
 
				+    s->core.gh = vld1q_u64(iv+6);

			
 
				+

			
 
				+    sha512_block_setup(&s->blk);

			
 
				 }

			
 
				 

			
 
				-const ssh_hashalg ssh_sha512 = {

			
 
				-    sha512_new, sha512_copy, sha512_final, sha512_free,

			
 
				-    64, BLKSIZE, HASHALG_NAMES_BARE("SHA-512"),

			
 
				+static void sha512_neon_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha512_neon *copy = container_of(hcopy, sha512_neon, hash);

			
 
				+    sha512_neon *orig = container_of(horig, sha512_neon, hash);

			
 
				+

			
 
				+    *copy = *orig; /* structure copy */

			
 
				+

			
 
				+    BinarySink_COPIED(copy);

			
 
				+    BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				+}

			
 
				+

			
 
				+static void sha512_neon_free(ssh_hash *hash)

			
 
				+{

			
 
				+    sha512_neon *s = container_of(hash, sha512_neon, hash);

			
 
				+    smemclr(s, sizeof(*s));

			
 
				+    sfree(s);

			
 
				+}

			
 
				+

			
 
				+static void sha512_neon_write(BinarySink *bs, const void *vp, size_t len)

			
 
				+{

			
 
				+    sha512_neon *s = BinarySink_DOWNCAST(bs, sha512_neon);

			
 
				+

			
 
				+    while (len > 0)

			
 
				+        if (sha512_block_write(&s->blk, &vp, &len))

			
 
				+            sha512_neon_block(&s->core, s->blk.block);

			
 
				+}

			
 
				+

			
 
				+static void sha512_neon_digest(ssh_hash *hash, uint8_t *digest)

			
 
				+{

			
 
				+    sha512_neon *s = container_of(hash, sha512_neon, hash);

			
 
				+

			
 
				+    sha512_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				+

			
 
				+    vst1q_u8(digest,    vrev64q_u8(vreinterpretq_u8_u64(s->core.ab)));

			
 
				+    vst1q_u8(digest+16, vrev64q_u8(vreinterpretq_u8_u64(s->core.cd)));

			
 
				+    vst1q_u8(digest+32, vrev64q_u8(vreinterpretq_u8_u64(s->core.ef)));

			
 
				+    vst1q_u8(digest+48, vrev64q_u8(vreinterpretq_u8_u64(s->core.gh)));

			
 
				+}

			
 
				+

			
 
				+static void sha384_neon_digest(ssh_hash *hash, uint8_t *digest)

			
 
				+{

			
 
				+    sha512_neon *s = container_of(hash, sha512_neon, hash);

			
 
				+

			
 
				+    sha512_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				+

			
 
				+    vst1q_u8(digest,    vrev64q_u8(vreinterpretq_u8_u64(s->core.ab)));

			
 
				+    vst1q_u8(digest+16, vrev64q_u8(vreinterpretq_u8_u64(s->core.cd)));

			
 
				+    vst1q_u8(digest+32, vrev64q_u8(vreinterpretq_u8_u64(s->core.ef)));

			
 
				+}

			
 
				+

			
 
				+const ssh_hashalg ssh_sha512_hw = {

			
 
				+    .new = sha512_neon_new,

			
 
				+    .reset = sha512_neon_reset,

			
 
				+    .copyfrom = sha512_neon_copyfrom,

			
 
				+    .digest = sha512_neon_digest,

			
 
				+    .free = sha512_neon_free,

			
 
				+    .hlen = 64,

			
 
				+    .blocklen = 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-512", "NEON accelerated"),

			
 
				+    .extra = sha512_initial_state,

			
 
				 };

			
 
				 

			
 
				-static ssh_hash *sha384_new(const ssh_hashalg *alg)

			
 
				+const ssh_hashalg ssh_sha384_hw = {

			
 
				+    .new = sha512_neon_new,

			
 
				+    .reset = sha512_neon_reset,

			
 
				+    .copyfrom = sha512_neon_copyfrom,

			
 
				+    .digest = sha384_neon_digest,

			
 
				+    .free = sha512_neon_free,

			
 
				+    .hlen = 48,

			
 
				+    .blocklen = 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-384", "NEON accelerated"),

			
 
				+    .extra = sha384_initial_state,

			
 
				+};

			
 
				+

			
 
				+/* ----------------------------------------------------------------------

			
 
				+ * Stub functions if we have no hardware-accelerated SHA-512. In this

			
 
				+ * case, sha512_hw_new returns NULL (though it should also never be

			
 
				+ * selected by sha512_select, so the only thing that should even be

			
 
				+ * _able_ to call it is testcrypt). As a result, the remaining vtable

			
 
				+ * functions should never be called at all.

			
 
				+ */

			
 
				+

			
 
				+#elif HW_SHA512 == HW_SHA512_NONE

			
 
				+

			
 
				+static bool sha512_hw_available(void)

			
 
				 {

			
 
				-    struct sha512_hash *h = snew(struct sha512_hash);

			
 
				-    SHA384_Init(&h->state);

			
 
				-    h->hash.vt = alg;

			
 
				-    BinarySink_DELEGATE_INIT(&h->hash, &h->state);

			
 
				-    return &h->hash;

			
 
				+    return false;

			
 
				 }

			
 
				 

			
 
				-static void sha384_final(ssh_hash *hash, unsigned char *output)

			
 
				+static ssh_hash *sha512_stub_new(const ssh_hashalg *alg)

			
 
				 {

			
 
				-    struct sha512_hash *h = container_of(hash, struct sha512_hash, hash);

			
 
				-    SHA384_Final(&h->state, output);

			
 
				-    sha512_free(hash);

			
 
				+    return NULL;

			
 
				 }

			
 
				 

			
 
				-const ssh_hashalg ssh_sha384 = {

			
 
				-    sha384_new, sha512_copy, sha384_final, sha512_free,

			
 
				-    48, BLKSIZE, HASHALG_NAMES_BARE("SHA-384"),

			
 
				+#define STUB_BODY { unreachable("Should never be called"); }

			
 
				+

			
 
				+static void sha512_stub_reset(ssh_hash *hash) STUB_BODY

			
 
				+static void sha512_stub_copyfrom(ssh_hash *hash, ssh_hash *orig) STUB_BODY

			
 
				+static void sha512_stub_free(ssh_hash *hash) STUB_BODY

			
 
				+static void sha512_stub_digest(ssh_hash *hash, uint8_t *digest) STUB_BODY

			
 
				+

			
 
				+const ssh_hashalg ssh_sha512_hw = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_stub_new,

			
 
				+    /*.reset =*/ sha512_stub_reset,

			
 
				+    /*.copyfrom =*/ sha512_stub_copyfrom,

			
 
				+    /*.digest =*/ sha512_stub_digest,

			
 
				+    /*.free =*/ sha512_stub_free,

			
 
				+    /*.hlen =*/ 64,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-512", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    NULL, // WINSCP

			
 
				 };

			
 
				+

			
 
				+const ssh_hashalg ssh_sha384_hw = {

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha512_stub_new,

			
 
				+    /*.reset =*/ sha512_stub_reset,

			
 
				+    /*.copyfrom =*/ sha512_stub_copyfrom,

			
 
				+    /*.digest =*/ sha512_stub_digest,

			
 
				+    /*.free =*/ sha512_stub_free,

			
 
				+    /*.hlen =*/ 48,

			
 
				+    /*.blocklen =*/ 128,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-384", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    NULL, // WINSCP

			
 
				+};

			
 
				+

			
 
				+#endif /* HW_SHA512 */

			
--- a/source/putty/sshsha.c
+++ b/source/putty/sshsha.c
@@ -98,8 +98,16 @@ static ssh_hash *sha1_select(const ssh_hashalg *alg)
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha1 = {

			
 
				-    sha1_select, NULL, NULL, NULL,

			
 
				-    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "dummy selector vtable"),

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha1_select,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    NULL,

			
 
				+    /*.hlen =*/ 20,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-1", "dummy selector vtable"),

			
 
				+    NULL,

			
 
				 };

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
@@ -266,26 +274,28 @@ static ssh_hash *sha1_sw_new(const ssh_hashalg *alg)
 
				 {

			
 
				     sha1_sw *s = snew(sha1_sw);

			
 
				 

			
 
				-    memcpy(s->core, sha1_initial_state, sizeof(s->core));

			
 
				-

			
 
				-    sha1_block_setup(&s->blk);

			
 
				-

			
 
				     s->hash.vt = alg;

			
 
				     BinarySink_INIT(s, sha1_sw_write);

			
 
				     BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				     return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha1_sw_copy(ssh_hash *hash)

			
 
				+static void sha1_sw_reset(ssh_hash *hash)

			
 
				 {

			
 
				     sha1_sw *s = container_of(hash, sha1_sw, hash);

			
 
				-    sha1_sw *copy = snew(sha1_sw);

			
 
				 

			
 
				-    memcpy(copy, s, sizeof(*copy));

			
 
				+    memcpy(s->core, sha1_initial_state, sizeof(s->core));

			
 
				+    sha1_block_setup(&s->blk);

			
 
				+}

			
 
				+

			
 
				+static void sha1_sw_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha1_sw *copy = container_of(hcopy, sha1_sw, hash);

			
 
				+    sha1_sw *orig = container_of(horig, sha1_sw, hash);

			
 
				+

			
 
				+    memcpy(copy, orig, sizeof(*copy));

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 static void sha1_sw_free(ssh_hash *hash)

			
@@ -305,7 +315,7 @@ static void sha1_sw_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha1_sw_block(s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-static void sha1_sw_final(ssh_hash *hash, uint8_t *digest)

			
 
				+static void sha1_sw_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha1_sw *s = container_of(hash, sha1_sw, hash);

			
 
				 

			
@@ -314,13 +324,20 @@ static void sha1_sw_final(ssh_hash *hash, uint8_t *digest)
 
				     size_t i; // WINSCP

			
 
				     for (i = 0; i < 5; i++)

			
 
				         PUT_32BIT_MSB_FIRST(digest + 4*i, s->core[i]);

			
 
				-    sha1_sw_free(hash);

			
 
				     } // WINSCP

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha1_sw = {

			
 
				-    sha1_sw_new, sha1_sw_copy, sha1_sw_final, sha1_sw_free,

			
 
				-    20, 64, HASHALG_NAMES_BARE("SHA-1"), // WINSCP (removed "unaccelerated" annotation)

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha1_sw_new,

			
 
				+    /*.reset =*/ sha1_sw_reset,

			
 
				+    /*.copyfrom =*/ sha1_sw_copyfrom,

			
 
				+    /*.digest =*/ sha1_sw_digest,

			
 
				+    /*.free =*/ sha1_sw_free,

			
 
				+    /*.hlen =*/ 20,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_BARE("SHA-1"), // WINSCP (removed "unaccelerated" annotation)

			
 
				+    NULL,

			
 
				 };

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
@@ -583,39 +600,42 @@ static sha1_ni *sha1_ni_alloc(void)
 
				     return s;

			
 
				 }

			
 
				 

			
 
				-FUNC_ISA static ssh_hash *sha1_ni_new(const ssh_hashalg *alg)

			
 
				+static ssh_hash *sha1_ni_new(const ssh_hashalg *alg)

			
 
				 {

			
 
				     if (!sha1_hw_available_cached())

			
 
				         return NULL;

			
 
				 

			
 
				     sha1_ni *s = sha1_ni_alloc();

			
 
				 

			
 
				+    s->hash.vt = alg;

			
 
				+    BinarySink_INIT(s, sha1_ni_write);

			
 
				+    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				+    return &s->hash;

			
 
				+}

			
 
				+

			
 
				+FUNC_ISA static void sha1_ni_reset(ssh_hash *hash)

			
 
				+{

			
 
				+    sha1_ni *s = container_of(hash, sha1_ni, hash);

			
 
				+

			
 
				     /* Initialise the core vectors in their storage order */

			
 
				     s->core[0] = _mm_set_epi64x(

			
 
				         0x67452301efcdab89ULL, 0x98badcfe10325476ULL);

			
 
				     s->core[1] = _mm_set_epi32(0xc3d2e1f0, 0, 0, 0);

			
 
				 

			
 
				     sha1_block_setup(&s->blk);

			
 
				-

			
 
				-    s->hash.vt = alg;

			
 
				-    BinarySink_INIT(s, sha1_ni_write);

			
 
				-    BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				-    return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha1_ni_copy(ssh_hash *hash)

			
 
				+static void sha1_ni_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				 {

			
 
				-    sha1_ni *s = container_of(hash, sha1_ni, hash);

			
 
				-    sha1_ni *copy = sha1_ni_alloc();

			
 
				+    sha1_ni *copy = container_of(hcopy, sha1_ni, hash);

			
 
				+    sha1_ni *orig = container_of(horig, sha1_ni, hash);

			
 
				 

			
 
				     void *ptf_save = copy->pointer_to_free;

			
 
				-    *copy = *s; /* structure copy */

			
 
				+    *copy = *orig; /* structure copy */

			
 
				     copy->pointer_to_free = ptf_save;

			
 
				 

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 static void sha1_ni_free(ssh_hash *hash)

			
@@ -636,7 +656,7 @@ static void sha1_ni_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha1_ni_block(s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-FUNC_ISA static void sha1_ni_final(ssh_hash *hash, uint8_t *digest)

			
 
				+FUNC_ISA static void sha1_ni_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha1_ni *s = container_of(hash, sha1_ni, hash);

			
 
				 

			
@@ -655,13 +675,17 @@ FUNC_ISA static void sha1_ni_final(ssh_hash *hash, uint8_t *digest)
 
				     /* Finally, store the leftover word */

			
 
				     uint32_t e = _mm_extract_epi32(s->core[1], 3);

			
 
				     PUT_32BIT_MSB_FIRST(digest + 16, e);

			
 
				-

			
 
				-    sha1_ni_free(hash);

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha1_hw = {

			
 
				-    sha1_ni_new, sha1_ni_copy, sha1_ni_final, sha1_ni_free,

			
 
				-    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "SHA-NI accelerated"),

			
 
				+    .new = sha1_ni_new,

			
 
				+    .reset = sha1_ni_reset,

			
 
				+    .copyfrom = sha1_ni_copyfrom,

			
 
				+    .digest = sha1_ni_digest,

			
 
				+    .free = sha1_ni_free,

			
 
				+    .hlen = 20,

			
 
				+    .blocklen = 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-1", "SHA-NI accelerated"),

			
 
				 };

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
@@ -823,28 +847,31 @@ static ssh_hash *sha1_neon_new(const ssh_hashalg *alg)
 
				 

			
 
				     sha1_neon *s = snew(sha1_neon);

			
 
				 

			
 
				-    s->core.abcd = vld1q_u32(sha1_initial_state);

			
 
				-    s->core.e = sha1_initial_state[4];

			
 
				-

			
 
				-    sha1_block_setup(&s->blk);

			
 
				-

			
 
				     s->hash.vt = alg;

			
 
				     BinarySink_INIT(s, sha1_neon_write);

			
 
				     BinarySink_DELEGATE_INIT(&s->hash, s);

			
 
				     return &s->hash;

			
 
				 }

			
 
				 

			
 
				-static ssh_hash *sha1_neon_copy(ssh_hash *hash)

			
 
				+static void sha1_neon_reset(ssh_hash *hash)

			
 
				 {

			
 
				     sha1_neon *s = container_of(hash, sha1_neon, hash);

			
 
				-    sha1_neon *copy = snew(sha1_neon);

			
 
				 

			
 
				-    *copy = *s; /* structure copy */

			
 
				+    s->core.abcd = vld1q_u32(sha1_initial_state);

			
 
				+    s->core.e = sha1_initial_state[4];

			
 
				+

			
 
				+    sha1_block_setup(&s->blk);

			
 
				+}

			
 
				+

			
 
				+static void sha1_neon_copyfrom(ssh_hash *hcopy, ssh_hash *horig)

			
 
				+{

			
 
				+    sha1_neon *copy = container_of(hcopy, sha1_neon, hash);

			
 
				+    sha1_neon *orig = container_of(horig, sha1_neon, hash);

			
 
				+

			
 
				+    *copy = *orig; /* structure copy */

			
 
				 

			
 
				     BinarySink_COPIED(copy);

			
 
				     BinarySink_DELEGATE_INIT(&copy->hash, copy);

			
 
				-

			
 
				-    return &copy->hash;

			
 
				 }

			
 
				 

			
 
				 static void sha1_neon_free(ssh_hash *hash)

			
@@ -863,19 +890,24 @@ static void sha1_neon_write(BinarySink *bs, const void *vp, size_t len)
 
				             sha1_neon_block(&s->core, s->blk.block);

			
 
				 }

			
 
				 

			
 
				-static void sha1_neon_final(ssh_hash *hash, uint8_t *digest)

			
 
				+static void sha1_neon_digest(ssh_hash *hash, uint8_t *digest)

			
 
				 {

			
 
				     sha1_neon *s = container_of(hash, sha1_neon, hash);

			
 
				 

			
 
				     sha1_block_pad(&s->blk, BinarySink_UPCAST(s));

			
 
				     vst1q_u8(digest, vrev32q_u8(vreinterpretq_u8_u32(s->core.abcd)));

			
 
				     PUT_32BIT_MSB_FIRST(digest + 16, s->core.e);

			
 
				-    sha1_neon_free(hash);

			
 
				 }

			
 
				 

			
 
				 const ssh_hashalg ssh_sha1_hw = {

			
 
				-    sha1_neon_new, sha1_neon_copy, sha1_neon_final, sha1_neon_free,

			
 
				-    20, 64, HASHALG_NAMES_ANNOTATED("SHA-1", "NEON accelerated"),

			
 
				+    .new = sha1_neon_new,

			
 
				+    .reset = sha1_neon_reset,

			
 
				+    .copyfrom = sha1_neon_copyfrom,

			
 
				+    .digest = sha1_neon_digest,

			
 
				+    .free = sha1_neon_free,

			
 
				+    .hlen = 20,

			
 
				+    .blocklen = 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-1", "NEON accelerated"),

			
 
				 };

			
 
				 

			
 
				 /* ----------------------------------------------------------------------

			
@@ -900,14 +932,22 @@ static ssh_hash *sha1_stub_new(const ssh_hashalg *alg)
 
				 

			
 
				 #define STUB_BODY { unreachable("Should never be called"); }

			
 
				 

			
 
				-static ssh_hash *sha1_stub_copy(ssh_hash *hash) STUB_BODY

			
 
				+static void sha1_stub_reset(ssh_hash *hash) STUB_BODY

			
 
				+static void sha1_stub_copyfrom(ssh_hash *hash, ssh_hash *orig) STUB_BODY

			
 
				 static void sha1_stub_free(ssh_hash *hash) STUB_BODY

			
 
				-static void sha1_stub_final(ssh_hash *hash, uint8_t *digest) STUB_BODY

			
 
				+static void sha1_stub_digest(ssh_hash *hash, uint8_t *digest) STUB_BODY

			
 
				 

			
 
				 const ssh_hashalg ssh_sha1_hw = {

			
 
				-    sha1_stub_new, sha1_stub_copy, sha1_stub_final, sha1_stub_free,

			
 
				-    20, 64, HASHALG_NAMES_ANNOTATED(

			
 
				-        "SHA-1", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    // WINSCP

			
 
				+    /*.new =*/ sha1_stub_new,

			
 
				+    /*.reset =*/ sha1_stub_reset,

			
 
				+    /*.copyfrom =*/ sha1_stub_copyfrom,

			
 
				+    /*.digest =*/ sha1_stub_digest,

			
 
				+    /*.free =*/ sha1_stub_free,

			
 
				+    /*.hlen =*/ 20,

			
 
				+    /*.blocklen =*/ 64,

			
 
				+    HASHALG_NAMES_ANNOTATED("SHA-1", "!NONEXISTENT ACCELERATED VERSION!"),

			
 
				+    NULL,

			
 
				 };

			
 
				 

			
 
				 #endif /* HW_SHA1 */

			
--- a/source/putty/utils.c
+++ b/source/putty/utils.c
@@ -1065,3 +1065,29 @@ size_t encode_utf8(void *output, unsigned long ch)
 
				     }

			
 
				     return p - start;

			
 
				 }

			
 
				+

			
 
				+void memxor(uint8_t *out, const uint8_t *in1, const uint8_t *in2, size_t size)

			
 
				+{

			
 
				+    switch (size & 15) {

			
 
				+      case 0:

			
 
				+        while (size >= 16) {

			
 
				+            size -= 16;

			
 
				+                   *out++ = *in1++ ^ *in2++;

			
 
				+          case 15: *out++ = *in1++ ^ *in2++;

			
 
				+          case 14: *out++ = *in1++ ^ *in2++;

			
 
				+          case 13: *out++ = *in1++ ^ *in2++;

			
 
				+          case 12: *out++ = *in1++ ^ *in2++;

			
 
				+          case 11: *out++ = *in1++ ^ *in2++;

			
 
				+          case 10: *out++ = *in1++ ^ *in2++;

			
 
				+          case 9:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 8:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 7:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 6:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 5:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 4:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 3:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 2:  *out++ = *in1++ ^ *in2++;

			
 
				+          case 1:  *out++ = *in1++ ^ *in2++;

			
 
				+        }

			
 
				+    }

			
 
				+}